Compare commits
39 Commits
v1.2.0
...
mark/missi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6fca53c31d | ||
|
|
e1f3b4fdbe | ||
|
|
c09f6d5adb | ||
|
|
e2d249e5d9 | ||
|
|
956b39b0dc | ||
|
|
bc769eaa82 | ||
|
|
ee5aa4dc71 | ||
|
|
dd38fbc735 | ||
|
|
a1c40df471 | ||
|
|
c4ff9300c9 | ||
|
|
cab4585cbb | ||
|
|
18368d047e | ||
|
|
e3abb4b6d7 | ||
|
|
0fd971d59d | ||
|
|
c61672194d | ||
|
|
c51a817efa | ||
|
|
d85eda6da8 | ||
|
|
71feb42711 | ||
|
|
6b93ca0cb6 | ||
|
|
b6ecce754b | ||
|
|
d39e6bf921 | ||
|
|
63064860ef | ||
|
|
f5158d51e7 | ||
|
|
94dbd2fa68 | ||
|
|
c6ea6c6522 | ||
|
|
58a22aeeb1 | ||
|
|
5403aa56e4 | ||
|
|
0e0d76d020 | ||
|
|
b493ed8d3a | ||
|
|
c3338667b1 | ||
|
|
c8efe319b3 | ||
|
|
d6655e7a5e | ||
|
|
33b73df6ec | ||
|
|
c9f0172e9f | ||
|
|
2638885c62 | ||
|
|
cb426cbb14 | ||
|
|
d39beff817 | ||
|
|
1eade184f1 | ||
|
|
3fa193b983 |
21
CHANGELOG.md
21
CHANGELOG.md
@@ -7,6 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
<!-- towncrier release notes start -->
|
||||
|
||||
## [1.2.1] - 2026-05-15
|
||||
|
||||
### Changed
|
||||
|
||||
- Changed the default WebSocket endpoints for `GradiumSTTService` and
|
||||
`GradiumTTSService` to the region-neutral
|
||||
`wss://api.gradium.ai/api/speech/asr` and
|
||||
`wss://api.gradium.ai/api/speech/tts`. Gradium now automatically routes
|
||||
traffic to the nearest endpoint. Override the url to pin to a specific
|
||||
region.
|
||||
(PR [#4500](https://github.com/pipecat-ai/pipecat/pull/4500))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed bot hangs when `filter_incomplete_user_turns` was enabled and the LLM
|
||||
responded by calling a tool. The user turn never finalized, so the assistant
|
||||
aggregator gated the tool-result context push and the LLM continuation never
|
||||
ran. Tool calls now finalize the turn the moment they start, before the
|
||||
function dispatches.
|
||||
(PR [#4501](https://github.com/pipecat-ai/pipecat/pull/4501))
|
||||
|
||||
## [1.2.0] - 2026-05-14
|
||||
|
||||
### Added
|
||||
|
||||
@@ -95,7 +95,7 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/api-reference/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/api-reference/server/services/llm/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/api-reference/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/api-reference/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/api-reference/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/api-reference/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/api-reference/server/services/llm/grok), [Groq](https://docs.pipecat.ai/api-reference/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/api-reference/server/services/llm/mistral), [Nebius](https://docs.pipecat.ai/api-reference/server/services/llm/nebius), [Novita](https://docs.pipecat.ai/api-reference/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/api-reference/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/api-reference/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/llm/openai), [OpenAI Responses](https://docs.pipecat.ai/api-reference/server/services/llm/openai-responses), [OpenRouter](https://docs.pipecat.ai/api-reference/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/api-reference/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/api-reference/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/api-reference/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/api-reference/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/api-reference/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/api-reference/server/services/tts/aws), [Azure](https://docs.pipecat.ai/api-reference/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/api-reference/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/api-reference/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/api-reference/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/api-reference/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/api-reference/server/services/tts/fish), [Google](https://docs.pipecat.ai/api-reference/server/services/tts/google), [Gradium](https://docs.pipecat.ai/api-reference/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/api-reference/server/services/tts/groq), [Hume](https://docs.pipecat.ai/api-reference/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/api-reference/server/services/tts/inworld), [Kokoro](https://docs.pipecat.ai/api-reference/server/services/tts/kokoro), [LMNT](https://docs.pipecat.ai/api-reference/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/api-reference/server/services/tts/minimax), [Mistral](https://docs.pipecat.ai/api-reference/server/services/tts/mistral), [Neuphonic](https://docs.pipecat.ai/api-reference/server/services/tts/neuphonic), [NVIDIA](https://docs.pipecat.ai/api-reference/server/services/tts/nvidia), [OpenAI](https://docs.pipecat.ai/api-reference/server/services/tts/openai), [Piper](https://docs.pipecat.ai/api-reference/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/api-reference/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/api-reference/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/api-reference/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/api-reference/server/services/tts/smallest), [Soniox](https://docs.pipecat.ai/api-reference/server/services/tts/soniox), [Speechmatics](https://docs.pipecat.ai/api-reference/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/api-reference/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/api-reference/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/api-reference/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/api-reference/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/api-reference/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/api-reference/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/api-reference/server/services/s2s/ultravox), |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/api-reference/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/api-reference/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/api-reference/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/api-reference/server/services/transport/whatsapp), Local |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/api-reference/server/services/transport/fastapi-websocket), [LiveKit (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/livekit), [SmallWebRTCTransport](https://docs.pipecat.ai/api-reference/server/services/transport/small-webrtc), [Vonage (WebRTC)](https://docs.pipecat.ai/api-reference/server/services/transport/vonage), [WebSocket Server](https://docs.pipecat.ai/api-reference/server/services/transport/websocket-server), [WhatsApp](https://docs.pipecat.ai/api-reference/server/services/transport/whatsapp), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/api-reference/server/services/serializers/exotel), [Genesys](https://docs.pipecat.ai/api-reference/server/services/serializers/genesys), [Plivo](https://docs.pipecat.ai/api-reference/server/services/serializers/plivo), [Twilio](https://docs.pipecat.ai/api-reference/server/services/serializers/twilio), [Telnyx](https://docs.pipecat.ai/api-reference/server/services/serializers/telnyx), [Vonage](https://docs.pipecat.ai/api-reference/server/services/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/api-reference/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/api-reference/server/services/transport/lemonslice), [Tavus](https://docs.pipecat.ai/api-reference/server/services/video/tavus), [Simli](https://docs.pipecat.ai/api-reference/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/api-reference/server/services/memory/mem0) |
|
||||
|
||||
1
changelog/4052.added.md
Normal file
1
changelog/4052.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `VonageVideoConnectorTransport`, a new transport integration for real-time Vonage WebRTC sessions using the Vonage Video Connector library.
|
||||
1
changelog/4442.added.2.md
Normal file
1
changelog/4442.added.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `GET /status` endpoint to the development runner that reports which transports the running instance accepts (all by default, or the single transport passed via `-t`).
|
||||
1
changelog/4442.added.md
Normal file
1
changelog/4442.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added plain WebSocket transport support to the development runner. Bots can now accept connections from non-telephony WebSocket clients (e.g., browser apps using protobuf framing) via the `/ws-client` endpoint alongside other transports.
|
||||
1
changelog/4442.changed.md
Normal file
1
changelog/4442.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ The development runner now supports all transports (WebRTC, Daily, telephony, plain WebSocket) simultaneously from a single server. The `/start` endpoint accepts a `"transport"` field to select the transport per-request; omitting `-t` at startup enables all transports instead of defaulting to WebRTC. The Daily browser-redirect route moved from `GET /` to `GET /daily`.
|
||||
1
changelog/4507.fixed.md
Normal file
1
changelog/4507.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `ElevenLabsSTTService` crashing when `language` was passed as `None`. When `language` is not set, the service now lets ElevenLabs auto-detect the audio language.
|
||||
1
changelog/4525.changed.md
Normal file
1
changelog/4525.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Services and transports with missing optional dependencies now raise `ImportError` instead of a bare `Exception` when their module is imported without the required extra installed. The original `ModuleNotFoundError` is preserved as `__cause__`, so code that wraps these imports can now use `except ImportError:` cleanly instead of `except Exception:`.
|
||||
@@ -211,6 +211,11 @@ TWILIO_AUTH_TOKEN=...
|
||||
# Ultravox Realtime
|
||||
ULTRAVOX_API_KEY=...
|
||||
|
||||
# Vonage
|
||||
VONAGE_APPLICATION_ID=...
|
||||
VONAGE_SESSION_ID=...
|
||||
VONAGE_TOKEN=...
|
||||
|
||||
# WhatsApp
|
||||
WHATSAPP_TOKEN=...
|
||||
WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=...
|
||||
|
||||
@@ -68,9 +68,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
tts = OpenAITTSService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
settings=OpenAITTSService.Settings(
|
||||
instructions="Please speak clearly and at a moderate pace.",
|
||||
voice="ballad",
|
||||
),
|
||||
instructions="Please speak clearly and at a moderate pace.",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
|
||||
@@ -51,7 +51,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = GradiumSTTService(
|
||||
api_key=os.environ["GRADIUM_API_KEY"],
|
||||
api_endpoint_base_url="wss://us.api.gradium.ai/api/speech/asr",
|
||||
settings=GradiumSTTService.Settings(
|
||||
language=Language.EN,
|
||||
delay_in_frames=8,
|
||||
|
||||
134
examples/transports/transports-vonage.py
Normal file
134
examples/transports/transports-vonage.py
Normal file
@@ -0,0 +1,134 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Example of using OpenAI Realtime voice LLM service with Vonage Video Connector transport."""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.observers.loggers.transcription_log_observer import TranscriptionLogObserver
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.vonage import configure
|
||||
from pipecat.services.openai.realtime.events import (
|
||||
AudioConfiguration,
|
||||
AudioInput,
|
||||
InputAudioNoiseReduction,
|
||||
InputAudioTranscription,
|
||||
SemanticTurnDetection,
|
||||
SessionProperties,
|
||||
)
|
||||
from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
|
||||
from pipecat.transports.vonage.video_connector import (
|
||||
VonageVideoConnectorTransport,
|
||||
VonageVideoConnectorTransportParams,
|
||||
)
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
"""Main entry point for the OpenAI Realtime vonage video connector example."""
|
||||
(application_id, session_id, token) = await configure()
|
||||
|
||||
transport = VonageVideoConnectorTransport(
|
||||
application_id,
|
||||
session_id,
|
||||
token,
|
||||
VonageVideoConnectorTransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
publisher_name="Bot",
|
||||
),
|
||||
)
|
||||
|
||||
llm = OpenAIRealtimeLLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
settings=OpenAIRealtimeLLMService.Settings(
|
||||
system_instruction="""You are a helpful and friendly AI.
|
||||
|
||||
Act like a human, but remember that you aren't a human and that you can't do human
|
||||
things in the real world. Your voice and personality should be warm and engaging, with a lively and
|
||||
playful tone.
|
||||
|
||||
If interacting in a non-English language, start by using the standard accent or dialect familiar to
|
||||
the user. Talk quickly.
|
||||
|
||||
You are participating in a voice conversation. Keep your responses concise, short, and to the point
|
||||
unless specifically asked to elaborate on a topic.
|
||||
|
||||
Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""",
|
||||
session_properties=SessionProperties(
|
||||
audio=AudioConfiguration(
|
||||
input=AudioInput(
|
||||
transcription=InputAudioTranscription(),
|
||||
turn_detection=SemanticTurnDetection(),
|
||||
noise_reduction=InputAudioNoiseReduction(type="near_field"),
|
||||
)
|
||||
),
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
context = LLMContext(
|
||||
[{"role": "developer", "content": "Say hello!"}],
|
||||
)
|
||||
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_aggregator,
|
||||
llm,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
observers=[TranscriptionLogObserver()],
|
||||
)
|
||||
|
||||
event_handler: Callable[[str], Callable[[Any], Any]] = transport.event_handler
|
||||
|
||||
@event_handler("on_client_connected")
|
||||
async def on_client_connected(transport: VonageVideoConnectorTransport, client: object) -> None:
|
||||
logger.info("Client connected")
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -0,0 +1,201 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Example 22: Filter Incomplete Turns
|
||||
|
||||
Demonstrates LLM-based turn completion detection to suppress bot responses when
|
||||
the user was cut off mid-thought. The LLM outputs one of three markers:
|
||||
- ✓ (complete): User finished their thought, respond normally
|
||||
- ○ (incomplete short): User was cut off, wait ~5s then prompt
|
||||
- ◐ (incomplete long): User needs time to think, wait ~10s then prompt
|
||||
|
||||
When incomplete is detected, the bot's response is suppressed. After the timeout
|
||||
expires, the LLM is automatically prompted to re-engage the user.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
AssistantTurnStoppedMessage,
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
UserTurnStoppedMessage,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_turn_strategies import FilterIncompleteUserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def get_weather(params: FunctionCallParams, location: str):
|
||||
"""Return the current weather for a location.
|
||||
|
||||
A stub that always reports the same conditions — replace with a real
|
||||
weather API in production.
|
||||
|
||||
Args:
|
||||
location (str): The city and state or country, e.g. "Paris, France".
|
||||
"""
|
||||
await params.result_callback(
|
||||
{
|
||||
"location": location,
|
||||
"temperature_celsius": 22,
|
||||
"conditions": "partly cloudy",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.environ["DEEPGRAM_API_KEY"])
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.environ["OPENAI_API_KEY"],
|
||||
settings=OpenAILLMService.Settings(
|
||||
system_instruction=(
|
||||
"You are a helpful assistant in a voice conversation. Your "
|
||||
"responses will be spoken aloud, so avoid emojis, bullet "
|
||||
"points, or other formatting that can't be spoken. Respond to "
|
||||
"what the user said in a creative, helpful, and brief way. "
|
||||
"If the user asks about the weather, call the get_weather "
|
||||
"tool and speak the result back naturally."
|
||||
),
|
||||
),
|
||||
)
|
||||
llm.register_direct_function(get_weather)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
settings=CartesiaTTSService.Settings(
|
||||
voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
),
|
||||
)
|
||||
|
||||
context = LLMContext(tools=ToolsSchema(standard_tools=[get_weather]))
|
||||
# `FilterIncompleteUserTurnStrategies` pairs the default detector
|
||||
# chain with `LLMTurnCompletionUserTurnStopStrategy`: detectors
|
||||
# trigger LLM inference but the public `on_user_turn_stopped` event
|
||||
# fires only when the LLM confirms ✓. The LLM marks each response
|
||||
# with one of:
|
||||
# ✓ = complete (respond normally)
|
||||
# ○ = incomplete short (wait 5s, then prompt)
|
||||
# ◐ = incomplete long (wait 10s, then prompt)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
user_turn_strategies=FilterIncompleteUserTurnStrategies(
|
||||
# Optional: customize turn completion behavior
|
||||
# config=UserTurnCompletionConfig(
|
||||
# incomplete_short_timeout=5.0,
|
||||
# incomplete_long_timeout=10.0,
|
||||
# incomplete_short_prompt="Custom prompt...",
|
||||
# incomplete_long_prompt="Custom prompt...",
|
||||
# instructions="Custom turn completion instructions...",
|
||||
# ),
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
user_aggregator, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
assistant_aggregator, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
@user_aggregator.event_handler("on_user_turn_stopped")
|
||||
async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}user: {message.content}"
|
||||
logger.info(f"Transcript: {line}")
|
||||
|
||||
@assistant_aggregator.event_handler("on_assistant_turn_stopped")
|
||||
async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}assistant: {message.content}"
|
||||
logger.info(f"Transcript: {line}")
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -50,10 +50,7 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = GradiumSTTService(
|
||||
api_key=os.environ["GRADIUM_API_KEY"],
|
||||
api_endpoint_base_url="wss://us.api.gradium.ai/api/speech/asr",
|
||||
)
|
||||
stt = GradiumSTTService(api_key=os.environ["GRADIUM_API_KEY"])
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.environ["CARTESIA_API_KEY"],
|
||||
|
||||
@@ -55,7 +55,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
tts = GradiumTTSService(
|
||||
api_key=os.environ["GRADIUM_API_KEY"],
|
||||
settings=GradiumTTSService.Settings(voice="YTpq7expH9539ERJ"),
|
||||
url="wss://us.api.gradium.ai/api/speech/tts",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
|
||||
@@ -54,7 +54,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = GradiumSTTService(
|
||||
api_key=os.environ["GRADIUM_API_KEY"],
|
||||
api_endpoint_base_url="wss://us.api.gradium.ai/api/speech/asr",
|
||||
settings=GradiumSTTService.Settings(
|
||||
language=Language.EN,
|
||||
),
|
||||
@@ -62,7 +61,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
tts = GradiumTTSService(
|
||||
api_key=os.environ["GRADIUM_API_KEY"],
|
||||
url="wss://us.api.gradium.ai/api/speech/tts",
|
||||
settings=GradiumTTSService.Settings(
|
||||
voice="YTpq7expH9539ERJ",
|
||||
),
|
||||
|
||||
@@ -103,7 +103,7 @@ piper = [ "piper-tts>=1.3.0,<2", "requests>=2.32.5,<3" ]
|
||||
qwen = []
|
||||
resembleai = [ "pipecat-ai[websockets-base]" ]
|
||||
rime = [ "pipecat-ai[websockets-base]" ]
|
||||
runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<1", "pipecat-ai-small-webrtc-prebuilt>=2.5.0"]
|
||||
runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<1", "pipecat-ai-prebuilt>=1.0.0"]
|
||||
sagemaker = ["aws_sdk_sagemaker_runtime_http2; python_version>='3.12'"]
|
||||
sambanova = []
|
||||
sarvam = [ "sarvamai==0.1.28", "pipecat-ai[websockets-base]" ]
|
||||
@@ -119,6 +119,7 @@ tavus = [ "pipecat-ai[daily]" ]
|
||||
together = []
|
||||
tracing = [ "opentelemetry-sdk>=1.33.0,<2", "opentelemetry-api>=1.33.0,<2", "opentelemetry-instrumentation>=0.54b0,<1" ]
|
||||
ultravox = [ "pipecat-ai[websockets-base]" ]
|
||||
vonage-video-connector = [ "vonage-video-connector~=0.2.3b0; python_full_version>='3.13' and python_full_version<'3.14' and platform_system=='Linux'" ]
|
||||
webrtc = [ "aiortc>=1.14.0,<2", "opencv-python>=4.11.0.86,<5" ]
|
||||
websocket = [ "pipecat-ai[websockets-base]", "fastapi>=0.115.6,<1" ]
|
||||
websockets-base = [ "websockets>=13.1,<16.0" ]
|
||||
|
||||
@@ -242,6 +242,7 @@ TESTS_VIDEO_AVATAR = [
|
||||
|
||||
TESTS_TURN_MANAGEMENT = [
|
||||
("turn-management/turn-management-filter-incomplete-turns.py", EVAL_COMPLETE_TURN),
|
||||
("turn-management/turn-management-filter-incomplete-turns-function-calling.py", EVAL_WEATHER),
|
||||
]
|
||||
|
||||
TESTS_THINKING = [
|
||||
|
||||
@@ -28,7 +28,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class GeminiLLMInvocationParams(TypedDict):
|
||||
|
||||
@@ -23,7 +23,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use the Koala filter, you need to `pip install pipecat-ai[koala]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class KoalaFilter(BaseAudioFilter):
|
||||
|
||||
@@ -27,7 +27,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use KrispVivaFilter, you need to install krisp_audio.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class KrispVivaFilter(BaseAudioFilter):
|
||||
|
||||
@@ -28,7 +28,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use the soundfile mixer, you need to `pip install pipecat-ai[soundfile]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class SoundfileMixer(BaseAudioMixer):
|
||||
|
||||
@@ -27,7 +27,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use the LocalSmartTurnAnalyzer, you need to `pip install pipecat-ai[local-smart-turn]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class LocalCoreMLSmartTurnAnalyzer(BaseSmartTurn):
|
||||
|
||||
@@ -33,7 +33,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use LocalSmartTurnAnalyzerV2, you need to `pip install pipecat-ai[local-smart-turn]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class LocalSmartTurnAnalyzerV2(BaseSmartTurn):
|
||||
|
||||
@@ -28,7 +28,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use KrispVivaVADAnalyzer, you need to install krisp_audio.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class KrispVivaVadAnalyzer(VADAnalyzer):
|
||||
|
||||
@@ -27,7 +27,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Silero VAD, you need to `pip install pipecat-ai`.")
|
||||
raise Exception(f"Missing module(s): {e}")
|
||||
raise ImportError(f"Missing module(s): {e}") from e
|
||||
|
||||
|
||||
class SileroOnnxModel:
|
||||
|
||||
@@ -22,7 +22,7 @@ try:
|
||||
from langchain_core.runnables import Runnable
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error("In order to use Langchain, you need to `pip install pipecat-ai[langchain]`. ")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class LangchainProcessor(FrameProcessor):
|
||||
|
||||
@@ -21,7 +21,7 @@ try:
|
||||
from strands.multiagent.graph import Graph
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error("In order to use Strands Agents, you need to `pip install strands-agents`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class StrandsAgentsProcessor(FrameProcessor):
|
||||
|
||||
@@ -33,7 +33,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use GStreamer, you need to `pip install pipecat-ai[gstreamer]`. Also, you need to install GStreamer in your system."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class GStreamerPipelineSource(FrameProcessor):
|
||||
|
||||
@@ -17,7 +17,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Sentry, you need to `pip install pipecat-ai[sentry]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
from pipecat.processors.metrics.frame_processor_metrics import FrameProcessorMetrics
|
||||
|
||||
|
||||
@@ -19,6 +19,10 @@ All bots must implement a `bot(runner_args)` async function as the entry point.
|
||||
The server automatically discovers and executes this function when connections
|
||||
are established.
|
||||
|
||||
By default the runner starts a single FastAPI server that supports WebRTC, Daily,
|
||||
and telephony transports simultaneously. Clients declare which transport they want
|
||||
via the ``transport`` field in the ``/start`` request body (default: ``"webrtc"``).
|
||||
|
||||
Single transport example::
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
@@ -55,14 +59,33 @@ Supported transports:
|
||||
- WebRTC - Provides local WebRTC interface with prebuilt UI
|
||||
- Telephony - Handles webhook and WebSocket connections for Twilio, Telnyx, Plivo, Exotel
|
||||
|
||||
The ``/start`` endpoint accepts::
|
||||
|
||||
{
|
||||
"transport": "webrtc", // "webrtc" | "daily" | "twilio" | "telnyx" |
|
||||
// "plivo" | "exotel" — default: "webrtc"
|
||||
|
||||
// WebRTC-specific
|
||||
"enableDefaultIceServers": false,
|
||||
"body": {...},
|
||||
|
||||
// Daily-specific
|
||||
"createDailyRoom": true,
|
||||
"dailyRoomProperties": {...},
|
||||
"dailyMeetingTokenProperties": {...},
|
||||
"body": {...}
|
||||
}
|
||||
|
||||
To run locally:
|
||||
|
||||
- WebRTC: `python bot.py -t webrtc`
|
||||
- ESP32: `python bot.py -t webrtc --esp32 --host 192.168.1.100`
|
||||
- Daily (server): `python bot.py -t daily`
|
||||
- Daily (direct, testing only): `python bot.py -d`
|
||||
- Telephony: `python bot.py -t twilio -x your_username.ngrok.io`
|
||||
- Exotel: `python bot.py -t exotel` (no proxy needed, but ngrok connection to HTTP 7860 is required)
|
||||
- All transports (default): ``python bot.py``
|
||||
- WebRTC only: ``python bot.py -t webrtc``
|
||||
- ESP32: ``python bot.py -t webrtc --esp32 --host 192.168.1.100``
|
||||
- Daily only: ``python bot.py -t daily``
|
||||
- Daily (direct, testing only): ``python bot.py -d``
|
||||
- Telephony: ``python bot.py -t twilio -x your_username.ngrok.io``
|
||||
- Exotel: ``python bot.py -t exotel`` (no proxy needed, but ngrok connection to HTTP 7860 is required)
|
||||
- WhatsApp: ``python bot.py --whatsapp``
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -85,8 +108,10 @@ from pipecat.runner.types import (
|
||||
DailyRunnerArguments,
|
||||
RunnerArguments,
|
||||
SmallWebRTCRunnerArguments,
|
||||
VonageRunnerArguments,
|
||||
WebSocketRunnerArguments,
|
||||
)
|
||||
from pipecat.runner.vonage import configure as configure_vonage
|
||||
|
||||
try:
|
||||
import uvicorn
|
||||
@@ -186,8 +211,33 @@ async def _run_telephony_bot(websocket: WebSocket, args: argparse.Namespace):
|
||||
await bot_module.bot(runner_args)
|
||||
|
||||
|
||||
async def _run_websocket_bot(websocket: WebSocket, args: argparse.Namespace):
|
||||
"""Run a bot for plain WebSocket transport."""
|
||||
bot_module = _get_bot_module()
|
||||
|
||||
runner_args = WebSocketRunnerArguments(
|
||||
websocket=websocket,
|
||||
transport_type="websocket",
|
||||
session_id=str(uuid.uuid4()),
|
||||
)
|
||||
runner_args.cli_args = args
|
||||
|
||||
await bot_module.bot(runner_args)
|
||||
|
||||
|
||||
def _setup_websocket_routes(app: FastAPI, args: argparse.Namespace):
|
||||
"""Set up the plain WebSocket route at ``/ws-client``."""
|
||||
|
||||
@app.websocket("/ws-client")
|
||||
async def websocket_client_endpoint(websocket: WebSocket):
|
||||
"""Handle plain WebSocket connections (non-telephony)."""
|
||||
await websocket.accept()
|
||||
logger.debug("Plain WebSocket connection accepted")
|
||||
await _run_websocket_bot(websocket, args)
|
||||
|
||||
|
||||
def _configure_server_app(args: argparse.Namespace):
|
||||
"""Configure the module-level FastAPI app with transport-specific routes."""
|
||||
"""Configure the module-level FastAPI app with routes for all transports."""
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
@@ -196,17 +246,198 @@ def _configure_server_app(args: argparse.Namespace):
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Set up transport-specific routes
|
||||
if args.transport == "webrtc":
|
||||
_setup_webrtc_routes(app, args)
|
||||
if args.whatsapp:
|
||||
_setup_whatsapp_routes(app, args)
|
||||
elif args.transport == "daily":
|
||||
_setup_daily_routes(app, args)
|
||||
elif args.transport in TELEPHONY_TRANSPORTS:
|
||||
_setup_telephony_routes(app, args)
|
||||
else:
|
||||
logger.warning(f"Unknown transport type: {args.transport}")
|
||||
# Shared session store: session_id -> body data. Used by the WebRTC /start
|
||||
# flow and the /sessions/{session_id}/... proxy routes.
|
||||
active_sessions: dict[str, dict[str, Any]] = {}
|
||||
|
||||
_setup_frontend_routes(app)
|
||||
_setup_webrtc_routes(app, args, active_sessions)
|
||||
_setup_daily_routes(app, args)
|
||||
_setup_telephony_routes(app, args)
|
||||
_setup_websocket_routes(app, args)
|
||||
_setup_unified_start_route(app, args, active_sessions)
|
||||
|
||||
if args.whatsapp:
|
||||
_setup_whatsapp_routes(app, args)
|
||||
|
||||
|
||||
def _setup_unified_start_route(
|
||||
app: FastAPI, args: argparse.Namespace, active_sessions: dict[str, dict[str, Any]]
|
||||
):
|
||||
"""Register the unified POST /start and GET /status endpoints.
|
||||
|
||||
Handles WebRTC, Daily, and telephony transport start flows. Clients specify
|
||||
which transport they want via the ``transport`` field in the request body.
|
||||
When ``-t`` was passed on the command line, requests for any other transport
|
||||
are rejected with HTTP 400.
|
||||
"""
|
||||
ALL_TRANSPORTS = ["webrtc", "daily", *TELEPHONY_TRANSPORTS, "websocket"]
|
||||
|
||||
@app.get("/status")
|
||||
async def status():
|
||||
"""Return the transports supported by this runner instance."""
|
||||
transports = [args.transport] if args.transport is not None else ALL_TRANSPORTS
|
||||
return {"status": "ready", "transports": transports}
|
||||
|
||||
class IceServer(TypedDict, total=False):
|
||||
urls: str | list[str]
|
||||
|
||||
class IceConfig(TypedDict):
|
||||
iceServers: list[IceServer]
|
||||
|
||||
class StartBotResult(TypedDict, total=False):
|
||||
sessionId: str
|
||||
iceConfig: IceConfig | None
|
||||
dailyRoom: str | None
|
||||
dailyToken: str | None
|
||||
wsUrl: str | None
|
||||
token: str | None
|
||||
|
||||
@app.post("/start")
|
||||
async def start_agent(request: Request):
|
||||
"""Start a bot session.
|
||||
|
||||
Accepts::
|
||||
|
||||
{
|
||||
"transport": "webrtc", // "webrtc" | "daily" | "twilio" | "telnyx" |
|
||||
// "plivo" | "exotel" — default: "webrtc"
|
||||
|
||||
// WebRTC-specific
|
||||
"enableDefaultIceServers": false,
|
||||
"body": {...},
|
||||
|
||||
// Daily-specific
|
||||
"createDailyRoom": true,
|
||||
"dailyRoomProperties": {...},
|
||||
"dailyMeetingTokenProperties": {...},
|
||||
"body": {...}
|
||||
}
|
||||
"""
|
||||
try:
|
||||
request_data = await request.json()
|
||||
logger.debug(f"Received request: {request_data}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse request body: {e}")
|
||||
request_data = {}
|
||||
|
||||
# Determine transport: explicit field → legacy Daily hint → CLI default → webrtc
|
||||
transport = request_data.get("transport")
|
||||
if transport is None and request_data.get("createDailyRoom", False):
|
||||
transport = "daily"
|
||||
if transport is None:
|
||||
transport = args.transport or "webrtc"
|
||||
|
||||
# Enforce restriction when -t was explicitly set on the command line
|
||||
if args.transport is not None and transport != args.transport:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=(
|
||||
f"Transport '{transport}' is not allowed. "
|
||||
f"Server is configured for '{args.transport}' only (-t {args.transport})."
|
||||
),
|
||||
)
|
||||
|
||||
if transport == "webrtc":
|
||||
# WebRTC: register the session; the bot starts when the WebRTC offer arrives.
|
||||
session_id = str(uuid.uuid4())
|
||||
active_sessions[session_id] = request_data.get("body", {})
|
||||
|
||||
result = StartBotResult(
|
||||
sessionId=session_id,
|
||||
)
|
||||
if request_data.get("enableDefaultIceServers"):
|
||||
result["iceConfig"] = IceConfig(
|
||||
iceServers=[IceServer(urls=["stun:stun.l.google.com:19302"])]
|
||||
)
|
||||
return result
|
||||
|
||||
elif transport == "daily":
|
||||
create_daily_room = request_data.get("createDailyRoom", False)
|
||||
body = request_data.get("body", {})
|
||||
daily_room_properties_dict = request_data.get("dailyRoomProperties", None)
|
||||
daily_token_properties_dict = request_data.get("dailyMeetingTokenProperties", None)
|
||||
|
||||
bot_module = _get_bot_module()
|
||||
|
||||
existing_room_url = os.getenv("DAILY_ROOM_URL")
|
||||
session_id = str(uuid.uuid4())
|
||||
result: StartBotResult | None = None
|
||||
|
||||
if create_daily_room or existing_room_url:
|
||||
from pipecat.runner.daily import configure
|
||||
from pipecat.transports.daily.utils import (
|
||||
DailyMeetingTokenProperties,
|
||||
DailyRoomProperties,
|
||||
)
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
room_properties = None
|
||||
if daily_room_properties_dict:
|
||||
daily_room_properties_dict.setdefault(
|
||||
"exp", time.time() + PIPECAT_ROOM_EXP_HOURS * 3600
|
||||
)
|
||||
daily_room_properties_dict.setdefault("eject_at_room_exp", True)
|
||||
try:
|
||||
room_properties = DailyRoomProperties(**daily_room_properties_dict)
|
||||
logger.debug(f"Using custom room properties: {room_properties}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse dailyRoomProperties: {e}")
|
||||
|
||||
token_properties = None
|
||||
if daily_token_properties_dict:
|
||||
try:
|
||||
token_properties = DailyMeetingTokenProperties(
|
||||
**daily_token_properties_dict
|
||||
)
|
||||
logger.debug(f"Using custom token properties: {token_properties}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse dailyMeetingTokenProperties: {e}")
|
||||
|
||||
room_url, token = await configure(
|
||||
session,
|
||||
room_exp_duration=PIPECAT_ROOM_EXP_HOURS,
|
||||
room_properties=room_properties,
|
||||
token_properties=token_properties,
|
||||
)
|
||||
runner_args = DailyRunnerArguments(
|
||||
room_url=room_url, token=token, body=body, session_id=session_id
|
||||
)
|
||||
result = StartBotResult(
|
||||
dailyRoom=room_url,
|
||||
dailyToken=token,
|
||||
sessionId=session_id,
|
||||
)
|
||||
else:
|
||||
runner_args = RunnerArguments(body=body, session_id=session_id)
|
||||
|
||||
runner_args.cli_args = args
|
||||
asyncio.create_task(bot_module.bot(runner_args))
|
||||
return result
|
||||
|
||||
elif transport in TELEPHONY_TRANSPORTS:
|
||||
# Telephony: the bot starts when the provider connects to /ws.
|
||||
# Return the WebSocket URL so the caller knows where to point their provider.
|
||||
scheme = "wss" if args.host != "localhost" else "ws"
|
||||
return StartBotResult(
|
||||
wsUrl=f"{scheme}://{args.host}:{args.port}/ws",
|
||||
)
|
||||
|
||||
elif transport == "websocket":
|
||||
# Plain WebSocket: the bot starts when the client connects to /ws-client.
|
||||
scheme = "wss" if args.host != "localhost" else "ws"
|
||||
session_id = str(uuid.uuid4())
|
||||
return StartBotResult(
|
||||
wsUrl=f"{scheme}://{args.host}:{args.port}/ws-client",
|
||||
sessionId=session_id,
|
||||
token="mock_token",
|
||||
)
|
||||
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Unknown transport '{transport}'.",
|
||||
)
|
||||
|
||||
|
||||
def _resolve_download_path(folder: str, filename: str) -> Path:
|
||||
@@ -220,11 +451,27 @@ def _resolve_download_path(folder: str, filename: str) -> Path:
|
||||
return file_path
|
||||
|
||||
|
||||
def _setup_webrtc_routes(app: FastAPI, args: argparse.Namespace):
|
||||
def _setup_frontend_routes(app: FastAPI):
|
||||
"""Mount the prebuilt frontend UI and root redirect for all transports."""
|
||||
try:
|
||||
from pipecat_ai_prebuilt.frontend import PipecatPrebuiltUI
|
||||
except ImportError as e:
|
||||
logger.error(f"Prebuilt frontend not available: {e}")
|
||||
return
|
||||
|
||||
app.mount("/client", PipecatPrebuiltUI)
|
||||
|
||||
@app.get("/", include_in_schema=False)
|
||||
async def root_redirect():
|
||||
"""Redirect root requests to client interface."""
|
||||
return RedirectResponse(url="/client/")
|
||||
|
||||
|
||||
def _setup_webrtc_routes(
|
||||
app: FastAPI, args: argparse.Namespace, active_sessions: dict[str, dict[str, Any]]
|
||||
):
|
||||
"""Set up WebRTC-specific routes."""
|
||||
try:
|
||||
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||
|
||||
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
|
||||
from pipecat.transports.smallwebrtc.request_handler import (
|
||||
IceCandidate,
|
||||
@@ -236,27 +483,6 @@ def _setup_webrtc_routes(app: FastAPI, args: argparse.Namespace):
|
||||
logger.error(f"WebRTC transport dependencies not installed: {e}")
|
||||
return
|
||||
|
||||
class IceServer(TypedDict, total=False):
|
||||
urls: str | list[str]
|
||||
|
||||
class IceConfig(TypedDict):
|
||||
iceServers: list[IceServer]
|
||||
|
||||
class StartBotResult(TypedDict, total=False):
|
||||
sessionId: str
|
||||
iceConfig: IceConfig | None
|
||||
|
||||
# In-memory store of active sessions: session_id -> session info
|
||||
active_sessions: dict[str, dict[str, Any]] = {}
|
||||
|
||||
# Mount the frontend
|
||||
app.mount("/client", SmallWebRTCPrebuiltUI)
|
||||
|
||||
@app.get("/", include_in_schema=False)
|
||||
async def root_redirect():
|
||||
"""Redirect root requests to client interface."""
|
||||
return RedirectResponse(url="/client/")
|
||||
|
||||
@app.get("/files/{filename:path}")
|
||||
async def download_file(filename: str):
|
||||
"""Handle file downloads."""
|
||||
@@ -315,29 +541,6 @@ def _setup_webrtc_routes(app: FastAPI, args: argparse.Namespace):
|
||||
await small_webrtc_handler.handle_patch_request(request)
|
||||
return {"status": "success"}
|
||||
|
||||
@app.post("/start")
|
||||
async def rtvi_start(request: Request):
|
||||
"""Mimic Pipecat Cloud's /start endpoint."""
|
||||
# Parse the request body
|
||||
try:
|
||||
request_data = await request.json()
|
||||
logger.debug(f"Received request: {request_data}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse request body: {e}")
|
||||
request_data = {}
|
||||
|
||||
# Store session info immediately in memory, replicate the behavior expected on Pipecat Cloud
|
||||
session_id = str(uuid.uuid4())
|
||||
active_sessions[session_id] = request_data.get("body", {})
|
||||
|
||||
result: StartBotResult = {"sessionId": session_id}
|
||||
if request_data.get("enableDefaultIceServers"):
|
||||
result["iceConfig"] = IceConfig(
|
||||
iceServers=[IceServer(urls=["stun:stun.l.google.com:19302"])]
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
@app.api_route(
|
||||
"/sessions/{session_id}/{path:path}",
|
||||
methods=["GET", "POST", "PUT", "PATCH", "DELETE"],
|
||||
@@ -563,12 +766,10 @@ def _setup_whatsapp_routes(app: FastAPI, args: argparse.Namespace):
|
||||
def _setup_daily_routes(app: FastAPI, args: argparse.Namespace):
|
||||
"""Set up Daily-specific routes."""
|
||||
|
||||
@app.get("/")
|
||||
@app.get("/daily")
|
||||
async def create_room_and_start_agent():
|
||||
"""Launch a Daily bot and redirect to room."""
|
||||
print("Starting bot with Daily transport and redirecting to Daily room")
|
||||
|
||||
import aiohttp
|
||||
logger.debug("Starting bot with Daily transport and redirecting to Daily room")
|
||||
|
||||
from pipecat.runner.daily import configure
|
||||
|
||||
@@ -584,105 +785,6 @@ def _setup_daily_routes(app: FastAPI, args: argparse.Namespace):
|
||||
asyncio.create_task(bot_module.bot(runner_args))
|
||||
return RedirectResponse(room_url)
|
||||
|
||||
@app.post("/start")
|
||||
async def start_agent(request: Request):
|
||||
"""Handler for /start endpoints.
|
||||
|
||||
Expects POST body like::
|
||||
{
|
||||
"createDailyRoom": true,
|
||||
"dailyRoomProperties": { "start_video_off": true },
|
||||
"dailyMeetingTokenProperties": { "is_owner": true, "user_name": "Bot" },
|
||||
"body": { "custom_data": "value" }
|
||||
}
|
||||
"""
|
||||
print("Starting bot with Daily transport")
|
||||
|
||||
# Parse the request body
|
||||
try:
|
||||
request_data = await request.json()
|
||||
logger.debug(f"Received request: {request_data}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse request body: {e}")
|
||||
request_data = {}
|
||||
|
||||
create_daily_room = request_data.get("createDailyRoom", False)
|
||||
body = request_data.get("body", {})
|
||||
daily_room_properties_dict = request_data.get("dailyRoomProperties", None)
|
||||
daily_token_properties_dict = request_data.get("dailyMeetingTokenProperties", None)
|
||||
|
||||
bot_module = _get_bot_module()
|
||||
|
||||
existing_room_url = os.getenv("DAILY_ROOM_URL")
|
||||
|
||||
session_id = str(uuid.uuid4())
|
||||
result = None
|
||||
|
||||
# Configure room if:
|
||||
# 1. Explicitly requested via createDailyRoom in payload
|
||||
# 2. Using pre-configured room from DAILY_ROOM_URL env var
|
||||
if create_daily_room or existing_room_url:
|
||||
import aiohttp
|
||||
|
||||
from pipecat.runner.daily import configure
|
||||
from pipecat.transports.daily.utils import (
|
||||
DailyMeetingTokenProperties,
|
||||
DailyRoomProperties,
|
||||
)
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# Parse dailyRoomProperties if provided
|
||||
room_properties = None
|
||||
if daily_room_properties_dict:
|
||||
# Apply Pipecat Cloud's session policy if caller didn't override.
|
||||
daily_room_properties_dict.setdefault(
|
||||
"exp", time.time() + PIPECAT_ROOM_EXP_HOURS * 3600
|
||||
)
|
||||
daily_room_properties_dict.setdefault("eject_at_room_exp", True)
|
||||
try:
|
||||
room_properties = DailyRoomProperties(**daily_room_properties_dict)
|
||||
logger.debug(f"Using custom room properties: {room_properties}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse dailyRoomProperties: {e}")
|
||||
# Continue without custom properties
|
||||
|
||||
# Parse dailyMeetingTokenProperties if provided
|
||||
token_properties = None
|
||||
if daily_token_properties_dict:
|
||||
try:
|
||||
token_properties = DailyMeetingTokenProperties(
|
||||
**daily_token_properties_dict
|
||||
)
|
||||
logger.debug(f"Using custom token properties: {token_properties}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to parse dailyMeetingTokenProperties: {e}")
|
||||
# Continue without custom properties
|
||||
|
||||
room_url, token = await configure(
|
||||
session,
|
||||
room_exp_duration=PIPECAT_ROOM_EXP_HOURS,
|
||||
room_properties=room_properties,
|
||||
token_properties=token_properties,
|
||||
)
|
||||
runner_args = DailyRunnerArguments(
|
||||
room_url=room_url, token=token, body=body, session_id=session_id
|
||||
)
|
||||
result = {
|
||||
"dailyRoom": room_url,
|
||||
"dailyToken": token,
|
||||
"sessionId": session_id,
|
||||
}
|
||||
else:
|
||||
runner_args = RunnerArguments(body=body, session_id=session_id)
|
||||
|
||||
# Update CLI args.
|
||||
runner_args.cli_args = args
|
||||
|
||||
# Start the bot in the background
|
||||
asyncio.create_task(bot_module.bot(runner_args))
|
||||
|
||||
return result
|
||||
|
||||
if args.dialin:
|
||||
|
||||
@app.post("/daily-dialin-webhook")
|
||||
@@ -731,8 +833,6 @@ def _setup_daily_routes(app: FastAPI, args: argparse.Namespace):
|
||||
detail="Missing required fields: From, To, callId, callDomain",
|
||||
)
|
||||
|
||||
import aiohttp
|
||||
|
||||
from pipecat.runner.daily import configure
|
||||
from pipecat.runner.types import DailyDialinRequest, DialinSettings
|
||||
|
||||
@@ -801,44 +901,51 @@ def _setup_daily_routes(app: FastAPI, args: argparse.Namespace):
|
||||
|
||||
|
||||
def _setup_telephony_routes(app: FastAPI, args: argparse.Namespace):
|
||||
"""Set up telephony-specific routes."""
|
||||
# XML response templates (Exotel doesn't use XML webhooks)
|
||||
XML_TEMPLATES = {
|
||||
"twilio": f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
"""Set up telephony-specific routes.
|
||||
|
||||
The WebSocket endpoint (``/ws``) is always registered so providers can
|
||||
connect directly. The XML webhook (``POST /``) is only registered when a
|
||||
specific telephony transport is chosen via ``-t`` because the XML template
|
||||
is provider-specific and requires a proxy hostname (``--proxy``).
|
||||
"""
|
||||
if args.transport in TELEPHONY_TRANSPORTS:
|
||||
# XML response templates (Exotel doesn't use XML webhooks)
|
||||
XML_TEMPLATES = {
|
||||
"twilio": f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Response>
|
||||
<Connect>
|
||||
<Stream url="wss://{args.proxy}/ws"></Stream>
|
||||
</Connect>
|
||||
<Pause length="40"/>
|
||||
</Response>""",
|
||||
"telnyx": f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
"telnyx": f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Response>
|
||||
<Connect>
|
||||
<Stream url="wss://{args.proxy}/ws" bidirectionalMode="rtp"></Stream>
|
||||
</Connect>
|
||||
<Pause length="40"/>
|
||||
</Response>""",
|
||||
"plivo": f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
"plivo": f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Response>
|
||||
<Stream bidirectional="true" keepCallAlive="true" contentType="audio/x-mulaw;rate=8000">wss://{args.proxy}/ws</Stream>
|
||||
</Response>""",
|
||||
}
|
||||
}
|
||||
|
||||
@app.post("/")
|
||||
async def start_call():
|
||||
"""Handle telephony webhook and return XML response."""
|
||||
if args.transport == "exotel":
|
||||
# Exotel doesn't use POST webhooks - redirect to proper documentation
|
||||
logger.debug("POST Exotel endpoint - not used")
|
||||
return {
|
||||
"error": "Exotel doesn't use POST webhooks",
|
||||
"websocket_url": f"wss://{args.proxy}/ws",
|
||||
"note": "Configure the WebSocket URL above in your Exotel App Bazaar Voicebot Applet",
|
||||
}
|
||||
else:
|
||||
logger.debug(f"POST {args.transport.upper()} XML")
|
||||
xml_content = XML_TEMPLATES.get(args.transport, "<Response></Response>")
|
||||
return HTMLResponse(content=xml_content, media_type="application/xml")
|
||||
@app.post("/")
|
||||
async def start_call():
|
||||
"""Handle telephony webhook and return XML response."""
|
||||
if args.transport == "exotel":
|
||||
# Exotel doesn't use POST webhooks - redirect to proper documentation
|
||||
logger.debug("POST Exotel endpoint - not used")
|
||||
return {
|
||||
"error": "Exotel doesn't use POST webhooks",
|
||||
"websocket_url": f"wss://{args.proxy}/ws",
|
||||
"note": "Configure the WebSocket URL above in your Exotel App Bazaar Voicebot Applet",
|
||||
}
|
||||
else:
|
||||
logger.debug(f"POST {args.transport.upper()} XML")
|
||||
xml_content = XML_TEMPLATES.get(args.transport, "<Response></Response>")
|
||||
return HTMLResponse(content=xml_content, media_type="application/xml")
|
||||
|
||||
@app.websocket("/ws")
|
||||
async def websocket_endpoint(websocket: WebSocket):
|
||||
@@ -847,11 +954,6 @@ def _setup_telephony_routes(app: FastAPI, args: argparse.Namespace):
|
||||
logger.debug("WebSocket connection accepted")
|
||||
await _run_telephony_bot(websocket, args)
|
||||
|
||||
@app.get("/")
|
||||
async def start_agent():
|
||||
"""Simple status endpoint for telephony transports."""
|
||||
return {"status": f"Bot started with {args.transport}"}
|
||||
|
||||
|
||||
async def _run_daily_direct(args: argparse.Namespace):
|
||||
"""Run Daily bot with direct connection (no FastAPI server)."""
|
||||
@@ -883,6 +985,25 @@ async def _run_daily_direct(args: argparse.Namespace):
|
||||
await bot_module.bot(runner_args)
|
||||
|
||||
|
||||
async def _run_vonage():
|
||||
"""Run Vonage bot (no FastAPI server)."""
|
||||
logger.info("Running Vonage transport...")
|
||||
|
||||
application_id, session_id, token = await configure_vonage()
|
||||
runner_args = VonageRunnerArguments(
|
||||
application_id=application_id, vonage_session_id=session_id, token=token
|
||||
)
|
||||
runner_args.handle_sigint = True
|
||||
|
||||
# Get the bot module and run it directly
|
||||
bot_module = _get_bot_module()
|
||||
|
||||
print(f"Joining Vonage session: {runner_args.vonage_session_id}")
|
||||
print()
|
||||
|
||||
await bot_module.bot(runner_args)
|
||||
|
||||
|
||||
def _validate_and_clean_proxy(proxy: str) -> str:
|
||||
"""Validate and clean proxy hostname, removing protocol if present."""
|
||||
if not proxy:
|
||||
@@ -922,22 +1043,27 @@ def runner_port() -> int:
|
||||
def main(parser: argparse.ArgumentParser | None = None):
|
||||
"""Start the Pipecat development runner.
|
||||
|
||||
Parses command-line arguments and starts a FastAPI server configured
|
||||
for the specified transport type.
|
||||
Parses command-line arguments and starts a FastAPI server that supports
|
||||
WebRTC, Daily, and telephony transports simultaneously. Clients declare
|
||||
which transport to use via the ``transport`` field in the ``/start`` body.
|
||||
|
||||
When ``-t`` is provided, the server restricts ``/start`` to that transport
|
||||
only and displays transport-specific startup information.
|
||||
|
||||
The runner discovers and runs any ``bot(runner_args)`` function found in the
|
||||
calling module.
|
||||
|
||||
Command-line arguments:
|
||||
- --host: Server host address (default: localhost) 879
|
||||
- --host: Server host address (default: localhost)
|
||||
- --port: Server port (default: 7860)
|
||||
- -t/--transport: Transport type (daily, webrtc, twilio, telnyx, plivo, exotel)
|
||||
- -t/--transport: Restrict to a single transport and set as default for /start
|
||||
(daily, webrtc, twilio, telnyx, plivo, exotel). Omit to support all transports.
|
||||
- -x/--proxy: Public proxy hostname for telephony webhooks
|
||||
- -d/--direct: Connect directly to Daily room (automatically sets transport to daily)
|
||||
- -f/--folder: Path to downloads folder
|
||||
- --dialin: Enable Daily PSTN dial-in webhook handling (requires Daily transport)
|
||||
- --dialin: Enable Daily PSTN dial-in webhook handling
|
||||
- --esp32: Enable SDP munging for ESP32 compatibility (requires --host with IP address)
|
||||
- --whatsapp: Ensure requried WhatsApp environment variables are present
|
||||
- --whatsapp: Ensure required WhatsApp environment variables are present
|
||||
- -v/--verbose: Increase logging verbosity
|
||||
|
||||
Args:
|
||||
@@ -957,9 +1083,12 @@ def main(parser: argparse.ArgumentParser | None = None):
|
||||
"-t",
|
||||
"--transport",
|
||||
type=str,
|
||||
choices=["daily", "webrtc", *TELEPHONY_TRANSPORTS],
|
||||
default="webrtc",
|
||||
help="Transport type",
|
||||
choices=["daily", "vonage", "webrtc", *TELEPHONY_TRANSPORTS],
|
||||
default=None,
|
||||
help=(
|
||||
"Restrict the server to a single transport and set it as the default for /start. "
|
||||
"Omit to support all transports simultaneously (default behaviour)."
|
||||
),
|
||||
)
|
||||
parser.add_argument("-x", "--proxy", help="Public proxy host name")
|
||||
parser.add_argument(
|
||||
@@ -977,7 +1106,7 @@ def main(parser: argparse.ArgumentParser | None = None):
|
||||
"--dialin",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Enable Daily PSTN dial-in webhook handling (requires Daily transport)",
|
||||
help="Enable Daily PSTN dial-in webhook handling",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--esp32",
|
||||
@@ -989,7 +1118,7 @@ def main(parser: argparse.ArgumentParser | None = None):
|
||||
"--whatsapp",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Ensure requried WhatsApp environment variables are present",
|
||||
help="Ensure required WhatsApp environment variables are present",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
@@ -998,12 +1127,13 @@ def main(parser: argparse.ArgumentParser | None = None):
|
||||
if args.proxy:
|
||||
args.proxy = _validate_and_clean_proxy(args.proxy)
|
||||
|
||||
# Auto-set transport to daily if --direct is used without explicit transport
|
||||
if args.direct and args.transport == "webrtc": # webrtc is the default
|
||||
args.transport = "daily"
|
||||
elif args.direct and args.transport != "daily":
|
||||
logger.error("--direct flag only works with Daily transport (-t daily)")
|
||||
return
|
||||
# --direct implies Daily transport
|
||||
if args.direct:
|
||||
if args.transport is None or args.transport == "daily":
|
||||
args.transport = "daily"
|
||||
else:
|
||||
logger.error("--direct flag only works with Daily transport (-t daily)")
|
||||
return
|
||||
|
||||
# Validate ESP32 requirements
|
||||
if args.esp32 and args.host == "localhost":
|
||||
@@ -1011,7 +1141,7 @@ def main(parser: argparse.ArgumentParser | None = None):
|
||||
return
|
||||
|
||||
# Validate dial-in requirements
|
||||
if args.dialin and args.transport != "daily":
|
||||
if args.dialin and args.transport is not None and args.transport != "daily":
|
||||
logger.error("--dialin flag only works with Daily transport (-t daily)")
|
||||
return
|
||||
|
||||
@@ -1029,28 +1159,44 @@ def main(parser: argparse.ArgumentParser | None = None):
|
||||
asyncio.run(_run_daily_direct(args))
|
||||
return
|
||||
|
||||
# Print startup message for server-based transports
|
||||
if args.transport == "webrtc":
|
||||
print()
|
||||
# Print startup message
|
||||
print()
|
||||
if args.transport is None:
|
||||
print("🚀 Bot ready!")
|
||||
print(f" → WebRTC: http://{args.host}:{args.port}/client")
|
||||
print(f" → Daily: http://{args.host}:{args.port}/daily")
|
||||
print(f" → Telephony: ws://{args.host}:{args.port}/ws")
|
||||
elif args.transport == "webrtc":
|
||||
if args.esp32:
|
||||
print(f"🚀 Bot ready! (ESP32 mode)")
|
||||
print("🚀 Bot ready! (ESP32 mode)")
|
||||
elif args.whatsapp:
|
||||
print(f"🚀 Bot ready! (WhatsApp)")
|
||||
print("🚀 Bot ready! (WhatsApp)")
|
||||
else:
|
||||
print(f"🚀 Bot ready!")
|
||||
print("🚀 Bot ready! (WebRTC)")
|
||||
print(f" → Open http://{args.host}:{args.port}/client in your browser")
|
||||
print()
|
||||
elif args.transport == "daily":
|
||||
print()
|
||||
print(f"🚀 Bot ready!")
|
||||
print("🚀 Bot ready! (Daily)")
|
||||
if args.dialin:
|
||||
print(
|
||||
f" → Daily dial-in webhook: http://{args.host}:{args.port}/daily-dialin-webhook"
|
||||
)
|
||||
print(f" → Configure this URL in your Daily phone number settings")
|
||||
else:
|
||||
print(f" → Open http://{args.host}:{args.port} in your browser to start a session")
|
||||
print(
|
||||
f" → Open http://{args.host}:{args.port}/daily in your browser to start a session"
|
||||
)
|
||||
elif args.transport in TELEPHONY_TRANSPORTS:
|
||||
print(f"🚀 Bot ready! ({args.transport.capitalize()})")
|
||||
if args.proxy:
|
||||
print(f" → XML webhook: http://{args.host}:{args.port}/")
|
||||
print(f" → WebSocket: ws://{args.host}:{args.port}/ws")
|
||||
elif args.transport == "vonage":
|
||||
print()
|
||||
print(f"🚀 Bot ready!")
|
||||
asyncio.run(_run_vonage())
|
||||
print()
|
||||
return
|
||||
print()
|
||||
|
||||
RUNNER_DOWNLOADS_FOLDER = args.folder
|
||||
RUNNER_HOST = args.host
|
||||
|
||||
@@ -99,16 +99,35 @@ class DailyRunnerArguments(RunnerArguments):
|
||||
token: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class VonageRunnerArguments(RunnerArguments):
|
||||
"""Vonage transport session arguments for the runner.
|
||||
|
||||
Parameters:
|
||||
application_id: Vonage application ID
|
||||
vonage_session_id: Vonage session ID
|
||||
token: Vonage Session Token
|
||||
"""
|
||||
|
||||
application_id: str
|
||||
vonage_session_id: str
|
||||
token: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class WebSocketRunnerArguments(RunnerArguments):
|
||||
"""WebSocket transport session arguments for the runner.
|
||||
|
||||
Parameters:
|
||||
websocket: WebSocket connection for audio streaming
|
||||
transport_type: Transport type identifier. Set to ``"websocket"`` for plain
|
||||
WebSocket connections; ``None`` triggers auto-detection from the first
|
||||
telephony provider message.
|
||||
body: Additional request data
|
||||
"""
|
||||
|
||||
websocket: WebSocket
|
||||
transport_type: str | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -33,7 +33,7 @@ import json
|
||||
import os
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
from typing import Any, cast
|
||||
|
||||
from fastapi import WebSocket
|
||||
from loguru import logger
|
||||
@@ -42,9 +42,10 @@ from pipecat.runner.types import (
|
||||
DailyRunnerArguments,
|
||||
LiveKitRunnerArguments,
|
||||
SmallWebRTCRunnerArguments,
|
||||
VonageRunnerArguments,
|
||||
WebSocketRunnerArguments,
|
||||
)
|
||||
from pipecat.transports.base_transport import BaseTransport
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
|
||||
|
||||
def _detect_transport_type_from_message(message_data: dict) -> str:
|
||||
@@ -271,6 +272,14 @@ def get_transport_client_id(transport: BaseTransport, client: Any) -> str:
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from pipecat.transports.vonage.video_connector import VonageVideoConnectorTransport
|
||||
|
||||
if isinstance(transport, VonageVideoConnectorTransport):
|
||||
return client["streamId"]
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
logger.warning(f"Unable to get client id from unsupported transport {type(transport)}")
|
||||
return ""
|
||||
|
||||
@@ -303,6 +312,24 @@ async def maybe_capture_participant_camera(
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from pipecat.transports.vonage.video_connector import (
|
||||
SubscribeSettings,
|
||||
VonageVideoConnectorTransport,
|
||||
)
|
||||
|
||||
if isinstance(transport, VonageVideoConnectorTransport):
|
||||
await transport.subscribe_to_stream(
|
||||
client["streamId"],
|
||||
SubscribeSettings(
|
||||
subscribe_to_audio=True,
|
||||
subscribe_to_video=True,
|
||||
preferred_framerate=framerate if framerate != 0 else None,
|
||||
),
|
||||
)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
async def maybe_capture_participant_screen(
|
||||
transport: BaseTransport, client: Any, framerate: int = 0
|
||||
@@ -534,6 +561,10 @@ async def create_transport(
|
||||
audio_out_enabled=True,
|
||||
# add_wav_header and serializer will be set automatically
|
||||
),
|
||||
"vonage": lambda: VonageVideoConnectorTransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True
|
||||
),
|
||||
}
|
||||
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
@@ -562,6 +593,12 @@ async def create_transport(
|
||||
)
|
||||
|
||||
elif isinstance(runner_args, WebSocketRunnerArguments):
|
||||
if runner_args.transport_type == "websocket":
|
||||
params = _get_transport_params("websocket", transport_params)
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketTransport
|
||||
|
||||
return FastAPIWebsocketTransport(websocket=runner_args.websocket, params=params)
|
||||
|
||||
# Parse once to determine the provider and get data
|
||||
transport_type, call_data = await parse_telephony_websocket(runner_args.websocket)
|
||||
params = _get_transport_params(transport_type, transport_params)
|
||||
@@ -581,6 +618,31 @@ async def create_transport(
|
||||
runner_args.room_name,
|
||||
params=params,
|
||||
)
|
||||
elif isinstance(runner_args, VonageRunnerArguments):
|
||||
from pipecat.transports.vonage.video_connector import (
|
||||
VonageVideoConnectorTransport,
|
||||
VonageVideoConnectorTransportParams,
|
||||
)
|
||||
|
||||
try:
|
||||
params = cast(
|
||||
VonageVideoConnectorTransportParams,
|
||||
_get_transport_params("vonage", transport_params),
|
||||
)
|
||||
except ValueError:
|
||||
webrtc_params: TransportParams = cast(
|
||||
TransportParams, _get_transport_params("webrtc", transport_params)
|
||||
)
|
||||
params = VonageVideoConnectorTransportParams(
|
||||
**webrtc_params.model_dump(),
|
||||
video_in_auto_subscribe=True,
|
||||
)
|
||||
|
||||
return VonageVideoConnectorTransport(
|
||||
runner_args.application_id,
|
||||
runner_args.vonage_session_id,
|
||||
runner_args.token,
|
||||
params=params,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported runner arguments type: {type(runner_args)}")
|
||||
|
||||
52
src/pipecat/runner/vonage.py
Normal file
52
src/pipecat/runner/vonage.py
Normal file
@@ -0,0 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Vonage session configuration utilities.
|
||||
|
||||
This module extracts the necessary parameters to connect to a Vonage Video session.
|
||||
|
||||
Required environment variables:
|
||||
|
||||
- VONAGE_APPLICATION_ID - Vonage application ID
|
||||
- VONAGE_SESSION_ID - Vonage session ID
|
||||
- VONAGE_TOKEN - Vonage token
|
||||
|
||||
Example:
|
||||
from pipecat.runner.vonage import configure
|
||||
|
||||
application_id, session_id, token = await configure()
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
|
||||
async def configure() -> tuple[str, str, str]:
|
||||
"""Configure Vonage application ID, session ID and token from environment.
|
||||
|
||||
Returns:
|
||||
Tuple containing the server application_id, session_id and token.
|
||||
|
||||
Raises:
|
||||
Exception: If required Vonage configuration is not provided.
|
||||
"""
|
||||
application_id = os.getenv("VONAGE_APPLICATION_ID")
|
||||
session_id = os.getenv("VONAGE_SESSION_ID")
|
||||
token = os.getenv("VONAGE_TOKEN")
|
||||
|
||||
if not application_id:
|
||||
raise Exception(
|
||||
"No Vonage application ID specified. Use set VONAGE_APPLICATION_ID in your environment."
|
||||
)
|
||||
|
||||
if not session_id:
|
||||
raise Exception(
|
||||
"No Vonage Session ID specified. Use set VONAGE_SESSION_ID in your environment."
|
||||
)
|
||||
|
||||
if not token:
|
||||
raise Exception("No Vonage token specified. Use set VONAGE_TOKEN in your environment.")
|
||||
|
||||
return (application_id, session_id, token)
|
||||
@@ -48,7 +48,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Anthropic, you need to `pip install pipecat-ai[anthropic]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class AnthropicThinkingConfig(BaseModel):
|
||||
|
||||
@@ -55,7 +55,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error('In order to use AssemblyAI, you need to `pip install "pipecat-ai[assemblyai]"`.')
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def map_language_from_assemblyai(language_code: str) -> Language:
|
||||
|
||||
@@ -39,7 +39,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Async, you need to `pip install pipecat-ai[asyncai]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_async_language(language: Language) -> str:
|
||||
|
||||
@@ -49,7 +49,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use AWS services, you need to `pip install pipecat-ai[aws]`. Also, remember to set `AWS_SECRET_ACCESS_KEY`, `AWS_ACCESS_KEY_ID`, and `AWS_REGION` environment variable."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -81,7 +81,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use AWS services, you need to `pip install pipecat-ai[aws-nova-sonic]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class AWSNovaSonicUnhandledFunctionException(Exception):
|
||||
|
||||
@@ -32,7 +32,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use SageMaker BiDi client, you need to `pip install pipecat-ai[sagemaker]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class SageMakerBidiClient:
|
||||
|
||||
@@ -48,7 +48,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use AWS services, you need to `pip install pipecat-ai[aws]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -34,7 +34,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use AWS services, you need to `pip install pipecat-ai[aws]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_aws_language(language: Language) -> str:
|
||||
|
||||
@@ -17,7 +17,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Azure Realtime, you need to `pip install pipecat-ai[openai]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -49,7 +49,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Azure, you need to `pip install pipecat-ai[azure]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -42,7 +42,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Azure, you need to `pip install pipecat-ai[azure]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def sample_rate_to_output_format(sample_rate: int) -> SpeechSynthesisOutputFormat:
|
||||
|
||||
@@ -42,7 +42,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Cartesia, you need to `pip install pipecat-ai[cartesia]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -39,7 +39,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Cartesia, you need to `pip install pipecat-ai[cartesia]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class GenerationConfig(BaseModel):
|
||||
|
||||
@@ -31,7 +31,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Deepgram Flux, you need to `pip install pipecat-ai[deepgram]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
# Re-export for backward compatibility
|
||||
__all__ = [
|
||||
|
||||
@@ -48,7 +48,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Deepgram, you need to `pip install pipecat-ai[deepgram]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class LiveOptions:
|
||||
|
||||
@@ -39,7 +39,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use DeepgramWebsocketTTSService, you need to `pip install pipecat-ai[deepgram]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -52,7 +52,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use ElevenLabs Realtime STT, you need to `pip install pipecat-ai[elevenlabs]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_elevenlabs_language(language: Language) -> str:
|
||||
@@ -358,7 +358,8 @@ class ElevenLabsSTTService(SegmentedSTTService):
|
||||
|
||||
# Add required model_id and language_code
|
||||
data.add_field("model_id", self._settings.model)
|
||||
data.add_field("language_code", self._settings.language)
|
||||
if self._settings.language:
|
||||
data.add_field("language_code", self._settings.language)
|
||||
if self._settings.tag_audio_events is not None:
|
||||
data.add_field("tag_audio_events", str(self._settings.tag_audio_events).lower())
|
||||
keyterms = self._settings.keyterms
|
||||
|
||||
@@ -56,7 +56,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use ElevenLabs, you need to `pip install pipecat-ai[elevenlabs]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
# Models that support language codes
|
||||
# The following models are excluded as they don't support language codes:
|
||||
|
||||
@@ -38,7 +38,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Fish Audio, you need to `pip install pipecat-ai[fish]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
# FishAudio supports various output formats
|
||||
FishAudioOutputFormat = Literal["opus", "mp3", "pcm", "wav"]
|
||||
|
||||
@@ -53,7 +53,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Gladia, you need to `pip install pipecat-ai[gladia]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_gladia_language(language: Language) -> str:
|
||||
|
||||
@@ -105,7 +105,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
# Connection management constants
|
||||
|
||||
@@ -36,7 +36,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Google Vertex AI, you need to `pip install pipecat-ai[google]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -35,7 +35,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -65,7 +65,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class GoogleThinkingConfig(BaseModel):
|
||||
|
||||
@@ -57,7 +57,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use Google AI, you need to `pip install pipecat-ai[google]`. Also, set `GOOGLE_APPLICATION_CREDENTIALS` environment variable."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_google_stt_language(language: Language) -> str:
|
||||
|
||||
@@ -57,7 +57,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use Google AI, you need to `pip install pipecat-ai[google]`. Also, set `GOOGLE_APPLICATION_CREDENTIALS` environment variable."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_google_tts_language(language: Language) -> str:
|
||||
|
||||
@@ -35,7 +35,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use Google AI, you need to `pip install pipecat-ai[google]`. Also, set `GOOGLE_APPLICATION_CREDENTIALS` environment variable."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -44,7 +44,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error('In order to use Gradium, you need to `pip install "pipecat-ai[gradium]"`.')
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
# Seconds to wait after a "flushed" message for trailing text tokens to arrive
|
||||
# before finalizing the transcription.
|
||||
@@ -150,7 +150,7 @@ class GradiumSTTService(WebsocketSTTService):
|
||||
self,
|
||||
*,
|
||||
api_key: str,
|
||||
api_endpoint_base_url: str = "wss://eu.api.gradium.ai/api/speech/asr",
|
||||
api_endpoint_base_url: str = "wss://api.gradium.ai/api/speech/asr",
|
||||
encoding: str = "pcm",
|
||||
sample_rate: int | None = None,
|
||||
params: InputParams | None = None,
|
||||
@@ -163,7 +163,7 @@ class GradiumSTTService(WebsocketSTTService):
|
||||
|
||||
Args:
|
||||
api_key: Gradium API key for authentication.
|
||||
api_endpoint_base_url: WebSocket endpoint URL. Defaults to Gradium's streaming endpoint.
|
||||
api_endpoint_base_url: WebSocket endpoint URL.
|
||||
encoding: Base audio encoding type. One of "pcm", "wav", or "opus".
|
||||
For PCM, the sample rate is appended automatically from the
|
||||
pipeline's audio_in_sample_rate (e.g., "pcm" becomes "pcm_16000").
|
||||
|
||||
@@ -33,7 +33,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Gradium, you need to `pip install pipecat-ai[gradium]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
SAMPLE_RATE = 48000
|
||||
|
||||
@@ -68,7 +68,7 @@ class GradiumTTSService(WebsocketTTSService):
|
||||
*,
|
||||
api_key: str,
|
||||
voice_id: str | None = None,
|
||||
url: str = "wss://eu.api.gradium.ai/api/speech/tts",
|
||||
url: str = "wss://api.gradium.ai/api/speech/tts",
|
||||
model: str | None = None,
|
||||
json_config: str | None = None,
|
||||
params: InputParams | None = None,
|
||||
|
||||
@@ -30,7 +30,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Groq, you need to `pip install pipecat-ai[groq]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
# Hint set for `output_format`. The values mirror the Literal that
|
||||
# `groq.resources.audio.speech.AsyncSpeech.create` accepts on its
|
||||
|
||||
@@ -46,7 +46,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use HeyGen, you need to `pip install pipecat-ai[heygen]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
HEY_GEN_SAMPLE_RATE = 24000
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ try:
|
||||
except ModuleNotFoundError as e: # pragma: no cover - import-time guidance
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Hume, you need to `pip install pipecat-ai[hume]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
HUME_SAMPLE_RATE = 48_000 # Hume TTS streams at 48 kHz
|
||||
|
||||
@@ -68,7 +68,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Inworld Realtime, you need to `pip install pipecat-ai[inworld]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -43,7 +43,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Inworld WebSocket TTS, you need to `pip install websockets`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
AggregationType,
|
||||
|
||||
@@ -32,7 +32,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Kokoro, you need to `pip install pipecat-ai[kokoro]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
KOKORO_CACHE_DIR = Path(os.path.expanduser("~/.cache/kokoro-onnx"))
|
||||
KOKORO_MODEL_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx"
|
||||
|
||||
@@ -34,7 +34,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use LMNT, you need to `pip install pipecat-ai[lmnt]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_lmnt_language(language: Language) -> str:
|
||||
|
||||
@@ -29,7 +29,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use an MCP client, you need to `pip install pipecat-ai[mcp]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
ServerParameters: TypeAlias = StdioServerParameters | SseServerParameters | StreamableHttpParameters
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use Mem0, you need to `pip install mem0ai`. Also, set the environment variable MEM0_API_KEY."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class Mem0MemoryService(FrameProcessor):
|
||||
|
||||
@@ -48,7 +48,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Mistral STT, you need to `pip install pipecat-ai[mistral]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -31,7 +31,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Mistral TTS, you need to `pip install pipecat-ai[mistral]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -34,7 +34,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Moondream, you need to `pip install pipecat-ai[moondream]`.")
|
||||
raise Exception(f"Missing module(s): {e}")
|
||||
raise ImportError(f"Missing module(s): {e}") from e
|
||||
|
||||
|
||||
def detect_device():
|
||||
|
||||
@@ -41,7 +41,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Neuphonic, you need to `pip install pipecat-ai[neuphonic]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_neuphonic_lang_code(language: Language) -> str:
|
||||
|
||||
@@ -46,7 +46,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use NVIDIA Nemotron Speech STT, you need to `pip install pipecat-ai[nvidia]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_nvidia_nemotron_speech_language(language: Language) -> str:
|
||||
|
||||
@@ -55,7 +55,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use NVIDIA Nemotron Speech TTS, you need to `pip install pipecat-ai[nvidia]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -71,7 +71,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use OpenAI, you need to `pip install pipecat-ai[openai]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -59,7 +59,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use OpenAI, you need to `pip install pipecat-ai[openai]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -30,7 +30,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Piper, you need to `pip install pipecat-ai[piper]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -33,7 +33,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Resemble AI, you need to `pip install pipecat-ai[resembleai]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -47,7 +47,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Rime, you need to `pip install pipecat-ai[rime]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_rime_language(language: Language) -> str:
|
||||
|
||||
@@ -53,7 +53,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Sarvam, you need to `pip install pipecat-ai[sarvam]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_sarvam_language(language: Language) -> str:
|
||||
|
||||
@@ -70,7 +70,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Sarvam, you need to `pip install pipecat-ai[sarvam]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class SarvamTTSModel(StrEnum):
|
||||
|
||||
@@ -35,7 +35,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Simli, you need to `pip install pipecat-ai[simli]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -48,7 +48,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Smallest, you need to `pip install pipecat-ai[smallest]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_smallest_stt_language(language: Language) -> str:
|
||||
|
||||
@@ -41,7 +41,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Smallest, you need to `pip install pipecat-ai[smallest]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class SmallestTTSModel(StrEnum):
|
||||
|
||||
@@ -39,7 +39,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Soniox, you need to `pip install pipecat-ai[soniox]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
KEEPALIVE_MESSAGE = '{"type": "keepalive"}'
|
||||
|
||||
@@ -44,7 +44,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Soniox, you need to `pip install pipecat-ai[soniox]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
# Soniox idle timeout is 20-30s; keepalive cadence must stay well inside it.
|
||||
|
||||
@@ -61,7 +61,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use Speechmatics, you need to `pip install pipecat-ai[speechmatics]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -32,7 +32,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use Speechmatics, you need to `pip install pipecat-ai[speechmatics]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -57,7 +57,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Ultravox, you need to `pip install pipecat-ai[ultravox]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
# Result shipped as the client_tool_result when we see an async-tool
|
||||
|
||||
@@ -33,14 +33,14 @@ if TYPE_CHECKING:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Whisper, you need to `pip install pipecat-ai[whisper]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
try:
|
||||
import mlx_whisper # noqa: F401
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Whisper, you need to `pip install pipecat-ai[mlx-whisper]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class Model(Enum):
|
||||
|
||||
@@ -67,7 +67,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Grok Realtime, you need to `pip install pipecat-ai[grok]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -41,7 +41,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error('In order to use xAI STT, you need to `pip install "pipecat-ai[xai]"`.')
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_xai_stt_language(language: Language) -> str:
|
||||
|
||||
@@ -46,7 +46,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use XAITTSService, you need to `pip install pipecat-ai[xai]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
def language_to_xai_language(language: Language) -> str:
|
||||
|
||||
@@ -75,7 +75,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use the Daily transport, you need to `pip install pipecat-ai[daily]`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
VAD_RESET_PERIOD_MS = 2000
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ try:
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use LiveKit, you need to `pip install pipecat-ai[livekit]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
# DTMF mapping according to RFC 4733
|
||||
DTMF_CODE_MAP = {
|
||||
|
||||
@@ -28,7 +28,7 @@ except ModuleNotFoundError as e:
|
||||
logger.error(
|
||||
"In order to use local audio, you need to `pip install pipecat-ai[local]`. On MacOS, you also need to `brew install portaudio`."
|
||||
)
|
||||
raise Exception(f"Missing module: {e}")
|
||||
raise ImportError(f"Missing module: {e}") from e
|
||||
|
||||
|
||||
class LocalAudioTransportParams(TransportParams):
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user