Compare commits
34 Commits
hush/prere
...
v0.0.87
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
63bc825008 | ||
|
|
e7ffde1c4c | ||
|
|
1c88565725 | ||
|
|
07a6c2fb0e | ||
|
|
e99f3bf75a | ||
|
|
f09d780413 | ||
|
|
e370d23374 | ||
|
|
b68ec14146 | ||
|
|
c567fd71b1 | ||
|
|
2ca1b2d6f8 | ||
|
|
04041a9a9a | ||
|
|
6c498dc70f | ||
|
|
32b07c1720 | ||
|
|
ad507ce23d | ||
|
|
be562cedfc | ||
|
|
089e703e1f | ||
|
|
4dc1e15a99 | ||
|
|
c7dc2e886f | ||
|
|
11bc4ea854 | ||
|
|
029d76033d | ||
|
|
924d7dea9a | ||
|
|
244e94f3ce | ||
|
|
af1f51d49e | ||
|
|
9ba3c168b8 | ||
|
|
e6ee8f7a16 | ||
|
|
2ea2bd99e0 | ||
|
|
0c2ced7c52 | ||
|
|
fb160646b8 | ||
|
|
89fed57af2 | ||
|
|
032032df65 | ||
|
|
a5595b82ea | ||
|
|
4d1915eb41 | ||
|
|
b3a84fc772 | ||
|
|
403d22e62c |
22
.github/workflows/publish.yaml
vendored
22
.github/workflows/publish.yaml
vendored
@@ -5,25 +5,25 @@ on:
|
||||
inputs:
|
||||
gitref:
|
||||
type: string
|
||||
description: "what git tag to build (e.g. v0.0.74)"
|
||||
description: 'what git tag to build (e.g. v0.0.74)'
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: "Build and upload wheels"
|
||||
name: 'Build and upload wheels'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ github.event.inputs.gitref }}
|
||||
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v3
|
||||
with:
|
||||
version: "latest"
|
||||
version: 'latest'
|
||||
- name: Set up Python
|
||||
run: uv python install 3.10
|
||||
run: uv python install 3.12
|
||||
- name: Install development dependencies
|
||||
run: uv sync --group dev
|
||||
- name: Build project
|
||||
@@ -35,9 +35,9 @@ jobs:
|
||||
path: ./dist
|
||||
|
||||
publish-to-pypi:
|
||||
name: "Publish to PyPI"
|
||||
name: 'Publish to PyPI'
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ build ]
|
||||
needs: [build]
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/pipecat-ai
|
||||
@@ -56,12 +56,12 @@ jobs:
|
||||
print-hash: true
|
||||
|
||||
publish-to-test-pypi:
|
||||
name: "Publish to Test PyPI"
|
||||
name: 'Publish to Test PyPI'
|
||||
runs-on: ubuntu-latest
|
||||
needs: [ build ]
|
||||
needs: [build]
|
||||
environment:
|
||||
name: testpypi
|
||||
url: https://pypi.org/p/pipecat-ai
|
||||
url: https://test.pypi.org/p/pipecat-ai
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
@@ -70,7 +70,7 @@ jobs:
|
||||
with:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
- name: Publish to PyPI
|
||||
- name: Publish to Test PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
|
||||
12
.github/workflows/publish_test.yaml
vendored
12
.github/workflows/publish_test.yaml
vendored
@@ -4,7 +4,7 @@ on: workflow_dispatch
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: "Build and upload wheels"
|
||||
name: 'Build and upload wheels'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -15,9 +15,9 @@ jobs:
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v3
|
||||
with:
|
||||
version: "latest"
|
||||
version: 'latest'
|
||||
- name: Set up Python
|
||||
run: uv python install 3.10
|
||||
run: uv python install 3.12
|
||||
- name: Install development dependencies
|
||||
run: uv sync --group dev
|
||||
- name: Build project
|
||||
@@ -29,12 +29,12 @@ jobs:
|
||||
path: ./dist
|
||||
|
||||
publish-to-test-pypi:
|
||||
name: "Publish to Test PyPI"
|
||||
name: 'Publish to Test PyPI'
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build]
|
||||
environment:
|
||||
name: testpypi
|
||||
url: https://pypi.org/p/pipecat-ai
|
||||
url: https://test.pypi.org/p/pipecat-ai
|
||||
permissions:
|
||||
id-token: write
|
||||
steps:
|
||||
@@ -43,7 +43,7 @@ jobs:
|
||||
with:
|
||||
name: wheels
|
||||
path: ./dist
|
||||
- name: Publish to PyPI
|
||||
- name: Publish to Test PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
verbose: true
|
||||
|
||||
28
CHANGELOG.md
28
CHANGELOG.md
@@ -5,10 +5,18 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
## [0.0.87] - 2025-10-02
|
||||
|
||||
### Added
|
||||
|
||||
- Added `WebsocketSTTService` base class for websocket-based STT services.
|
||||
Combines STT functionality with websocket connectivity, providing automatic
|
||||
error handling and reconnection capabilities with exponential backoff.
|
||||
|
||||
- Added `DeepgramFluxSTTService` for real-time speech recognition using
|
||||
Deepgram's Flux WebSocket API. Flux understands conversational flow and
|
||||
automatically handles turn-taking.
|
||||
|
||||
- Added RTVI messages for user/bot audio levels and system logs.
|
||||
|
||||
- Include OpenAI-based LLM services cached tokens to `MetricsFrame`.
|
||||
@@ -20,6 +28,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `DailyTransportMessageFrame` and `DailyTransportMessageUrgentFrame` are
|
||||
deprecated, use `DailyOutputTransportMessageFrame` and
|
||||
`DailyOutputTransportMessageUrgentFrame` respectively instead.
|
||||
|
||||
- `LiveKitTransportMessageFrame` and `LiveKitTransportMessageUrgentFrame` are
|
||||
deprecated, use `LiveKitOutputTransportMessageFrame` and
|
||||
`LiveKitOutputTransportMessageUrgentFrame` respectively instead.
|
||||
|
||||
- `TransportMessageFrame` and `TransportMessageUrgentFrame` are deprecated, use
|
||||
`OutputTransportMessageFrame` and `OutputTransportMessageUrgentFrame`
|
||||
respectively instead.
|
||||
|
||||
- `InputTransportMessageUrgentFrame` is deprecated, use
|
||||
`InputTransportMessageFrame` instead.
|
||||
|
||||
- `DailyUpdateRemoteParticipantsFrame` is deprecated and will be removed in a
|
||||
future version. Instead, create your own custom frame and handle it in the
|
||||
`@transport.output().event_handler("on_after_push_frame")` event handler or a
|
||||
@@ -27,6 +50,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## Fixed
|
||||
|
||||
- Fixed an issue in `AWSBedrockLLMService` where timeout exceptions weren't
|
||||
being detected.
|
||||
|
||||
- Fixed a `PipelineTask` issue that could prevent the application to exit if
|
||||
`task.cancel()` was called when the task was already finished.
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ from pipecat.processors.aggregators.llm_response_universal import LLMContextAggr
|
||||
from pipecat.runner.daily import configure
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.daily.transport import DailyLogLevel, DailyParams, DailyTransport
|
||||
from pipecat.transports.daily.transport import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -49,7 +49,6 @@ async def main():
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
||||
),
|
||||
)
|
||||
transport.set_log_level(DailyLogLevel.Info)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
|
||||
118
examples/foundational/07c-interruptible-deepgram-flux.py
Normal file
118
examples/foundational/07c-interruptible-deepgram-flux.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContext,
|
||||
LLMContextAggregatorPair,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramFluxSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-2-andromeda-en")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -26,7 +26,11 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams, DailyTransportMessageFrame
|
||||
from pipecat.transports.daily.transport import (
|
||||
DailyOutputTransportMessageFrame,
|
||||
DailyOutputTransportMessageUrgentFrame,
|
||||
DailyParams,
|
||||
)
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -128,14 +132,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.debug(f"Received latency ping app message: {message}")
|
||||
ts = message["latency-ping"]["ts"]
|
||||
# Send immediately
|
||||
transport.output().send_message(
|
||||
DailyTransportMessageFrame(
|
||||
await task.queue_frame(
|
||||
DailyOutputTransportMessageUrgentFrame(
|
||||
message={"latency-pong-msg-handler": {"ts": ts}}, participant_id=sender
|
||||
)
|
||||
)
|
||||
# And push to the pipeline for the Daily transport.output to send
|
||||
await task.queue_frame(
|
||||
DailyTransportMessageFrame(
|
||||
DailyOutputTransportMessageFrame(
|
||||
message={"latency-pong-pipeline-delivery": {"ts": ts}},
|
||||
participant_id=sender,
|
||||
)
|
||||
|
||||
@@ -67,6 +67,7 @@ TESTS_07 = [
|
||||
("07ac-interruptible-asyncai-http.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
|
||||
("07b-interruptible-langchain.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
|
||||
("07c-interruptible-deepgram.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
|
||||
("07c-interruptible-deepgram-flux.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
|
||||
("07d-interruptible-elevenlabs.py", PROMPT_SIMPLE_MATH, EVAL_SIMPLE_MATH, BOT_SPEAKS_FIRST),
|
||||
(
|
||||
"07d-interruptible-elevenlabs-http.py",
|
||||
|
||||
@@ -672,7 +672,7 @@ class TTSSpeakFrame(DataFrame):
|
||||
|
||||
|
||||
@dataclass
|
||||
class TransportMessageFrame(DataFrame):
|
||||
class OutputTransportMessageFrame(DataFrame):
|
||||
"""Frame containing transport-specific message data.
|
||||
|
||||
Parameters:
|
||||
@@ -685,6 +685,32 @@ class TransportMessageFrame(DataFrame):
|
||||
return f"{self.name}(message: {self.message})"
|
||||
|
||||
|
||||
@dataclass
|
||||
class TransportMessageFrame(OutputTransportMessageFrame):
|
||||
"""Frame containing transport-specific message data.
|
||||
|
||||
.. deprecated:: 0.0.87
|
||||
This frame is deprecated and will be removed in a future version.
|
||||
Instead, use `OutputTransportMessageFrame`.
|
||||
|
||||
Parameters:
|
||||
message: The transport message payload.
|
||||
"""
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("always")
|
||||
warnings.warn(
|
||||
"TransportMessageFrame is deprecated and will be removed in a future version. "
|
||||
"Instead, use OutputTransportMessageFrame.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class DTMFFrame:
|
||||
"""Base class for DTMF (Dual-Tone Multi-Frequency) keypad frames.
|
||||
@@ -1092,8 +1118,8 @@ class STTMuteFrame(SystemFrame):
|
||||
|
||||
|
||||
@dataclass
|
||||
class TransportMessageUrgentFrame(SystemFrame):
|
||||
"""Frame for urgent transport messages that need immediate processing.
|
||||
class InputTransportMessageFrame(SystemFrame):
|
||||
"""Frame for transport messages received from external sources.
|
||||
|
||||
Parameters:
|
||||
message: The urgent transport message payload.
|
||||
@@ -1106,20 +1132,69 @@ class TransportMessageUrgentFrame(SystemFrame):
|
||||
|
||||
|
||||
@dataclass
|
||||
class InputTransportMessageUrgentFrame(TransportMessageUrgentFrame):
|
||||
class InputTransportMessageUrgentFrame(InputTransportMessageFrame):
|
||||
"""Frame for transport messages received from external sources.
|
||||
|
||||
This frame wraps incoming transport messages to distinguish them from outgoing
|
||||
urgent transport messages (TransportMessageUrgentFrame), preventing infinite
|
||||
message loops in the transport layer. It inherits the message payload from
|
||||
TransportMessageFrame while marking the message as having been received
|
||||
rather than generated locally.
|
||||
.. deprecated:: 0.0.87
|
||||
This frame is deprecated and will be removed in a future version.
|
||||
Instead, use `InputTransportMessageFrame`.
|
||||
|
||||
Used by transport implementations to properly handle bidirectional message
|
||||
flow without creating feedback loops.
|
||||
Parameters:
|
||||
message: The urgent transport message payload.
|
||||
"""
|
||||
|
||||
pass
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("always")
|
||||
warnings.warn(
|
||||
"InputTransportMessageUrgentFrame is deprecated and will be removed in a future version. "
|
||||
"Instead, use InputTransportMessageFrame.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class OutputTransportMessageUrgentFrame(SystemFrame):
|
||||
"""Frame for urgent transport messages that need to be sent immediately.
|
||||
|
||||
Parameters:
|
||||
message: The urgent transport message payload.
|
||||
"""
|
||||
|
||||
message: Any
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}(message: {self.message})"
|
||||
|
||||
|
||||
@dataclass
|
||||
class TransportMessageUrgentFrame(OutputTransportMessageUrgentFrame):
|
||||
"""Frame for urgent transport messages that need to be sent immediately.
|
||||
|
||||
.. deprecated:: 0.0.87
|
||||
This frame is deprecated and will be removed in a future version.
|
||||
Instead, use `OutputTransportMessageUrgentFrame`.
|
||||
|
||||
Parameters:
|
||||
message: The urgent transport message payload.
|
||||
"""
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("always")
|
||||
warnings.warn(
|
||||
"TransportMessageUrgentFrame is deprecated and will be removed in a future version. "
|
||||
"Instead, use OutputTransportMessageFrame.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -130,9 +130,11 @@ class PipelineTask(BasePipelineTask):
|
||||
|
||||
- on_pipeline_finished: Called after the pipeline has reached any terminal state.
|
||||
This includes:
|
||||
|
||||
- StopFrame: pipeline was stopped (processors keep connections open)
|
||||
- EndFrame: pipeline ended normally
|
||||
- CancelFrame: pipeline was cancelled
|
||||
|
||||
Use this event for cleanup, logging, or post-processing tasks. Users can inspect
|
||||
the frame if they need to handle specific cases.
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
FunctionCallResultFrame,
|
||||
InputAudioRawFrame,
|
||||
InputTransportMessageUrgentFrame,
|
||||
InterimTranscriptionFrame,
|
||||
LLMConfigureOutputFrame,
|
||||
LLMContextFrame,
|
||||
@@ -50,10 +51,10 @@ from pipecat.frames.frames import (
|
||||
LLMMessagesAppendFrame,
|
||||
LLMTextFrame,
|
||||
MetricsFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
StartFrame,
|
||||
SystemFrame,
|
||||
TranscriptionFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
TTSAudioRawFrame,
|
||||
TTSStartedFrame,
|
||||
TTSStoppedFrame,
|
||||
@@ -919,7 +920,7 @@ class RTVIObserverParams:
|
||||
user_audio_level_enabled: bool = False
|
||||
metrics_enabled: bool = True
|
||||
system_logs_enabled: bool = False
|
||||
errors_enabled: bool = True
|
||||
errors_enabled: Optional[bool] = None
|
||||
audio_level_period_secs: float = 0.15
|
||||
|
||||
|
||||
@@ -962,7 +963,7 @@ class RTVIObserver(BaseObserver):
|
||||
if self._params.system_logs_enabled:
|
||||
self._system_logger_id = logger.add(self._logger_sink)
|
||||
|
||||
if self._params.errors_enabled:
|
||||
if self._params.errors_enabled is not None:
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
@@ -1209,11 +1210,10 @@ class RTVIObserver(BaseObserver):
|
||||
|
||||
async def _send_error_response(self, frame: RTVIServerResponseFrame):
|
||||
"""Send a response to the client for a specific request."""
|
||||
if self._params.errors_enabled:
|
||||
message = RTVIErrorResponse(
|
||||
id=str(frame.client_msg.msg_id), data=RTVIErrorResponseData(error=frame.error)
|
||||
)
|
||||
await self.send_rtvi_message(message)
|
||||
message = RTVIErrorResponse(
|
||||
id=str(frame.client_msg.msg_id), data=RTVIErrorResponseData(error=frame.error)
|
||||
)
|
||||
await self.send_rtvi_message(message)
|
||||
|
||||
|
||||
class RTVIProcessor(FrameProcessor):
|
||||
@@ -1346,7 +1346,9 @@ class RTVIProcessor(FrameProcessor):
|
||||
|
||||
async def push_transport_message(self, model: BaseModel, exclude_none: bool = True):
|
||||
"""Push a transport message frame."""
|
||||
frame = TransportMessageUrgentFrame(message=model.model_dump(exclude_none=exclude_none))
|
||||
frame = OutputTransportMessageUrgentFrame(
|
||||
message=model.model_dump(exclude_none=exclude_none)
|
||||
)
|
||||
await self.push_frame(frame)
|
||||
|
||||
async def handle_message(self, message: RTVIMessage):
|
||||
@@ -1419,7 +1421,7 @@ class RTVIProcessor(FrameProcessor):
|
||||
elif isinstance(frame, ErrorFrame):
|
||||
await self._send_error_frame(frame)
|
||||
await self.push_frame(frame, direction)
|
||||
elif isinstance(frame, TransportMessageUrgentFrame):
|
||||
elif isinstance(frame, InputTransportMessageUrgentFrame):
|
||||
await self._handle_transport_message(frame)
|
||||
# All other system frames
|
||||
elif isinstance(frame, SystemFrame):
|
||||
@@ -1482,7 +1484,7 @@ class RTVIProcessor(FrameProcessor):
|
||||
await self._handle_message(message)
|
||||
self._message_queue.task_done()
|
||||
|
||||
async def _handle_transport_message(self, frame: TransportMessageUrgentFrame):
|
||||
async def _handle_transport_message(self, frame: InputTransportMessageUrgentFrame):
|
||||
"""Handle an incoming transport message frame."""
|
||||
try:
|
||||
transport_message = frame.message
|
||||
|
||||
@@ -15,7 +15,7 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
TransportMessageFrame,
|
||||
UserSpeakingFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
@@ -36,9 +36,9 @@ class FrameLogger(FrameProcessor):
|
||||
color: Optional[str] = None,
|
||||
ignored_frame_types: Tuple[Type[Frame], ...] = (
|
||||
BotSpeakingFrame,
|
||||
UserSpeakingFrame,
|
||||
InputAudioRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
TransportMessageFrame,
|
||||
),
|
||||
):
|
||||
"""Initialize the frame logger.
|
||||
|
||||
@@ -70,7 +70,9 @@ import asyncio
|
||||
import os
|
||||
import sys
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.runner.types import (
|
||||
@@ -82,7 +84,7 @@ from pipecat.runner.types import (
|
||||
try:
|
||||
import uvicorn
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import BackgroundTasks, FastAPI, Request, WebSocket
|
||||
from fastapi import BackgroundTasks, FastAPI, HTTPException, Request, WebSocket
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import HTMLResponse, RedirectResponse
|
||||
except ImportError as e:
|
||||
@@ -166,6 +168,7 @@ def _create_server_app(
|
||||
# Set up transport-specific routes
|
||||
if transport_type == "webrtc":
|
||||
_setup_webrtc_routes(app, esp32_mode=esp32_mode, host=host)
|
||||
_setup_whatsapp_routes(app)
|
||||
elif transport_type == "daily":
|
||||
_setup_daily_routes(app)
|
||||
elif transport_type in ["twilio", "telnyx", "plivo", "exotel"]:
|
||||
@@ -221,12 +224,174 @@ def _setup_webrtc_routes(app: FastAPI, esp32_mode: bool = False, host: str = "lo
|
||||
return answer
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
async def smallwebrtc_lifespan(app: FastAPI):
|
||||
"""Manage FastAPI application lifecycle and cleanup connections."""
|
||||
yield
|
||||
await small_webrtc_handler.close()
|
||||
|
||||
app.router.lifespan_context = lifespan
|
||||
# Add the SmallWebRTC lifespan to the app
|
||||
_add_lifespan_to_app(app, smallwebrtc_lifespan)
|
||||
|
||||
|
||||
def _add_lifespan_to_app(app: FastAPI, new_lifespan):
|
||||
"""Add a new lifespan context manager to the app, combining with existing if present.
|
||||
|
||||
Args:
|
||||
app: The FastAPI application instance
|
||||
new_lifespan: The new lifespan context manager to add
|
||||
"""
|
||||
if hasattr(app.router, "lifespan_context") and app.router.lifespan_context is not None:
|
||||
# If there's already a lifespan context, combine them
|
||||
existing_lifespan = app.router.lifespan_context
|
||||
|
||||
@asynccontextmanager
|
||||
async def combined_lifespan(app: FastAPI):
|
||||
async with existing_lifespan(app):
|
||||
async with new_lifespan(app):
|
||||
yield
|
||||
|
||||
app.router.lifespan_context = combined_lifespan
|
||||
else:
|
||||
# No existing lifespan, use the new one
|
||||
app.router.lifespan_context = new_lifespan
|
||||
|
||||
|
||||
def _setup_whatsapp_routes(app: FastAPI):
|
||||
"""Set up WebRTC-specific routes."""
|
||||
try:
|
||||
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||
|
||||
from pipecat.transports.smallwebrtc.connection import SmallWebRTCConnection
|
||||
from pipecat.transports.smallwebrtc.request_handler import (
|
||||
SmallWebRTCRequest,
|
||||
SmallWebRTCRequestHandler,
|
||||
)
|
||||
from pipecat.transports.whatsapp.api import WhatsAppWebhookRequest
|
||||
from pipecat.transports.whatsapp.client import WhatsAppClient
|
||||
except ImportError as e:
|
||||
logger.error(f"WebRTC transport dependencies not installed: {e}")
|
||||
return
|
||||
|
||||
WHATSAPP_TOKEN = os.getenv("WHATSAPP_TOKEN")
|
||||
WHATSAPP_PHONE_NUMBER_ID = os.getenv("WHATSAPP_PHONE_NUMBER_ID")
|
||||
WHATSAPP_WEBHOOK_VERIFICATION_TOKEN = os.getenv("WHATSAPP_WEBHOOK_VERIFICATION_TOKEN")
|
||||
|
||||
if not all(
|
||||
[
|
||||
WHATSAPP_TOKEN,
|
||||
WHATSAPP_PHONE_NUMBER_ID,
|
||||
WHATSAPP_WEBHOOK_VERIFICATION_TOKEN,
|
||||
]
|
||||
):
|
||||
logger.debug(
|
||||
"Missing required environment variables for WhatsApp transport. Keeping it disabled."
|
||||
)
|
||||
return
|
||||
|
||||
# Global WhatsApp client instance
|
||||
whatsapp_client: Optional[WhatsAppClient] = None
|
||||
|
||||
@app.get(
|
||||
"/whatsapp",
|
||||
summary="Verify WhatsApp webhook",
|
||||
description="Handles WhatsApp webhook verification requests from Meta",
|
||||
)
|
||||
async def verify_webhook(request: Request):
|
||||
"""Verify WhatsApp webhook endpoint.
|
||||
|
||||
This endpoint is called by Meta's WhatsApp Business API to verify
|
||||
the webhook URL during setup. It validates the verification token
|
||||
and returns the challenge parameter if successful.
|
||||
"""
|
||||
if whatsapp_client is None:
|
||||
logger.error("WhatsApp client is not initialized")
|
||||
raise HTTPException(status_code=503, detail="Service unavailable")
|
||||
|
||||
params = dict(request.query_params)
|
||||
logger.debug(f"Webhook verification request received with params: {list(params.keys())}")
|
||||
|
||||
try:
|
||||
result = await whatsapp_client.handle_verify_webhook_request(
|
||||
params=params, expected_verification_token=WHATSAPP_WEBHOOK_VERIFICATION_TOKEN
|
||||
)
|
||||
logger.info("Webhook verification successful")
|
||||
return result
|
||||
except ValueError as e:
|
||||
logger.warning(f"Webhook verification failed: {e}")
|
||||
raise HTTPException(status_code=403, detail="Verification failed")
|
||||
|
||||
@app.post(
|
||||
"/whatsapp",
|
||||
summary="Handle WhatsApp webhook events",
|
||||
description="Processes incoming WhatsApp messages and call events",
|
||||
)
|
||||
async def whatsapp_webhook(body: WhatsAppWebhookRequest, background_tasks: BackgroundTasks):
|
||||
"""Handle incoming WhatsApp webhook events.
|
||||
|
||||
For call events, establishes WebRTC connections and spawns bot instances
|
||||
in the background to handle real-time communication.
|
||||
"""
|
||||
if whatsapp_client is None:
|
||||
logger.error("WhatsApp client is not initialized")
|
||||
raise HTTPException(status_code=503, detail="Service unavailable")
|
||||
|
||||
# Validate webhook object type
|
||||
if body.object != "whatsapp_business_account":
|
||||
logger.warning(f"Invalid webhook object type: {body.object}")
|
||||
raise HTTPException(status_code=400, detail="Invalid object type")
|
||||
|
||||
logger.debug(f"Processing WhatsApp webhook: {body.model_dump()}")
|
||||
|
||||
async def connection_callback(connection: SmallWebRTCConnection):
|
||||
"""Handle new WebRTC connections from WhatsApp calls.
|
||||
|
||||
Called when a WebRTC connection is established for a WhatsApp call.
|
||||
Spawns a bot instance to handle the conversation.
|
||||
|
||||
Args:
|
||||
connection: The established WebRTC connection
|
||||
"""
|
||||
bot_module = _get_bot_module()
|
||||
runner_args = SmallWebRTCRunnerArguments(webrtc_connection=connection)
|
||||
background_tasks.add_task(bot_module.bot, runner_args)
|
||||
|
||||
try:
|
||||
# Process the webhook request
|
||||
result = await whatsapp_client.handle_webhook_request(body, connection_callback)
|
||||
logger.debug(f"Webhook processed successfully: {result}")
|
||||
return {"status": "success", "message": "Webhook processed successfully"}
|
||||
except ValueError as ve:
|
||||
logger.warning(f"Invalid webhook request format: {ve}")
|
||||
raise HTTPException(status_code=400, detail=f"Invalid request: {str(ve)}")
|
||||
except Exception as e:
|
||||
logger.error(f"Internal error processing webhook: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error processing webhook")
|
||||
|
||||
@asynccontextmanager
|
||||
async def whatsapp_lifespan(app: FastAPI):
|
||||
"""Manage WhatsApp client lifecycle and cleanup connections."""
|
||||
nonlocal whatsapp_client
|
||||
|
||||
# Initialize WhatsApp client with persistent HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
whatsapp_client = WhatsAppClient(
|
||||
whatsapp_token=WHATSAPP_TOKEN,
|
||||
phone_number_id=WHATSAPP_PHONE_NUMBER_ID,
|
||||
session=session,
|
||||
)
|
||||
logger.info("WhatsApp client initialized successfully")
|
||||
|
||||
try:
|
||||
yield # Run the application
|
||||
finally:
|
||||
# Cleanup all active calls on shutdown
|
||||
logger.info("Cleaning up WhatsApp client resources...")
|
||||
if whatsapp_client:
|
||||
await whatsapp_client.terminate_all_calls()
|
||||
logger.info("WhatsApp cleanup completed")
|
||||
|
||||
# Add the WhatsApp lifespan to the app
|
||||
_add_lifespan_to_app(app, whatsapp_lifespan)
|
||||
|
||||
|
||||
def _setup_daily_routes(app: FastAPI):
|
||||
|
||||
@@ -99,29 +99,41 @@ async def parse_telephony_websocket(websocket: WebSocket):
|
||||
tuple: (transport_type: str, call_data: dict)
|
||||
|
||||
call_data contains provider-specific fields:
|
||||
- Twilio: {
|
||||
"stream_id": str,
|
||||
"call_id": str,
|
||||
"body": dict
|
||||
}
|
||||
- Telnyx: {
|
||||
"stream_id": str,
|
||||
"call_control_id": str,
|
||||
"outbound_encoding": str,
|
||||
"from": str,
|
||||
"to": str,
|
||||
}
|
||||
- Plivo: {
|
||||
"stream_id": str,
|
||||
"call_id": str,
|
||||
}
|
||||
- Exotel: {
|
||||
"stream_id": str,
|
||||
"call_id": str,
|
||||
"account_sid": str,
|
||||
"from": str,
|
||||
"to": str,
|
||||
}
|
||||
|
||||
- Twilio::
|
||||
|
||||
{
|
||||
"stream_id": str,
|
||||
"call_id": str,
|
||||
"body": dict
|
||||
}
|
||||
|
||||
- Telnyx::
|
||||
|
||||
{
|
||||
"stream_id": str,
|
||||
"call_control_id": str,
|
||||
"outbound_encoding": str,
|
||||
"from": str,
|
||||
"to": str,
|
||||
}
|
||||
|
||||
- Plivo::
|
||||
|
||||
{
|
||||
"stream_id": str,
|
||||
"call_id": str,
|
||||
}
|
||||
|
||||
- Exotel::
|
||||
|
||||
{
|
||||
"stream_id": str,
|
||||
"call_id": str,
|
||||
"account_sid": str,
|
||||
"from": str,
|
||||
"to": str,
|
||||
}
|
||||
|
||||
Example usage::
|
||||
|
||||
@@ -301,6 +313,7 @@ def _smallwebrtc_sdp_cleanup_ice_candidates(text: str, pattern: str) -> str:
|
||||
Returns:
|
||||
Cleaned SDP text with filtered ICE candidates.
|
||||
"""
|
||||
logger.debug("Removing unsupported ICE candidates from SDP")
|
||||
result = []
|
||||
lines = text.splitlines()
|
||||
for line in lines:
|
||||
@@ -309,7 +322,7 @@ def _smallwebrtc_sdp_cleanup_ice_candidates(text: str, pattern: str) -> str:
|
||||
result.append(line)
|
||||
else:
|
||||
result.append(line)
|
||||
return "\r\n".join(result)
|
||||
return "\r\n".join(result) + "\r\n"
|
||||
|
||||
|
||||
def _smallwebrtc_sdp_cleanup_fingerprints(text: str) -> str:
|
||||
@@ -321,15 +334,16 @@ def _smallwebrtc_sdp_cleanup_fingerprints(text: str) -> str:
|
||||
Returns:
|
||||
SDP text with sha-384 and sha-512 fingerprints removed.
|
||||
"""
|
||||
logger.debug("Removing unsupported fingerprints from SDP")
|
||||
result = []
|
||||
lines = text.splitlines()
|
||||
for line in lines:
|
||||
if not re.search("sha-384", line) and not re.search("sha-512", line):
|
||||
result.append(line)
|
||||
return "\r\n".join(result)
|
||||
return "\r\n".join(result) + "\r\n"
|
||||
|
||||
|
||||
def smallwebrtc_sdp_munging(sdp: str, host: str) -> str:
|
||||
def smallwebrtc_sdp_munging(sdp: str, host: Optional[str]) -> str:
|
||||
"""Apply SDP modifications for SmallWebRTC compatibility.
|
||||
|
||||
Args:
|
||||
@@ -340,7 +354,8 @@ def smallwebrtc_sdp_munging(sdp: str, host: str) -> str:
|
||||
Modified SDP string with fingerprint and ICE candidate cleanup.
|
||||
"""
|
||||
sdp = _smallwebrtc_sdp_cleanup_fingerprints(sdp)
|
||||
sdp = _smallwebrtc_sdp_cleanup_ice_candidates(sdp, host)
|
||||
if host:
|
||||
sdp = _smallwebrtc_sdp_cleanup_ice_candidates(sdp, host)
|
||||
return sdp
|
||||
|
||||
|
||||
|
||||
@@ -21,9 +21,9 @@ from pipecat.frames.frames import (
|
||||
InputAudioRawFrame,
|
||||
InputDTMFFrame,
|
||||
InterruptionFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
StartFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
)
|
||||
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
||||
|
||||
@@ -121,7 +121,7 @@ class ExotelFrameSerializer(FrameSerializer):
|
||||
}
|
||||
|
||||
return json.dumps(answer)
|
||||
elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
|
||||
elif isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
|
||||
return json.dumps(frame.message)
|
||||
|
||||
return None
|
||||
|
||||
@@ -23,9 +23,9 @@ from pipecat.frames.frames import (
|
||||
InputAudioRawFrame,
|
||||
InputDTMFFrame,
|
||||
InterruptionFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
StartFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
)
|
||||
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
||||
|
||||
@@ -148,7 +148,7 @@ class PlivoFrameSerializer(FrameSerializer):
|
||||
}
|
||||
|
||||
return json.dumps(answer)
|
||||
elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
|
||||
elif isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
|
||||
return json.dumps(frame.message)
|
||||
|
||||
# Return None for unhandled frames
|
||||
|
||||
@@ -15,11 +15,12 @@ import pipecat.frames.protobufs.frames_pb2 as frame_protos
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InputTransportMessageFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
TextFrame,
|
||||
TranscriptionFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
)
|
||||
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
||||
|
||||
@@ -82,7 +83,7 @@ class ProtobufFrameSerializer(FrameSerializer):
|
||||
Serialized frame as bytes, or None if frame type is not serializable.
|
||||
"""
|
||||
# Wrapping this messages as a JSONFrame to send
|
||||
if isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
|
||||
if isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
|
||||
frame = MessageFrame(
|
||||
data=json.dumps(frame.message),
|
||||
)
|
||||
@@ -134,11 +135,11 @@ class ProtobufFrameSerializer(FrameSerializer):
|
||||
if "pts" in args_dict:
|
||||
del args_dict["pts"]
|
||||
|
||||
# Special handling for MessageFrame -> TransportMessageUrgentFrame
|
||||
# Special handling for MessageFrame -> OutputTransportMessageUrgentFrame
|
||||
if class_name == MessageFrame:
|
||||
try:
|
||||
msg = json.loads(args_dict["data"])
|
||||
instance = TransportMessageUrgentFrame(message=msg)
|
||||
instance = InputTransportMessageFrame(message=msg)
|
||||
logger.debug(f"ProtobufFrameSerializer: Transport message {instance}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing MessageFrame data: {e}")
|
||||
|
||||
@@ -23,9 +23,9 @@ from pipecat.frames.frames import (
|
||||
InputAudioRawFrame,
|
||||
InputDTMFFrame,
|
||||
InterruptionFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
StartFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
)
|
||||
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
||||
|
||||
@@ -175,7 +175,7 @@ class TwilioFrameSerializer(FrameSerializer):
|
||||
}
|
||||
|
||||
return json.dumps(answer)
|
||||
elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
|
||||
elif isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
|
||||
return json.dumps(frame.message)
|
||||
|
||||
# Return None for unhandled frames
|
||||
|
||||
@@ -61,7 +61,6 @@ from pipecat.utils.tracing.service_decorators import traced_llm
|
||||
|
||||
try:
|
||||
import aioboto3
|
||||
import httpx
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ReadTimeoutError
|
||||
except ModuleNotFoundError as e:
|
||||
@@ -1117,7 +1116,7 @@ class AWSBedrockLLMService(LLMService):
|
||||
# also get cancelled.
|
||||
use_completion_tokens_estimate = True
|
||||
raise
|
||||
except httpx.TimeoutException:
|
||||
except (ReadTimeoutError, asyncio.TimeoutError):
|
||||
await self._call_event_handler("on_completion_timeout")
|
||||
except Exception as e:
|
||||
logger.exception(f"{self} exception: {e}")
|
||||
|
||||
636
src/pipecat/services/deepgram/flux/stt.py
Normal file
636
src/pipecat/services/deepgram/flux/stt.py
Normal file
@@ -0,0 +1,636 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Deepgram Flux speech-to-text service implementation."""
|
||||
|
||||
import json
|
||||
from enum import Enum
|
||||
from typing import Any, AsyncGenerator, Dict, Optional
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
CancelFrame,
|
||||
EndFrame,
|
||||
ErrorFrame,
|
||||
Frame,
|
||||
InterimTranscriptionFrame,
|
||||
StartFrame,
|
||||
TranscriptionFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.services.stt_service import WebsocketSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.utils.time import time_now_iso8601
|
||||
from pipecat.utils.tracing.service_decorators import traced_stt
|
||||
|
||||
try:
|
||||
from websockets.asyncio.client import connect as websocket_connect
|
||||
from websockets.protocol import State
|
||||
except ModuleNotFoundError as e:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Deepgram Flux, you need to `pip install pipecat-ai[deepgram]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
|
||||
|
||||
class FluxMessageType(str, Enum):
|
||||
"""Deepgram Flux WebSocket message types.
|
||||
|
||||
These are the top-level message types that can be received from the
|
||||
Deepgram Flux WebSocket connection.
|
||||
"""
|
||||
|
||||
RECEIVE_CONNECTED = "Connected"
|
||||
RECEIVE_FATAL_ERROR = "Error"
|
||||
TURN_INFO = "TurnInfo"
|
||||
|
||||
|
||||
class FluxEventType(str, Enum):
|
||||
"""Deepgram Flux TurnInfo event types.
|
||||
|
||||
These events are contained within TurnInfo messages and indicate
|
||||
different stages of speech processing and turn detection.
|
||||
"""
|
||||
|
||||
START_OF_TURN = "StartOfTurn"
|
||||
TURN_RESUMED = "TurnResumed"
|
||||
END_OF_TURN = "EndOfTurn"
|
||||
EAGER_END_OF_TURN = "EagerEndOfTurn"
|
||||
UPDATE = "Update"
|
||||
|
||||
|
||||
class DeepgramFluxSTTService(WebsocketSTTService):
|
||||
"""Deepgram Flux speech-to-text service.
|
||||
|
||||
Provides real-time speech recognition using Deepgram's WebSocket API with Flux capabilities.
|
||||
Supports configurable models, VAD events, and various audio processing options
|
||||
including advanced turn detection and EagerEndOfTurn events for improved conversational AI performance.
|
||||
"""
|
||||
|
||||
class InputParams(BaseModel):
|
||||
"""Configuration parameters for Deepgram Flux API.
|
||||
|
||||
This class defines all available connection parameters for the Deepgram Flux API
|
||||
based on the official documentation.
|
||||
|
||||
Parameters:
|
||||
eager_eot_threshold: Optional. EagerEndOfTurn/TurnResumed are off by default.
|
||||
You can turn them on by setting eager_eot_threshold to a valid value.
|
||||
Lower values = more aggressive EagerEndOfTurning (faster response, more LLM calls).
|
||||
Higher values = more conservative EagerEndOfTurning (slower response, fewer LLM calls).
|
||||
eot_threshold: Optional. End-of-turn confidence required to finish a turn (default 0.7).
|
||||
Lower values = turns end sooner (more interruptions, faster responses).
|
||||
Higher values = turns end later (fewer interruptions, more complete utterances).
|
||||
eot_timeout_ms: Optional. Time in milliseconds after speech to finish a turn
|
||||
regardless of EOT confidence (default 5000).
|
||||
keyterm: List of keyterms to boost recognition accuracy for specialized terminology.
|
||||
mip_opt_out: Optional. Opts out requests from the Deepgram Model Improvement Program
|
||||
(default False).
|
||||
tag: List of tags to label requests for identification during usage reporting.
|
||||
"""
|
||||
|
||||
eager_eot_threshold: Optional[float] = None
|
||||
eot_threshold: Optional[float] = None
|
||||
eot_timeout_ms: Optional[int] = None
|
||||
keyterm: list = []
|
||||
mip_opt_out: Optional[bool] = None
|
||||
tag: list = []
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
api_key: str,
|
||||
url: str = "wss://api.deepgram.com/v2/listen",
|
||||
sample_rate: Optional[int] = None,
|
||||
model: str = "flux-general-en",
|
||||
flux_encoding: str = "linear16",
|
||||
params: Optional[InputParams] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the Deepgram Flux STT service.
|
||||
|
||||
Args:
|
||||
api_key: Deepgram API key for authentication. Required for API access.
|
||||
url: WebSocket URL for the Deepgram Flux API. Defaults to the preview endpoint.
|
||||
sample_rate: Audio sample rate in Hz. If None, uses the rate from params or 16000.
|
||||
model: Deepgram Flux model to use for transcription. Currently only supports "flux-general-en".
|
||||
flux_encoding: Audio encoding format required by Flux API. Must be "linear16".
|
||||
Raw signed little-endian 16-bit PCM encoding.
|
||||
params: InputParams instance containing detailed API configuration options.
|
||||
If None, default parameters will be used.
|
||||
**kwargs: Additional arguments passed to the parent WebsocketSTTService class.
|
||||
|
||||
Examples:
|
||||
Basic usage with default parameters::
|
||||
|
||||
stt = DeepgramFluxSTTService(api_key="your-api-key")
|
||||
|
||||
Advanced usage with custom parameters::
|
||||
|
||||
params = DeepgramFluxSTTService.InputParams(
|
||||
eager_eot_threshold=0.5,
|
||||
eot_threshold=0.8,
|
||||
keyterm=["AI", "machine learning", "neural network"],
|
||||
tag=["production", "voice-agent"]
|
||||
)
|
||||
stt = DeepgramFluxSTTService(
|
||||
api_key="your-api-key",
|
||||
model="flux-general-en",
|
||||
params=params
|
||||
)
|
||||
"""
|
||||
super().__init__(sample_rate=sample_rate, **kwargs)
|
||||
|
||||
self._api_key = api_key
|
||||
self._url = url
|
||||
self._model = model
|
||||
self._params = params or DeepgramFluxSTTService.InputParams()
|
||||
self._flux_encoding = flux_encoding
|
||||
# This is the currently only supported language
|
||||
self._language = Language.EN
|
||||
self._websocket_url = None
|
||||
self._receive_task = None
|
||||
|
||||
async def _connect(self):
|
||||
"""Connect to WebSocket and start background tasks.
|
||||
|
||||
Establishes the WebSocket connection to the Deepgram Flux API and starts
|
||||
the background task for receiving transcription results.
|
||||
"""
|
||||
await self._connect_websocket()
|
||||
|
||||
if self._websocket and not self._receive_task:
|
||||
self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
|
||||
|
||||
async def _disconnect(self):
|
||||
"""Disconnect from WebSocket and clean up tasks.
|
||||
|
||||
Gracefully disconnects from the Deepgram Flux API, cancels background tasks,
|
||||
and cleans up resources to prevent memory leaks.
|
||||
"""
|
||||
try:
|
||||
# Cancel background tasks BEFORE closing websocket
|
||||
if self._receive_task:
|
||||
await self.cancel_task(self._receive_task, timeout=2.0)
|
||||
self._receive_task = None
|
||||
|
||||
# Now close the websocket
|
||||
await self._disconnect_websocket()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during disconnect: {e}")
|
||||
finally:
|
||||
# Reset state only after everything is cleaned up
|
||||
self._websocket = None
|
||||
|
||||
async def _connect_websocket(self):
|
||||
"""Establish WebSocket connection to API.
|
||||
|
||||
Creates a WebSocket connection to the Deepgram Flux API using the configured
|
||||
URL and authentication headers. Handles connection errors and reports them
|
||||
through the event handler system.
|
||||
"""
|
||||
try:
|
||||
if self._websocket and self._websocket.state is State.OPEN:
|
||||
return
|
||||
|
||||
self._websocket = await websocket_connect(
|
||||
self._websocket_url,
|
||||
additional_headers={"Authorization": f"Token {self._api_key}"},
|
||||
)
|
||||
logger.debug("Connected to Deepgram Flux Websocket")
|
||||
except Exception as e:
|
||||
logger.error(f"{self} initialization error: {e}")
|
||||
self._websocket = None
|
||||
await self._call_event_handler("on_connection_error", f"{e}")
|
||||
|
||||
async def _disconnect_websocket(self):
|
||||
"""Close WebSocket connection and clean up state.
|
||||
|
||||
Closes the WebSocket connection to the Deepgram Flux API and stops all
|
||||
metrics collection. Handles disconnection errors gracefully.
|
||||
"""
|
||||
try:
|
||||
await self.stop_all_metrics()
|
||||
|
||||
if self._websocket:
|
||||
await self._send_close_stream()
|
||||
logger.debug("Disconnecting from Deepgram Flux Websocket")
|
||||
await self._websocket.close()
|
||||
except Exception as e:
|
||||
logger.error(f"{self} error closing websocket: {e}")
|
||||
|
||||
async def _send_close_stream(self) -> None:
|
||||
"""Sends a CloseStream control message to the Deepgram Flux WebSocket API.
|
||||
|
||||
This signals to the server that no more audio data will be sent.
|
||||
"""
|
||||
if self._websocket:
|
||||
logger.debug("Sending CloseStream message to Deepgram Flux")
|
||||
message = {"type": "CloseStream"}
|
||||
await self._websocket.send(json.dumps(message))
|
||||
|
||||
def can_generate_metrics(self) -> bool:
|
||||
"""Check if this service can generate processing metrics.
|
||||
|
||||
Returns:
|
||||
True, as Deepgram service supports metrics generation.
|
||||
"""
|
||||
return True
|
||||
|
||||
async def start(self, frame: StartFrame):
|
||||
"""Start the Deepgram Flux STT service.
|
||||
|
||||
Initializes the service by constructing the WebSocket URL with all configured
|
||||
parameters and establishing the connection to begin transcription processing.
|
||||
|
||||
Args:
|
||||
frame: The start frame containing initialization parameters and metadata.
|
||||
"""
|
||||
await super().start(frame)
|
||||
|
||||
url_params = [
|
||||
f"model={self._model}",
|
||||
f"sample_rate={self.sample_rate}",
|
||||
f"encoding={self._flux_encoding}",
|
||||
]
|
||||
|
||||
if self._params.eager_eot_threshold is not None:
|
||||
url_params.append(f"eager_eot_threshold={self._params.eager_eot_threshold}")
|
||||
|
||||
if self._params.eot_threshold is not None:
|
||||
url_params.append(f"eot_threshold={self._params.eot_threshold}")
|
||||
|
||||
if self._params.eot_timeout_ms is not None:
|
||||
url_params.append(f"eot_timeout_ms={self._params.eot_timeout_ms}")
|
||||
|
||||
if self._params.mip_opt_out is not None:
|
||||
url_params.append(f"mip_opt_out={str(self._params.mip_opt_out).lower()}")
|
||||
|
||||
# Add keyterm parameters (can have multiple)
|
||||
for keyterm in self._params.keyterm:
|
||||
url_params.append(f"keyterm={keyterm}")
|
||||
|
||||
# Add tag parameters (can have multiple)
|
||||
for tag_value in self._params.tag:
|
||||
url_params.append(f"tag={tag_value}")
|
||||
|
||||
self._websocket_url = f"{self._url}?{'&'.join(url_params)}"
|
||||
await self._connect()
|
||||
|
||||
async def stop(self, frame: EndFrame):
|
||||
"""Stop the Deepgram Flux STT service.
|
||||
|
||||
Args:
|
||||
frame: The end frame.
|
||||
"""
|
||||
await super().stop(frame)
|
||||
await self._disconnect()
|
||||
|
||||
async def cancel(self, frame: CancelFrame):
|
||||
"""Cancel the Deepgram Flux STT service.
|
||||
|
||||
Args:
|
||||
frame: The cancel frame.
|
||||
"""
|
||||
await super().cancel(frame)
|
||||
await self._disconnect()
|
||||
|
||||
async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
|
||||
"""Send audio data to Deepgram Flux for transcription.
|
||||
|
||||
Transmits raw audio bytes to the Deepgram Flux API for real-time speech
|
||||
recognition. Transcription results are received asynchronously through
|
||||
WebSocket callbacks and processed in the background.
|
||||
|
||||
Args:
|
||||
audio: Raw audio bytes in linear16 format (signed little-endian 16-bit PCM).
|
||||
|
||||
Yields:
|
||||
Frame: None (transcription results are delivered via WebSocket callbacks
|
||||
rather than as return values from this method).
|
||||
|
||||
Raises:
|
||||
Exception: If the WebSocket connection is not established or if there
|
||||
are issues sending the audio data.
|
||||
"""
|
||||
if not self._websocket:
|
||||
logger.error("Not connected to Deepgram Flux.")
|
||||
yield ErrorFrame("Not connected to Deepgram Flux.", fatal=True)
|
||||
return
|
||||
|
||||
try:
|
||||
await self._websocket.send(audio)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to send audio to Flux: {e}")
|
||||
yield ErrorFrame(f"Failed to send audio to Flux: {e}")
|
||||
return
|
||||
|
||||
yield None
|
||||
|
||||
async def start_metrics(self):
|
||||
"""Start TTFB and processing metrics collection."""
|
||||
# TTFB (Time To First Byte) metrics are currently disabled for Deepgram Flux.
|
||||
# Ideally, TTFB should measure the time from when a user starts speaking
|
||||
# until we receive the first transcript. However, Deepgram Flux delivers
|
||||
# both the "user started speaking" event and the first transcript simultaneously,
|
||||
# making this timing measurement meaningless in this context.
|
||||
# await self.start_ttfb_metrics()
|
||||
await self.start_processing_metrics()
|
||||
|
||||
@traced_stt
|
||||
async def _handle_transcription(
|
||||
self, transcript: str, is_final: bool, language: Optional[Language] = None
|
||||
):
|
||||
"""Handle a transcription result with tracing."""
|
||||
pass
|
||||
|
||||
def _get_websocket(self):
|
||||
"""Get the current WebSocket connection.
|
||||
|
||||
Returns the active WebSocket connection instance, raising an exception
|
||||
if no connection is currently established.
|
||||
|
||||
Returns:
|
||||
The active WebSocket connection instance.
|
||||
|
||||
Raises:
|
||||
Exception: If no WebSocket connection is currently active.
|
||||
"""
|
||||
if self._websocket:
|
||||
return self._websocket
|
||||
raise Exception("Websocket not connected")
|
||||
|
||||
def _validate_message(self, data: Dict[str, Any]) -> bool:
|
||||
"""Validate basic message structure from Deepgram Flux.
|
||||
|
||||
Ensures the received message has the expected structure before processing.
|
||||
|
||||
Args:
|
||||
data: The parsed JSON message data to validate.
|
||||
|
||||
Returns:
|
||||
True if the message structure is valid, False otherwise.
|
||||
"""
|
||||
if not isinstance(data, dict):
|
||||
logger.warning("Message is not a dictionary")
|
||||
return False
|
||||
|
||||
if "type" not in data:
|
||||
logger.warning("Message missing 'type' field")
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
async def _receive_messages(self):
|
||||
"""Receive and process messages from WebSocket.
|
||||
|
||||
Continuously receives messages from the Deepgram Flux WebSocket connection
|
||||
and processes various message types including connection status, transcription
|
||||
results, turn information, and error conditions. Handles different event types
|
||||
such as StartOfTurn, EndOfTurn, EagerEndOfTurn, and Update events.
|
||||
"""
|
||||
async for message in self._get_websocket():
|
||||
if isinstance(message, str):
|
||||
try:
|
||||
data = json.loads(message)
|
||||
await self._handle_message(data)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.error(f"Failed to decode JSON message: {e}")
|
||||
# Skip malformed messages
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing message: {e}")
|
||||
# Error will be handled inside WebsocketService->_receive_task_handler
|
||||
raise
|
||||
else:
|
||||
logger.warning(f"Received non-string message: {type(message)}")
|
||||
|
||||
async def _handle_message(self, data: Dict[str, Any]):
|
||||
"""Handle a parsed WebSocket message from Deepgram Flux.
|
||||
|
||||
Routes messages to appropriate handlers based on their type. Validates
|
||||
message structure before processing.
|
||||
|
||||
Args:
|
||||
data: The parsed JSON message data from the WebSocket.
|
||||
"""
|
||||
if not self._validate_message(data):
|
||||
return
|
||||
|
||||
message_type = data.get("type")
|
||||
|
||||
try:
|
||||
flux_message_type = FluxMessageType(message_type)
|
||||
except ValueError:
|
||||
logger.debug(f"Unhandled message type: {message_type or 'unknown'}")
|
||||
return
|
||||
|
||||
match flux_message_type:
|
||||
case FluxMessageType.RECEIVE_CONNECTED:
|
||||
await self._handle_connection_established()
|
||||
case FluxMessageType.RECEIVE_FATAL_ERROR:
|
||||
await self._handle_fatal_error(data)
|
||||
case FluxMessageType.TURN_INFO:
|
||||
await self._handle_turn_info(data)
|
||||
|
||||
async def _handle_connection_established(self):
|
||||
"""Handle successful connection establishment to Deepgram Flux.
|
||||
|
||||
This event is fired when the WebSocket connection to Deepgram Flux
|
||||
is successfully established and ready to receive audio data for
|
||||
transcription processing.
|
||||
"""
|
||||
logger.info("Connected to Flux - ready to stream audio")
|
||||
|
||||
async def _handle_fatal_error(self, data: Dict[str, Any]):
|
||||
"""Handle fatal error messages from Deepgram Flux.
|
||||
|
||||
Fatal errors indicate unrecoverable issues with the connection or
|
||||
configuration that require intervention. These errors will cause
|
||||
the connection to be terminated.
|
||||
|
||||
Args:
|
||||
data: The error message data containing error details.
|
||||
|
||||
Raises:
|
||||
Exception: Always raises to trigger error handling in the parent service.
|
||||
"""
|
||||
error_msg = data.get("error", "Unknown error")
|
||||
deepgram_error = f"Fatal error: {error_msg}"
|
||||
logger.error(deepgram_error)
|
||||
# Error will be handled inside WebsocketService->_receive_task_handler
|
||||
raise Exception(deepgram_error)
|
||||
|
||||
async def _handle_turn_info(self, data: Dict[str, Any]):
|
||||
"""Handle TurnInfo events from Deepgram Flux.
|
||||
|
||||
TurnInfo messages contain various turn-based events that indicate
|
||||
the state of speech processing, including turn boundaries, interim
|
||||
results, and turn finalization events.
|
||||
|
||||
Args:
|
||||
data: The TurnInfo message data containing event type, transcript and some extra metadata.
|
||||
"""
|
||||
event = data.get("event")
|
||||
transcript = data.get("transcript", "")
|
||||
|
||||
try:
|
||||
flux_event_type = FluxEventType(event)
|
||||
except ValueError:
|
||||
logger.debug(f"Unhandled TurnInfo event: {event}")
|
||||
return
|
||||
|
||||
match flux_event_type:
|
||||
case FluxEventType.START_OF_TURN:
|
||||
await self._handle_start_of_turn(transcript)
|
||||
case FluxEventType.TURN_RESUMED:
|
||||
await self._handle_turn_resumed(event)
|
||||
case FluxEventType.END_OF_TURN:
|
||||
await self._handle_end_of_turn(transcript, data)
|
||||
case FluxEventType.EAGER_END_OF_TURN:
|
||||
await self._handle_eager_end_of_turn(transcript, data)
|
||||
case FluxEventType.UPDATE:
|
||||
await self._handle_update(transcript)
|
||||
|
||||
async def _handle_start_of_turn(self, transcript: str):
|
||||
"""Handle StartOfTurn events from Deepgram Flux.
|
||||
|
||||
StartOfTurn events are fired when Deepgram Flux detects the beginning
|
||||
of a new speaking turn. This triggers bot interruption to stop any
|
||||
ongoing speech synthesis and signals the start of user speech detection.
|
||||
|
||||
The service will:
|
||||
- Send a BotInterruptionFrame upstream to stop bot speech
|
||||
- Send a UserStartedSpeakingFrame downstream to notify other components
|
||||
- Start metrics collection for measuring response times
|
||||
|
||||
Args:
|
||||
transcript: maybe the first few words of the turn.
|
||||
"""
|
||||
logger.debug("User started speaking")
|
||||
await self.push_interruption_task_frame_and_wait()
|
||||
await self.push_frame(UserStartedSpeakingFrame(), FrameDirection.DOWNSTREAM)
|
||||
await self.push_frame(UserStartedSpeakingFrame(), FrameDirection.UPSTREAM)
|
||||
await self.start_metrics()
|
||||
if transcript:
|
||||
logger.trace(f"Start of turn transcript: {transcript}")
|
||||
|
||||
async def _handle_turn_resumed(self, event: str):
|
||||
"""Handle TurnResumed events from Deepgram Flux.
|
||||
|
||||
TurnResumed events indicate that speech has resumed after a brief pause
|
||||
within the same turn. This is primarily used for logging and debugging
|
||||
purposes and doesn't trigger any significant processing changes.
|
||||
|
||||
Args:
|
||||
event: The event type string for logging purposes.
|
||||
"""
|
||||
logger.trace(f"Received event TurnResumed: {event}")
|
||||
|
||||
async def _handle_end_of_turn(self, transcript: str, data: Dict[str, Any]):
|
||||
"""Handle EndOfTurn events from Deepgram Flux.
|
||||
|
||||
EndOfTurn events are fired when Deepgram Flux determines that a speaking
|
||||
turn has concluded, either due to sufficient silence or end-of-turn
|
||||
confidence thresholds being met. This provides the final transcript
|
||||
for the completed turn.
|
||||
|
||||
The service will:
|
||||
- Create and send a final TranscriptionFrame with the complete transcript
|
||||
- Trigger transcription handling with tracing for metrics
|
||||
- Stop processing metrics collection
|
||||
- Send a UserStoppedSpeakingFrame to signal turn completion
|
||||
|
||||
Args:
|
||||
transcript: The final transcript text for the completed turn.
|
||||
data: The TurnInfo message data containing event type, transcript and some extra metadata.
|
||||
"""
|
||||
logger.debug("User stopped speaking")
|
||||
|
||||
await self.push_frame(
|
||||
TranscriptionFrame(
|
||||
transcript,
|
||||
self._user_id,
|
||||
time_now_iso8601(),
|
||||
self._language,
|
||||
result=data,
|
||||
)
|
||||
)
|
||||
await self._handle_transcription(transcript, True, self._language)
|
||||
await self.stop_processing_metrics()
|
||||
await self.push_frame(UserStoppedSpeakingFrame(), FrameDirection.DOWNSTREAM)
|
||||
await self.push_frame(UserStoppedSpeakingFrame(), FrameDirection.UPSTREAM)
|
||||
|
||||
async def _handle_eager_end_of_turn(self, transcript: str, data: Dict[str, Any]):
|
||||
"""Handle EagerEndOfTurn events from Deepgram Flux.
|
||||
|
||||
EagerEndOfTurn events are fired when the end-of-turn confidence reaches the
|
||||
EagerEndOfTurn threshold but hasn't yet reached the full end-of-turn threshold.
|
||||
These provide interim transcripts that can be used for faster response
|
||||
generation while still allowing the user to continue speaking.
|
||||
|
||||
EagerEndOfTurn events enable more responsive conversational AI by allowing
|
||||
the LLM to start processing likely final transcripts before the turn
|
||||
is definitively ended.
|
||||
|
||||
Args:
|
||||
transcript: The interim transcript text that triggered the EagerEndOfTurn event.
|
||||
data: The TurnInfo message data containing event type, transcript and some extra metadata.
|
||||
"""
|
||||
logger.trace(f"EagerEndOfTurn - {transcript}")
|
||||
# Deepgram's EagerEndOfTurn feature enables lower-latency voice agents by sending
|
||||
# medium-confidence transcripts before EndOfTurn certainty, allowing LLM processing to
|
||||
# begin early.
|
||||
#
|
||||
# However, if speech resumes or the transcripts differ from the final EndOfTurn, the
|
||||
# EagerEndOfTurn response should be cancelled to avoid incorrect or partial responses.
|
||||
#
|
||||
# Pipecat doesn't yet provide built-in Gate/control mechanisms to:
|
||||
# 1. Start LLM/TTS processing early on EagerEndOfTurn events
|
||||
# 2. Cancel in-flight processing when TurnResumed occurs
|
||||
#
|
||||
# By pushing EagerEndOfTurn transcripts as InterimTranscriptionFrame, we enable
|
||||
# developers to implement custom EagerEndOfTurn handling in their applications while
|
||||
# maintaining compatibility with existing interim transcription workflows.
|
||||
#
|
||||
# TODO: Implement proper EagerEndOfTurn support with cancellable processing pipeline
|
||||
# that can start response generation on EagerEndOfTurn and cancel or confirm it.
|
||||
await self.push_frame(
|
||||
InterimTranscriptionFrame(
|
||||
transcript,
|
||||
self._user_id,
|
||||
time_now_iso8601(),
|
||||
self._language,
|
||||
result=data,
|
||||
)
|
||||
)
|
||||
|
||||
async def _handle_update(self, transcript: str):
|
||||
"""Handle Update events from Deepgram Flux.
|
||||
|
||||
Update events provide incremental transcript updates during an ongoing
|
||||
turn. These events allow for real-time display of transcription progress
|
||||
and can be used to provide visual feedback to users about what's being
|
||||
recognized.
|
||||
|
||||
The service stops TTFB (Time To First Byte) metrics when the first
|
||||
substantial update is received, indicating successful processing start.
|
||||
|
||||
Args:
|
||||
transcript: The current partial transcript text for the ongoing turn.
|
||||
"""
|
||||
if transcript:
|
||||
logger.trace(f"Update event: {transcript}")
|
||||
# TTFB (Time To First Byte) metrics are currently disabled for Deepgram Flux.
|
||||
# Ideally, TTFB should measure the time from when a user starts speaking
|
||||
# until we receive the first transcript. However, Deepgram Flux delivers
|
||||
# both the "user started speaking" event and the first transcript simultaneously,
|
||||
# making this timing measurement meaningless in this context.
|
||||
# await self.stop_ttfb_metrics()
|
||||
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING
|
||||
from pipecat.frames.frames import DataFrame, FunctionCallResultFrame
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipecat.services.openai_realtime_beta.context import OpenAIRealtimeLLMContext
|
||||
from pipecat.services.openai_realtime.context import OpenAIRealtimeLLMContext
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -15,6 +15,7 @@ from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
AudioRawFrame,
|
||||
ErrorFrame,
|
||||
Frame,
|
||||
StartFrame,
|
||||
STTMuteFrame,
|
||||
@@ -24,6 +25,7 @@ from pipecat.frames.frames import (
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.services.ai_service import AIService
|
||||
from pipecat.services.websocket_service import WebsocketService
|
||||
from pipecat.transcriptions.language import Language
|
||||
|
||||
|
||||
@@ -283,3 +285,35 @@ class SegmentedSTTService(STTService):
|
||||
if not self._user_speaking and len(self._audio_buffer) > self._audio_buffer_size_1s:
|
||||
discarded = len(self._audio_buffer) - self._audio_buffer_size_1s
|
||||
self._audio_buffer = self._audio_buffer[discarded:]
|
||||
|
||||
|
||||
class WebsocketSTTService(STTService, WebsocketService):
|
||||
"""Base class for websocket-based STT services.
|
||||
|
||||
Combines STT functionality with websocket connectivity, providing automatic
|
||||
error handling and reconnection capabilities.
|
||||
|
||||
Event handlers:
|
||||
on_connection_error: Called when a websocket connection error occurs.
|
||||
|
||||
Example::
|
||||
|
||||
@stt.event_handler("on_connection_error")
|
||||
async def on_connection_error(stt: STTService, error: str):
|
||||
logger.error(f"STT connection error: {error}")
|
||||
"""
|
||||
|
||||
def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
|
||||
"""Initialize the Websocket STT service.
|
||||
|
||||
Args:
|
||||
reconnect_on_error: Whether to automatically reconnect on websocket errors.
|
||||
**kwargs: Additional arguments passed to parent classes.
|
||||
"""
|
||||
STTService.__init__(self, **kwargs)
|
||||
WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
|
||||
self._register_event_handler("on_connection_error")
|
||||
|
||||
async def _report_error(self, error: ErrorFrame):
|
||||
await self._call_event_handler("on_connection_error", error.error)
|
||||
await self.push_error(error)
|
||||
|
||||
@@ -29,20 +29,19 @@ from pipecat.frames.frames import (
|
||||
CancelFrame,
|
||||
EndFrame,
|
||||
Frame,
|
||||
InputTransportMessageUrgentFrame,
|
||||
InterruptionFrame,
|
||||
MixerControlFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputDTMFFrame,
|
||||
OutputDTMFUrgentFrame,
|
||||
OutputImageRawFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
OutputTransportReadyFrame,
|
||||
SpeechOutputAudioRawFrame,
|
||||
SpriteFrame,
|
||||
StartFrame,
|
||||
SystemFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
TTSAudioRawFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
@@ -178,7 +177,9 @@ class BaseOutputTransport(FrameProcessor):
|
||||
# Sending a frame indicating that the output transport is ready and able to receive frames.
|
||||
await self.push_frame(OutputTransportReadyFrame(), FrameDirection.UPSTREAM)
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send a transport message.
|
||||
|
||||
Args:
|
||||
@@ -307,9 +308,7 @@ class BaseOutputTransport(FrameProcessor):
|
||||
elif isinstance(frame, InterruptionFrame):
|
||||
await self.push_frame(frame, direction)
|
||||
await self._handle_frame(frame)
|
||||
elif isinstance(frame, TransportMessageUrgentFrame) and not isinstance(
|
||||
frame, InputTransportMessageUrgentFrame
|
||||
):
|
||||
elif isinstance(frame, OutputTransportMessageUrgentFrame):
|
||||
await self.send_message(frame)
|
||||
elif isinstance(frame, OutputDTMFUrgentFrame):
|
||||
await self.write_dtmf(frame)
|
||||
@@ -646,7 +645,7 @@ class BaseOutputTransport(FrameProcessor):
|
||||
await self._set_video_image(frame)
|
||||
elif isinstance(frame, SpriteFrame):
|
||||
await self._set_video_images(frame.images)
|
||||
elif isinstance(frame, TransportMessageFrame):
|
||||
elif isinstance(frame, OutputTransportMessageFrame):
|
||||
await self._transport.send_message(frame)
|
||||
elif isinstance(frame, OutputDTMFFrame):
|
||||
await self._transport.write_dtmf(frame)
|
||||
|
||||
@@ -30,15 +30,15 @@ from pipecat.frames.frames import (
|
||||
ErrorFrame,
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InputTransportMessageUrgentFrame,
|
||||
InputTransportMessageFrame,
|
||||
InterimTranscriptionFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputImageRawFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
SpriteFrame,
|
||||
StartFrame,
|
||||
TranscriptionFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
UserAudioRawFrame,
|
||||
UserImageRawFrame,
|
||||
UserImageRequestFrame,
|
||||
@@ -74,7 +74,7 @@ VAD_RESET_PERIOD_MS = 2000
|
||||
|
||||
|
||||
@dataclass
|
||||
class DailyTransportMessageFrame(TransportMessageFrame):
|
||||
class DailyOutputTransportMessageFrame(OutputTransportMessageFrame):
|
||||
"""Frame for transport messages in Daily calls.
|
||||
|
||||
Parameters:
|
||||
@@ -85,7 +85,7 @@ class DailyTransportMessageFrame(TransportMessageFrame):
|
||||
|
||||
|
||||
@dataclass
|
||||
class DailyTransportMessageUrgentFrame(TransportMessageUrgentFrame):
|
||||
class DailyOutputTransportMessageUrgentFrame(OutputTransportMessageUrgentFrame):
|
||||
"""Frame for urgent transport messages in Daily calls.
|
||||
|
||||
Parameters:
|
||||
@@ -96,7 +96,59 @@ class DailyTransportMessageUrgentFrame(TransportMessageUrgentFrame):
|
||||
|
||||
|
||||
@dataclass
|
||||
class DailyInputTransportMessageUrgentFrame(InputTransportMessageUrgentFrame):
|
||||
class DailyTransportMessageFrame(DailyOutputTransportMessageFrame):
|
||||
"""Frame for transport messages in Daily calls.
|
||||
|
||||
.. deprecated:: 0.0.87
|
||||
This frame is deprecated and will be removed in a future version.
|
||||
Instead, use `DailyOutputTransportMessageFrame`.
|
||||
|
||||
Parameters:
|
||||
participant_id: Optional ID of the participant this message is for/from.
|
||||
"""
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("always")
|
||||
warnings.warn(
|
||||
"DailyTransportMessageFrame is deprecated and will be removed in a future version. "
|
||||
"Instead, use DailyOutputTransportMessageFrame.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class DailyTransportMessageUrgentFrame(DailyOutputTransportMessageUrgentFrame):
|
||||
"""Frame for urgent transport messages in Daily calls.
|
||||
|
||||
.. deprecated:: 0.0.87
|
||||
This frame is deprecated and will be removed in a future version.
|
||||
Instead, use `DailyOutputTransportMessageUrgentFrame`.
|
||||
|
||||
Parameters:
|
||||
participant_id: Optional ID of the participant this message is for/from.
|
||||
"""
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("always")
|
||||
warnings.warn(
|
||||
"DailyTransportMessageUrgentFrame is deprecated and will be removed in a future version. "
|
||||
"Instead, use DailyOutputTransportMessageUrgentFrame.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class DailyInputTransportMessageFrame(InputTransportMessageFrame):
|
||||
"""Frame for input urgent transport messages in Daily calls.
|
||||
|
||||
Parameters:
|
||||
@@ -106,6 +158,31 @@ class DailyInputTransportMessageUrgentFrame(InputTransportMessageUrgentFrame):
|
||||
participant_id: Optional[str] = None
|
||||
|
||||
|
||||
class DailyInputTransportMessageUrgentFrame(DailyInputTransportMessageFrame):
|
||||
"""Frame for input urgent transport messages in Daily calls.
|
||||
|
||||
.. deprecated:: 0.0.87
|
||||
This frame is deprecated and will be removed in a future version.
|
||||
Instead, use `DailyInputTransportMessageFrame`.
|
||||
|
||||
Parameters:
|
||||
participant_id: Optional ID of the participant this message is for/from.
|
||||
"""
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("always")
|
||||
warnings.warn(
|
||||
"DailyInputTransportMessageUrgentFrame is deprecated and will be removed in a future version. "
|
||||
"Instead, use DailyInputTransportMessageFrame.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class DailyUpdateRemoteParticipantsFrame(ControlFrame):
|
||||
"""Frame to update remote participants in Daily calls.
|
||||
@@ -474,7 +551,9 @@ class DailyTransportClient(EventHandler):
|
||||
"""
|
||||
return self._out_sample_rate
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send an application message to participants.
|
||||
|
||||
Args:
|
||||
@@ -484,7 +563,9 @@ class DailyTransportClient(EventHandler):
|
||||
return
|
||||
|
||||
participant_id = None
|
||||
if isinstance(frame, (DailyTransportMessageFrame, DailyTransportMessageUrgentFrame)):
|
||||
if isinstance(
|
||||
frame, (DailyOutputTransportMessageFrame, DailyOutputTransportMessageUrgentFrame)
|
||||
):
|
||||
participant_id = frame.participant_id
|
||||
|
||||
future = self._get_event_loop().create_future()
|
||||
@@ -1621,7 +1702,7 @@ class DailyInputTransport(BaseInputTransport):
|
||||
message: The message data to send.
|
||||
sender: ID of the message sender.
|
||||
"""
|
||||
frame = DailyInputTransportMessageUrgentFrame(message=message, participant_id=sender)
|
||||
frame = DailyInputTransportMessageFrame(message=message, participant_id=sender)
|
||||
await self.push_frame(frame)
|
||||
|
||||
#
|
||||
@@ -1843,7 +1924,9 @@ class DailyOutputTransport(BaseOutputTransport):
|
||||
if isinstance(frame, DailyUpdateRemoteParticipantsFrame):
|
||||
await self._client.update_remote_participants(frame.remote_participants)
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send a transport message to participants.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -29,9 +29,9 @@ from pipecat.frames.frames import (
|
||||
OutputAudioRawFrame,
|
||||
OutputDTMFFrame,
|
||||
OutputDTMFUrgentFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
StartFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
UserAudioRawFrame,
|
||||
UserImageRawFrame,
|
||||
)
|
||||
@@ -68,7 +68,7 @@ DTMF_CODE_MAP = {
|
||||
|
||||
|
||||
@dataclass
|
||||
class LiveKitTransportMessageFrame(TransportMessageFrame):
|
||||
class LiveKitOutputTransportMessageFrame(OutputTransportMessageFrame):
|
||||
"""Frame for transport messages in LiveKit rooms.
|
||||
|
||||
Parameters:
|
||||
@@ -79,7 +79,7 @@ class LiveKitTransportMessageFrame(TransportMessageFrame):
|
||||
|
||||
|
||||
@dataclass
|
||||
class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame):
|
||||
class LiveKitOutputTransportMessageUrgentFrame(OutputTransportMessageUrgentFrame):
|
||||
"""Frame for urgent transport messages in LiveKit rooms.
|
||||
|
||||
Parameters:
|
||||
@@ -89,6 +89,50 @@ class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame):
|
||||
participant_id: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class LiveKitTransportMessageFrame(LiveKitOutputTransportMessageFrame):
|
||||
"""Frame for transport messages in LiveKit rooms.
|
||||
|
||||
Parameters:
|
||||
participant_id: Optional ID of the participant this message is for/from.
|
||||
"""
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("always")
|
||||
warnings.warn(
|
||||
"LiveKitTransportMessageFrame is deprecated and will be removed in a future version. "
|
||||
"Instead, use LiveKitOutputTransportMessageFrame.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class LiveKitTransportMessageUrgentFrame(LiveKitOutputTransportMessageUrgentFrame):
|
||||
"""Frame for urgent transport messages in LiveKit rooms.
|
||||
|
||||
Parameters:
|
||||
participant_id: Optional ID of the participant this message is for/from.
|
||||
"""
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
import warnings
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("always")
|
||||
warnings.warn(
|
||||
"LiveKitTransportMessageUrgentFrame is deprecated and will be removed in a future version. "
|
||||
"Instead, use LiveKitOutputTransportMessageUrgentFrame.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
class LiveKitParams(TransportParams):
|
||||
"""Configuration parameters for LiveKit transport.
|
||||
|
||||
@@ -310,10 +354,10 @@ class LiveKitTransportClient:
|
||||
logger.error(f"Error sending data: {e}")
|
||||
|
||||
async def send_dtmf(self, digit: str):
|
||||
"""Send DTMF tone to the room.
|
||||
r"""Send DTMF tone to the room.
|
||||
|
||||
Args:
|
||||
digit: The DTMF digit to send (0-9, *, #).
|
||||
digit: The DTMF digit to send (0-9, \*, #).
|
||||
"""
|
||||
if not self._connected:
|
||||
return
|
||||
@@ -677,7 +721,7 @@ class LiveKitInputTransport(BaseInputTransport):
|
||||
message: The message data to send.
|
||||
sender: ID of the message sender.
|
||||
"""
|
||||
frame = LiveKitTransportMessageUrgentFrame(message=message, participant_id=sender)
|
||||
frame = LiveKitOutputTransportMessageUrgentFrame(message=message, participant_id=sender)
|
||||
await self.push_frame(frame)
|
||||
|
||||
async def _audio_in_task_handler(self):
|
||||
@@ -836,7 +880,9 @@ class LiveKitOutputTransport(BaseOutputTransport):
|
||||
await super().cleanup()
|
||||
await self._transport.cleanup()
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send a transport message to participants.
|
||||
|
||||
Args:
|
||||
@@ -846,7 +892,9 @@ class LiveKitOutputTransport(BaseOutputTransport):
|
||||
if isinstance(message, dict):
|
||||
# fix message encoding for dict-like messages, e.g. RTVI messages.
|
||||
message = json.dumps(message, ensure_ascii=False)
|
||||
if isinstance(frame, (LiveKitTransportMessageFrame, LiveKitTransportMessageUrgentFrame)):
|
||||
if isinstance(
|
||||
frame, (LiveKitOutputTransportMessageFrame, LiveKitOutputTransportMessageUrgentFrame)
|
||||
):
|
||||
await self._client.send_data(message.encode(), frame.participant_id)
|
||||
else:
|
||||
await self._client.send_data(message.encode())
|
||||
@@ -1105,7 +1153,9 @@ class LiveKitTransport(BaseTransport):
|
||||
participant_id: Optional specific participant to send to.
|
||||
"""
|
||||
if self._output:
|
||||
frame = LiveKitTransportMessageFrame(message=message, participant_id=participant_id)
|
||||
frame = LiveKitOutputTransportMessageFrame(
|
||||
message=message, participant_id=participant_id
|
||||
)
|
||||
await self._output.send_message(frame)
|
||||
|
||||
async def send_message_urgent(self, message: str, participant_id: Optional[str] = None):
|
||||
@@ -1116,7 +1166,7 @@ class LiveKitTransport(BaseTransport):
|
||||
participant_id: Optional specific participant to send to.
|
||||
"""
|
||||
if self._output:
|
||||
frame = LiveKitTransportMessageUrgentFrame(
|
||||
frame = LiveKitOutputTransportMessageUrgentFrame(
|
||||
message=message, participant_id=participant_id
|
||||
)
|
||||
await self._output.send_message(frame)
|
||||
|
||||
@@ -283,7 +283,6 @@ class SmallWebRTCConnection(BaseObject):
|
||||
self._data_channel = None
|
||||
self._renegotiation_in_progress = False
|
||||
self._last_received_time = None
|
||||
self._message_queue = []
|
||||
self._pending_app_messages = []
|
||||
self._connecting_timeout_task = None
|
||||
|
||||
@@ -297,10 +296,7 @@ class SmallWebRTCConnection(BaseObject):
|
||||
# Flush queued messages once the data channel is open
|
||||
@channel.on("open")
|
||||
async def on_open():
|
||||
logger.debug("Data channel is open, flushing queued messages")
|
||||
while self._message_queue:
|
||||
message = self._message_queue.pop(0)
|
||||
self._data_channel.send(message)
|
||||
logger.debug("Data channel is open!")
|
||||
|
||||
@channel.on("message")
|
||||
async def on_message(message):
|
||||
@@ -503,7 +499,6 @@ class SmallWebRTCConnection(BaseObject):
|
||||
self._track_map.clear()
|
||||
if self._pc:
|
||||
await self._pc.close()
|
||||
self._message_queue.clear()
|
||||
self._pending_app_messages.clear()
|
||||
self._track_map = {}
|
||||
self._cancel_monitoring_connecting_state()
|
||||
@@ -669,8 +664,8 @@ class SmallWebRTCConnection(BaseObject):
|
||||
if self._data_channel and self._data_channel.readyState == "open":
|
||||
self._data_channel.send(json_message)
|
||||
else:
|
||||
logger.debug("Data channel not ready, queuing message")
|
||||
self._message_queue.append(json_message)
|
||||
# The client might choose never to create a data channel.
|
||||
logger.trace("Data channel not ready, discarding message!")
|
||||
|
||||
def ask_to_renegotiate(self):
|
||||
"""Request renegotiation of the WebRTC connection."""
|
||||
|
||||
@@ -180,7 +180,7 @@ class SmallWebRTCRequestHandler:
|
||||
|
||||
answer = pipecat_connection.get_answer()
|
||||
|
||||
if self._esp32_mode and self._host and self._host != "localhost":
|
||||
if self._esp32_mode:
|
||||
from pipecat.runner.utils import smallwebrtc_sdp_munging
|
||||
|
||||
answer["sdp"] = smallwebrtc_sdp_munging(answer["sdp"], self._host)
|
||||
|
||||
@@ -26,13 +26,13 @@ from pipecat.frames.frames import (
|
||||
EndFrame,
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
InputTransportMessageUrgentFrame,
|
||||
InputTransportMessageFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputImageRawFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
SpriteFrame,
|
||||
StartFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
UserImageRawFrame,
|
||||
UserImageRequestFrame,
|
||||
)
|
||||
@@ -461,7 +461,9 @@ class SmallWebRTCClient:
|
||||
await self._webrtc_connection.disconnect()
|
||||
await self._handle_peer_disconnected()
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send an application message through the WebRTC connection.
|
||||
|
||||
Args:
|
||||
@@ -683,7 +685,7 @@ class SmallWebRTCInputTransport(BaseInputTransport):
|
||||
message: The application message to process.
|
||||
"""
|
||||
logger.debug(f"Received app message inside SmallWebRTCInputTransport {message}")
|
||||
frame = InputTransportMessageUrgentFrame(message=message)
|
||||
frame = InputTransportMessageFrame(message=message)
|
||||
await self.push_frame(frame)
|
||||
|
||||
# Add this method similar to DailyInputTransport.request_participant_image
|
||||
@@ -820,7 +822,9 @@ class SmallWebRTCOutputTransport(BaseOutputTransport):
|
||||
await super().cancel(frame)
|
||||
await self._client.disconnect()
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send a transport message through the WebRTC connection.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -27,9 +27,9 @@ from pipecat.frames.frames import (
|
||||
InputAudioRawFrame,
|
||||
InterruptionFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
StartFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup
|
||||
from pipecat.transports.base_input import BaseInputTransport
|
||||
@@ -345,7 +345,9 @@ class TavusTransportClient:
|
||||
participant_id, callback, audio_source, sample_rate, callback_interval_ms
|
||||
)
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send a message to participants.
|
||||
|
||||
Args:
|
||||
@@ -373,7 +375,7 @@ class TavusTransportClient:
|
||||
|
||||
async def send_interrupt_message(self) -> None:
|
||||
"""Send an interrupt message to the conversation."""
|
||||
transport_frame = TransportMessageUrgentFrame(
|
||||
transport_frame = OutputTransportMessageUrgentFrame(
|
||||
message={
|
||||
"message_type": "conversation",
|
||||
"event_type": "conversation.interrupt",
|
||||
@@ -605,7 +607,9 @@ class TavusOutputTransport(BaseOutputTransport):
|
||||
await super().cancel(frame)
|
||||
await self._client.stop()
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send a message to participants.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -28,9 +28,9 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InputAudioRawFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
StartFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameProcessorSetup
|
||||
from pipecat.serializers.base_serializer import FrameSerializer
|
||||
@@ -385,7 +385,9 @@ class WebsocketClientOutputTransport(BaseOutputTransport):
|
||||
await super().cleanup()
|
||||
await self._transport.cleanup()
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send a transport message through the WebSocket.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -28,9 +28,9 @@ from pipecat.frames.frames import (
|
||||
InputAudioRawFrame,
|
||||
InterruptionFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
StartFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
||||
@@ -402,7 +402,9 @@ class FastAPIWebsocketOutputTransport(BaseOutputTransport):
|
||||
await self._write_frame(frame)
|
||||
self._next_send_time = 0
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send a transport message frame.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -27,9 +27,9 @@ from pipecat.frames.frames import (
|
||||
InputAudioRawFrame,
|
||||
InterruptionFrame,
|
||||
OutputAudioRawFrame,
|
||||
OutputTransportMessageFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
StartFrame,
|
||||
TransportMessageFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.serializers.base_serializer import FrameSerializer
|
||||
@@ -338,7 +338,9 @@ class WebsocketServerOutputTransport(BaseOutputTransport):
|
||||
await self._write_frame(frame)
|
||||
self._next_send_time = 0
|
||||
|
||||
async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
|
||||
async def send_message(
|
||||
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
|
||||
):
|
||||
"""Send a transport message frame to the client.
|
||||
|
||||
Args:
|
||||
|
||||
@@ -29,7 +29,7 @@ class EventHandler:
|
||||
This data class stores the event name, a list of handlers to run for this
|
||||
event, and whether these handlers will be executed in a task.
|
||||
|
||||
Attributes:
|
||||
Parameters:
|
||||
name (str): The name of the event handler.
|
||||
handlers (List[Any]): A list of functions to be called when this event is triggered.
|
||||
is_sync (bool): Indicates whether the functions are executed in a task.
|
||||
|
||||
@@ -11,8 +11,8 @@ from pipecat.frames.frames import (
|
||||
EndFrame,
|
||||
Frame,
|
||||
InterruptionFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
TextFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.processors.filters.identity_filter import IdentityFilter
|
||||
@@ -81,7 +81,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase):
|
||||
|
||||
if isinstance(frame, TextFrame):
|
||||
await self.push_interruption_task_frame_and_wait()
|
||||
await self.push_frame(TransportMessageUrgentFrame(message=frame.text))
|
||||
await self.push_frame(OutputTransportMessageUrgentFrame(message=frame.text))
|
||||
else:
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
@@ -101,7 +101,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase):
|
||||
expected_down_frames = [
|
||||
InterruptionFrame,
|
||||
InterruptionFrame,
|
||||
TransportMessageUrgentFrame,
|
||||
OutputTransportMessageUrgentFrame,
|
||||
EndFrame,
|
||||
]
|
||||
await run_test(
|
||||
|
||||
Reference in New Issue
Block a user