Add tones to OutputDTMFFrame and simplify DTMF frame hierarchy
Introduces a new `tones` field on `OutputDTMFFrame` and `OutputDTMFUrgentFrame` for sending multi-digit DTMF sequences and deprecates the existing single-key `button` field. When only `button` is set, `button.value` is used as a single-character `tones` string for backward compatibility. `DTMFFrame` is kept as an empty marker class so both input and output DTMF frames can still be identified via `isinstance`. `InputDTMFFrame` keeps its required `button` field (single keypress semantics). The Daily-specific `DailyOutputDTMFFrame` and `DailyOutputDTMFUrgentFrame` frames no longer need to override `button` and simply add `session_id` and `digit_duration_ms`, which are forwarded to Daily's `send_dtmf` as `sessionId` and `digitDurationMs`. The base output audio fallback now iterates `tones` and generates a tone per character; LiveKit's native DTMF path sends only `tones[0]` since its API is single-tone, so any remaining characters in `tones` are not sent on that path.
This commit is contained in:
@@ -1,2 +1,3 @@
|
||||
- Added `DailyTransport.send_dtmf()` to expose the Daily call client's DTMF sending capability, enabling applications to send tones during a call (e.g. IVR navigation).
|
||||
- Added `DailyOutputDTMFFrame` and `DailyOutputDTMFUrgentFrame` frames for sending DTMF through the Daily transport pipeline. Both carry explicit `tones`, `session_id` and `digit_duration_ms` fields that are forwarded to Daily's `send_dtmf` as `tones`, `sessionId` and `digitDurationMs`. When the Daily transport processes these frames, the inherited `button` and `transport_destination` fields are ignored.
|
||||
- Added `tones` field to `OutputDTMFFrame` and `OutputDTMFUrgentFrame` for sending multi-digit DTMF sequences (e.g. `"123#"`). Valid characters are the values of `KeypadEntry`.
|
||||
- Added `DailyOutputDTMFFrame` and `DailyOutputDTMFUrgentFrame` frames. In addition to the inherited `tones`, they accept `session_id` and `digit_duration_ms`, which are forwarded to Daily's `send_dtmf` as `sessionId` and `digitDurationMs`.
|
||||
|
||||
1
changelog/4313.deprecated.md
Normal file
1
changelog/4313.deprecated.md
Normal file
@@ -0,0 +1 @@
|
||||
- Deprecated the `button` field on `OutputDTMFFrame` and `OutputDTMFUrgentFrame`. Use the new `tones` field instead. When only `button` is set, `button.value` is used as a single-tone `tones` string for backward compatibility.
|
||||
@@ -730,13 +730,14 @@ class OutputTransportMessageFrame(DataFrame):
|
||||
|
||||
@dataclass
|
||||
class DTMFFrame:
|
||||
"""Base class for DTMF (Dual-Tone Multi-Frequency) keypad frames.
|
||||
"""Marker base class for DTMF (Dual-Tone Multi-Frequency) keypad frames.
|
||||
|
||||
Parameters:
|
||||
button: The DTMF keypad entry that was pressed.
|
||||
Used only as a shared tag so that both input and output DTMF frames can
|
||||
be identified via ``isinstance(frame, DTMFFrame)``. The concrete frames
|
||||
define their own fields.
|
||||
"""
|
||||
|
||||
button: KeypadEntry
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -744,12 +745,32 @@ class OutputDTMFFrame(DTMFFrame, DataFrame):
|
||||
"""DTMF keypress output frame for transport queuing.
|
||||
|
||||
A DTMF keypress output that will be queued. If your transport supports
|
||||
multiple dial-out destinations, use the `transport_destination` field to
|
||||
specify where the DTMF keypress should be sent.
|
||||
multiple dial-out destinations, use the ``transport_destination`` field
|
||||
to specify where the DTMF keypress should be sent.
|
||||
|
||||
Parameters:
|
||||
tones: String of one or more DTMF tones to send (e.g. ``"1"`` or
|
||||
``"123#"``). Valid characters are the values of
|
||||
:class:`~pipecat.audio.dtmf.types.KeypadEntry`.
|
||||
button: A single DTMF keypad entry to send.
|
||||
|
||||
.. deprecated:: 1.1.0
|
||||
Use ``tones`` instead. When only ``button`` is set,
|
||||
``button.value`` is used as a single-tone ``tones`` string.
|
||||
"""
|
||||
|
||||
button: Optional[KeypadEntry] = None
|
||||
tones: Optional[str] = None
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
if self.tones is None and self.button is not None:
|
||||
self.tones = self.button.value
|
||||
if not self.tones:
|
||||
raise ValueError(f"{self.__class__.__name__} requires `tones` or `button` to be set")
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}(tone: {self.button})"
|
||||
return f"{self.name}(tones: {self.tones})"
|
||||
|
||||
|
||||
#
|
||||
@@ -1232,7 +1253,13 @@ class AssistantImageRawFrame(OutputImageRawFrame):
|
||||
|
||||
@dataclass
|
||||
class InputDTMFFrame(DTMFFrame, SystemFrame):
|
||||
"""DTMF keypress input frame from transport."""
|
||||
"""DTMF keypress input frame from transport.
|
||||
|
||||
Parameters:
|
||||
button: The DTMF keypad entry that was pressed.
|
||||
"""
|
||||
|
||||
button: KeypadEntry
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}(tone: {self.button.value})"
|
||||
@@ -1243,11 +1270,32 @@ class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame):
|
||||
"""DTMF keypress output frame for immediate sending.
|
||||
|
||||
A DTMF keypress output that will be sent right away. If your transport
|
||||
supports multiple dial-out destinations, use the `transport_destination`
|
||||
supports multiple dial-out destinations, use the ``transport_destination``
|
||||
field to specify where the DTMF keypress should be sent.
|
||||
|
||||
Parameters:
|
||||
tones: String of one or more DTMF tones to send (e.g. ``"1"`` or
|
||||
``"123#"``). Valid characters are the values of
|
||||
:class:`~pipecat.audio.dtmf.types.KeypadEntry`.
|
||||
button: A single DTMF keypad entry to send.
|
||||
|
||||
.. deprecated:: 1.1.0
|
||||
Use ``tones`` instead. When only ``button`` is set,
|
||||
``button.value`` is used as a single-tone ``tones`` string.
|
||||
"""
|
||||
|
||||
pass
|
||||
button: Optional[KeypadEntry] = None
|
||||
tones: Optional[str] = None
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
if self.tones is None and self.button is not None:
|
||||
self.tones = self.button.value
|
||||
if not self.tones:
|
||||
raise ValueError(f"{self.__class__.__name__} requires `tones` or `button` to be set")
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}(tones: {self.tones})"
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -19,6 +19,7 @@ from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional
|
||||
from loguru import logger
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.audio.dtmf.types import KeypadEntry
|
||||
from pipecat.audio.dtmf.utils import load_dtmf_audio
|
||||
from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
|
||||
from pipecat.audio.utils import create_stream_resampler, is_silence
|
||||
@@ -275,11 +276,19 @@ class BaseOutputTransport(FrameProcessor):
|
||||
Args:
|
||||
frame: The DTMF frame to write.
|
||||
"""
|
||||
dtmf_audio = await load_dtmf_audio(frame.button, sample_rate=self._sample_rate)
|
||||
dtmf_audio_frame = OutputAudioRawFrame(
|
||||
audio=dtmf_audio, sample_rate=self._sample_rate, num_channels=1
|
||||
)
|
||||
await self.write_audio_frame(dtmf_audio_frame)
|
||||
if not frame.tones:
|
||||
return
|
||||
for char in frame.tones:
|
||||
try:
|
||||
keypad_entry = KeypadEntry(char)
|
||||
except ValueError:
|
||||
logger.warning(f"Skipping invalid DTMF tone: {char!r}")
|
||||
continue
|
||||
dtmf_audio = await load_dtmf_audio(keypad_entry, sample_rate=self._sample_rate)
|
||||
dtmf_audio_frame = OutputAudioRawFrame(
|
||||
audio=dtmf_audio, sample_rate=self._sample_rate, num_channels=1
|
||||
)
|
||||
await self.write_audio_frame(dtmf_audio_frame)
|
||||
|
||||
async def send_audio(self, frame: OutputAudioRawFrame):
|
||||
"""Send an audio frame downstream.
|
||||
|
||||
@@ -158,72 +158,39 @@ class DailyOutputDTMFFrame(OutputDTMFFrame):
|
||||
"""DTMF output frame with Daily-specific options for transport queuing.
|
||||
|
||||
A DTMF keypress output that will be queued after any preceding audio has
|
||||
finished playing. When this frame is processed by the Daily transport,
|
||||
the inherited ``button`` and ``transport_destination`` fields are ignored
|
||||
in favor of the explicit ``tones``, ``session_id`` and
|
||||
``digit_duration_ms`` fields below.
|
||||
finished playing. Inherits ``tones`` from :class:`OutputDTMFFrame`; the
|
||||
two extra fields are forwarded to Daily's ``send_dtmf`` as ``sessionId``
|
||||
and ``digitDurationMs``.
|
||||
|
||||
Parameters:
|
||||
tones: String of one or more DTMF tones to send (e.g. ``"1"`` or
|
||||
``"123#"``). Forwarded to Daily's ``send_dtmf`` as ``tones``.
|
||||
session_id: Target participant session id. Forwarded to Daily's
|
||||
``send_dtmf`` as ``sessionId``. When ``None``, Daily sends the
|
||||
tones to the default destination for the call.
|
||||
session_id: Target participant session id. When ``None``, Daily
|
||||
sends the tones to the default destination for the call.
|
||||
digit_duration_ms: Duration of each DTMF digit in milliseconds.
|
||||
Forwarded to Daily's ``send_dtmf`` as ``digitDurationMs``. When
|
||||
``None``, Daily's default duration is used.
|
||||
When ``None``, Daily's default duration is used.
|
||||
"""
|
||||
|
||||
# Override the inherited `button` to be optional: Daily's send_dtmf takes
|
||||
# a multi-character `tones` string, so a single KeypadEntry is not
|
||||
# required here.
|
||||
button: Optional[KeypadEntry] = None # pyright: ignore[reportIncompatibleVariableOverride]
|
||||
tones: Optional[str] = None
|
||||
session_id: Optional[str] = None
|
||||
digit_duration_ms: Optional[int] = None
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
if not self.tones:
|
||||
raise ValueError(f"{self.__class__.__name__} requires `tones` to be set")
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}(tones: {self.tones})"
|
||||
|
||||
|
||||
@dataclass
|
||||
class DailyOutputDTMFUrgentFrame(OutputDTMFUrgentFrame):
|
||||
"""DTMF output frame with Daily-specific options for immediate sending.
|
||||
|
||||
A DTMF keypress output that will be sent right away. When this frame is
|
||||
processed by the Daily transport, the inherited ``button`` and
|
||||
``transport_destination`` fields are ignored in favor of the explicit
|
||||
``tones``, ``session_id`` and ``digit_duration_ms`` fields below.
|
||||
A DTMF keypress output that will be sent right away. Inherits ``tones``
|
||||
from :class:`OutputDTMFUrgentFrame`; the two extra fields are forwarded
|
||||
to Daily's ``send_dtmf`` as ``sessionId`` and ``digitDurationMs``.
|
||||
|
||||
Parameters:
|
||||
tones: String of one or more DTMF tones to send (e.g. ``"1"`` or
|
||||
``"123#"``). Forwarded to Daily's ``send_dtmf`` as ``tones``.
|
||||
session_id: Target participant session id. Forwarded to Daily's
|
||||
``send_dtmf`` as ``sessionId``. When ``None``, Daily sends the
|
||||
tones to the default destination for the call.
|
||||
session_id: Target participant session id. When ``None``, Daily
|
||||
sends the tones to the default destination for the call.
|
||||
digit_duration_ms: Duration of each DTMF digit in milliseconds.
|
||||
Forwarded to Daily's ``send_dtmf`` as ``digitDurationMs``. When
|
||||
``None``, Daily's default duration is used.
|
||||
When ``None``, Daily's default duration is used.
|
||||
"""
|
||||
|
||||
button: Optional[KeypadEntry] = None # pyright: ignore[reportIncompatibleVariableOverride]
|
||||
tones: Optional[str] = None
|
||||
session_id: Optional[str] = None
|
||||
digit_duration_ms: Optional[int] = None
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
if not self.tones:
|
||||
raise ValueError(f"{self.__class__.__name__} requires `tones` to be set")
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.name}(tones: {self.tones})"
|
||||
|
||||
|
||||
class WebRTCVADAnalyzer(VADAnalyzer):
|
||||
"""Voice Activity Detection analyzer using WebRTC.
|
||||
@@ -2211,25 +2178,23 @@ class DailyOutputTransport(BaseOutputTransport):
|
||||
Args:
|
||||
frame: The DTMF frame to write. When it is a
|
||||
:class:`DailyOutputDTMFFrame` or
|
||||
:class:`DailyOutputDTMFUrgentFrame`, the explicit ``tones``,
|
||||
``session_id`` and ``digit_duration_ms`` fields are forwarded
|
||||
to the Daily call client (and the inherited ``button`` /
|
||||
``transport_destination`` fields are ignored).
|
||||
:class:`DailyOutputDTMFUrgentFrame`, the ``session_id`` and
|
||||
``digit_duration_ms`` fields are also forwarded to the Daily
|
||||
call client.
|
||||
"""
|
||||
if not frame.tones:
|
||||
return
|
||||
|
||||
settings: Dict[str, Any] = {"tones": frame.tones}
|
||||
if isinstance(frame, (DailyOutputDTMFFrame, DailyOutputDTMFUrgentFrame)):
|
||||
settings: Dict[str, Any] = {"tones": frame.tones}
|
||||
if frame.session_id is not None:
|
||||
settings["sessionId"] = frame.session_id
|
||||
if frame.digit_duration_ms is not None:
|
||||
settings["digitDurationMs"] = frame.digit_duration_ms
|
||||
await self._client.send_dtmf(settings)
|
||||
else:
|
||||
await self._client.send_dtmf(
|
||||
{
|
||||
"sessionId": frame.transport_destination,
|
||||
"tones": frame.button.value,
|
||||
}
|
||||
)
|
||||
elif frame.transport_destination is not None:
|
||||
settings["sessionId"] = frame.transport_destination
|
||||
|
||||
await self._client.send_dtmf(settings)
|
||||
|
||||
|
||||
class DailyTransport(BaseTransport):
|
||||
|
||||
@@ -898,10 +898,15 @@ class LiveKitOutputTransport(BaseOutputTransport):
|
||||
async def _write_dtmf_native(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
|
||||
"""Use LiveKit's native publish_dtmf method for telephone events.
|
||||
|
||||
LiveKit's DTMF API sends a single tone per call, so when ``frame.tones``
|
||||
contains multiple characters only the first one is sent.
|
||||
|
||||
Args:
|
||||
frame: The DTMF frame to write.
|
||||
"""
|
||||
await self._client.send_dtmf(frame.button.value)
|
||||
if not frame.tones:
|
||||
return
|
||||
await self._client.send_dtmf(frame.tones[0])
|
||||
|
||||
def _convert_pipecat_audio_to_livekit(self, pipecat_audio: bytes) -> rtc.AudioFrame:
|
||||
"""Convert Pipecat audio data to LiveKit audio frame."""
|
||||
|
||||
Reference in New Issue
Block a user