Add tones to OutputDTMFFrame and simplify DTMF frame hierarchy

Introduces a new `tones` field on `OutputDTMFFrame` and
`OutputDTMFUrgentFrame` for sending multi-digit DTMF sequences and
deprecates the existing single-key `button` field. When only `button`
is set, `button.value` is used as a single-character `tones` string for
backward compatibility. If neither `tones` nor `button` is set,
constructing the frame raises `ValueError`.

`DTMFFrame` is kept as an empty marker class so both input and output
DTMF frames can still be identified via isinstance. `InputDTMFFrame`
keeps its required `button` field (single keypress semantics).

The Daily-specific `DailyOutputDTMFFrame` and
`DailyOutputDTMFUrgentFrame` frames no longer need to override
`button` and simply add `session_id` and `digit_duration_ms`, which
are forwarded to Daily's `send_dtmf` as `sessionId` and
`digitDurationMs`.

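The base output audio fallback now iterates `tones` and generates a
tone per character, skipping (with a logged warning) any character that
is not a valid `KeypadEntry`. LiveKit's native DTMF path sends only
`tones[0]`, since its API is single-tone; any remaining characters in
`tones` are not sent.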
This commit is contained in:
Aleix Conchillo Flaqué
2026-04-15 14:48:02 -07:00
parent 30f39d7395
commit 675b7df408
6 changed files with 104 additions and 75 deletions

View File

@@ -1,2 +1,3 @@
- Added `DailyTransport.send_dtmf()` to expose the Daily call client's DTMF sending capability, enabling applications to send tones during a call (e.g. IVR navigation).
- Added `DailyOutputDTMFFrame` and `DailyOutputDTMFUrgentFrame` frames for sending DTMF through the Daily transport pipeline. Both carry explicit `tones`, `session_id` and `digit_duration_ms` fields that are forwarded to Daily's `send_dtmf` as `tones`, `sessionId` and `digitDurationMs`. When the Daily transport processes these frames, the inherited `button` and `transport_destination` fields are ignored.
- Added `tones` field to `OutputDTMFFrame` and `OutputDTMFUrgentFrame` for sending multi-digit DTMF sequences (e.g. `"123#"`). Valid characters are the values of `KeypadEntry`.
- Added `DailyOutputDTMFFrame` and `DailyOutputDTMFUrgentFrame` frames. In addition to the inherited `tones`, they accept `session_id` and `digit_duration_ms`, which are forwarded to Daily's `send_dtmf` as `sessionId` and `digitDurationMs`.

View File

@@ -0,0 +1 @@
- Deprecated the `button` field on `OutputDTMFFrame` and `OutputDTMFUrgentFrame`. Use the new `tones` field instead. When only `button` is set, `button.value` is used as a single-tone `tones` string for backward compatibility.

View File

@@ -730,13 +730,14 @@ class OutputTransportMessageFrame(DataFrame):
@dataclass
class DTMFFrame:
"""Base class for DTMF (Dual-Tone Multi-Frequency) keypad frames.
"""Marker base class for DTMF (Dual-Tone Multi-Frequency) keypad frames.
Parameters:
button: The DTMF keypad entry that was pressed.
Used only as a shared tag so that both input and output DTMF frames can
be identified via ``isinstance(frame, DTMFFrame)``. The concrete frames
define their own fields.
"""
button: KeypadEntry
pass
@dataclass
@@ -744,12 +745,32 @@ class OutputDTMFFrame(DTMFFrame, DataFrame):
"""DTMF keypress output frame for transport queuing.
A DTMF keypress output that will be queued. If your transport supports
multiple dial-out destinations, use the `transport_destination` field to
specify where the DTMF keypress should be sent.
multiple dial-out destinations, use the ``transport_destination`` field
to specify where the DTMF keypress should be sent.
Parameters:
tones: String of one or more DTMF tones to send (e.g. ``"1"`` or
``"123#"``). Valid characters are the values of
:class:`~pipecat.audio.dtmf.types.KeypadEntry`.
button: A single DTMF keypad entry to send.
.. deprecated:: 1.1.0
Use ``tones`` instead. When only ``button`` is set,
``button.value`` is used as a single-tone ``tones`` string.
"""
button: Optional[KeypadEntry] = None
tones: Optional[str] = None
def __post_init__(self):
super().__post_init__()
if self.tones is None and self.button is not None:
self.tones = self.button.value
if not self.tones:
raise ValueError(f"{self.__class__.__name__} requires `tones` or `button` to be set")
def __str__(self):
return f"{self.name}(tone: {self.button})"
return f"{self.name}(tones: {self.tones})"
#
@@ -1232,7 +1253,13 @@ class AssistantImageRawFrame(OutputImageRawFrame):
@dataclass
class InputDTMFFrame(DTMFFrame, SystemFrame):
"""DTMF keypress input frame from transport."""
"""DTMF keypress input frame from transport.
Parameters:
button: The DTMF keypad entry that was pressed.
"""
button: KeypadEntry
def __str__(self):
return f"{self.name}(tone: {self.button.value})"
@@ -1243,11 +1270,32 @@ class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame):
"""DTMF keypress output frame for immediate sending.
A DTMF keypress output that will be sent right away. If your transport
supports multiple dial-out destinations, use the `transport_destination`
supports multiple dial-out destinations, use the ``transport_destination``
field to specify where the DTMF keypress should be sent.
Parameters:
tones: String of one or more DTMF tones to send (e.g. ``"1"`` or
``"123#"``). Valid characters are the values of
:class:`~pipecat.audio.dtmf.types.KeypadEntry`.
button: A single DTMF keypad entry to send.
.. deprecated:: 1.1.0
Use ``tones`` instead. When only ``button`` is set,
``button.value`` is used as a single-tone ``tones`` string.
"""
pass
button: Optional[KeypadEntry] = None
tones: Optional[str] = None
def __post_init__(self):
super().__post_init__()
if self.tones is None and self.button is not None:
self.tones = self.button.value
if not self.tones:
raise ValueError(f"{self.__class__.__name__} requires `tones` or `button` to be set")
def __str__(self):
return f"{self.name}(tones: {self.tones})"
@dataclass

View File

@@ -19,6 +19,7 @@ from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional
from loguru import logger
from PIL import Image
from pipecat.audio.dtmf.types import KeypadEntry
from pipecat.audio.dtmf.utils import load_dtmf_audio
from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
from pipecat.audio.utils import create_stream_resampler, is_silence
@@ -275,11 +276,19 @@ class BaseOutputTransport(FrameProcessor):
Args:
frame: The DTMF frame to write.
"""
dtmf_audio = await load_dtmf_audio(frame.button, sample_rate=self._sample_rate)
dtmf_audio_frame = OutputAudioRawFrame(
audio=dtmf_audio, sample_rate=self._sample_rate, num_channels=1
)
await self.write_audio_frame(dtmf_audio_frame)
if not frame.tones:
return
for char in frame.tones:
try:
keypad_entry = KeypadEntry(char)
except ValueError:
logger.warning(f"Skipping invalid DTMF tone: {char!r}")
continue
dtmf_audio = await load_dtmf_audio(keypad_entry, sample_rate=self._sample_rate)
dtmf_audio_frame = OutputAudioRawFrame(
audio=dtmf_audio, sample_rate=self._sample_rate, num_channels=1
)
await self.write_audio_frame(dtmf_audio_frame)
async def send_audio(self, frame: OutputAudioRawFrame):
"""Send an audio frame downstream.

View File

@@ -158,72 +158,39 @@ class DailyOutputDTMFFrame(OutputDTMFFrame):
"""DTMF output frame with Daily-specific options for transport queuing.
A DTMF keypress output that will be queued after any preceding audio has
finished playing. When this frame is processed by the Daily transport,
the inherited ``button`` and ``transport_destination`` fields are ignored
in favor of the explicit ``tones``, ``session_id`` and
``digit_duration_ms`` fields below.
finished playing. Inherits ``tones`` from :class:`OutputDTMFFrame`; the
two extra fields are forwarded to Daily's ``send_dtmf`` as ``sessionId``
and ``digitDurationMs``.
Parameters:
tones: String of one or more DTMF tones to send (e.g. ``"1"`` or
``"123#"``). Forwarded to Daily's ``send_dtmf`` as ``tones``.
session_id: Target participant session id. Forwarded to Daily's
``send_dtmf`` as ``sessionId``. When ``None``, Daily sends the
tones to the default destination for the call.
session_id: Target participant session id. When ``None``, Daily
sends the tones to the default destination for the call.
digit_duration_ms: Duration of each DTMF digit in milliseconds.
Forwarded to Daily's ``send_dtmf`` as ``digitDurationMs``. When
``None``, Daily's default duration is used.
When ``None``, Daily's default duration is used.
"""
# Override the inherited `button` to be optional: Daily's send_dtmf takes
# a multi-character `tones` string, so a single KeypadEntry is not
# required here.
button: Optional[KeypadEntry] = None # pyright: ignore[reportIncompatibleVariableOverride]
tones: Optional[str] = None
session_id: Optional[str] = None
digit_duration_ms: Optional[int] = None
def __post_init__(self):
super().__post_init__()
if not self.tones:
raise ValueError(f"{self.__class__.__name__} requires `tones` to be set")
def __str__(self):
return f"{self.name}(tones: {self.tones})"
@dataclass
class DailyOutputDTMFUrgentFrame(OutputDTMFUrgentFrame):
"""DTMF output frame with Daily-specific options for immediate sending.
A DTMF keypress output that will be sent right away. When this frame is
processed by the Daily transport, the inherited ``button`` and
``transport_destination`` fields are ignored in favor of the explicit
``tones``, ``session_id`` and ``digit_duration_ms`` fields below.
A DTMF keypress output that will be sent right away. Inherits ``tones``
from :class:`OutputDTMFUrgentFrame`; the two extra fields are forwarded
to Daily's ``send_dtmf`` as ``sessionId`` and ``digitDurationMs``.
Parameters:
tones: String of one or more DTMF tones to send (e.g. ``"1"`` or
``"123#"``). Forwarded to Daily's ``send_dtmf`` as ``tones``.
session_id: Target participant session id. Forwarded to Daily's
``send_dtmf`` as ``sessionId``. When ``None``, Daily sends the
tones to the default destination for the call.
session_id: Target participant session id. When ``None``, Daily
sends the tones to the default destination for the call.
digit_duration_ms: Duration of each DTMF digit in milliseconds.
Forwarded to Daily's ``send_dtmf`` as ``digitDurationMs``. When
``None``, Daily's default duration is used.
When ``None``, Daily's default duration is used.
"""
button: Optional[KeypadEntry] = None # pyright: ignore[reportIncompatibleVariableOverride]
tones: Optional[str] = None
session_id: Optional[str] = None
digit_duration_ms: Optional[int] = None
def __post_init__(self):
super().__post_init__()
if not self.tones:
raise ValueError(f"{self.__class__.__name__} requires `tones` to be set")
def __str__(self):
return f"{self.name}(tones: {self.tones})"
class WebRTCVADAnalyzer(VADAnalyzer):
"""Voice Activity Detection analyzer using WebRTC.
@@ -2211,25 +2178,23 @@ class DailyOutputTransport(BaseOutputTransport):
Args:
frame: The DTMF frame to write. When it is a
:class:`DailyOutputDTMFFrame` or
:class:`DailyOutputDTMFUrgentFrame`, the explicit ``tones``,
``session_id`` and ``digit_duration_ms`` fields are forwarded
to the Daily call client (and the inherited ``button`` /
``transport_destination`` fields are ignored).
:class:`DailyOutputDTMFUrgentFrame`, the ``session_id`` and
``digit_duration_ms`` fields are also forwarded to the Daily
call client.
"""
if not frame.tones:
return
settings: Dict[str, Any] = {"tones": frame.tones}
if isinstance(frame, (DailyOutputDTMFFrame, DailyOutputDTMFUrgentFrame)):
settings: Dict[str, Any] = {"tones": frame.tones}
if frame.session_id is not None:
settings["sessionId"] = frame.session_id
if frame.digit_duration_ms is not None:
settings["digitDurationMs"] = frame.digit_duration_ms
await self._client.send_dtmf(settings)
else:
await self._client.send_dtmf(
{
"sessionId": frame.transport_destination,
"tones": frame.button.value,
}
)
elif frame.transport_destination is not None:
settings["sessionId"] = frame.transport_destination
await self._client.send_dtmf(settings)
class DailyTransport(BaseTransport):

View File

@@ -898,10 +898,15 @@ class LiveKitOutputTransport(BaseOutputTransport):
async def _write_dtmf_native(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
"""Use LiveKit's native publish_dtmf method for telephone events.
LiveKit's DTMF API sends a single tone per call, so when ``frame.tones``
contains multiple characters only the first one is sent.
Args:
frame: The DTMF frame to write.
"""
await self._client.send_dtmf(frame.button.value)
if not frame.tones:
return
await self._client.send_dtmf(frame.tones[0])
def _convert_pipecat_audio_to_livekit(self, pipecat_audio: bytes) -> rtc.AudioFrame:
"""Convert Pipecat audio data to LiveKit audio frame."""