diff --git a/changelog/4313.added.md b/changelog/4313.added.md index 6bd4f22b2..c64bbc12e 100644 --- a/changelog/4313.added.md +++ b/changelog/4313.added.md @@ -1,2 +1,3 @@ - Added `DailyTransport.send_dtmf()` to expose the Daily call client's DTMF sending capability, enabling applications to send tones during a call (e.g. IVR navigation). -- Added `DailyOutputDTMFFrame` and `DailyOutputDTMFUrgentFrame` frames for sending DTMF through the Daily transport pipeline. Both carry explicit `tones`, `session_id` and `digit_duration_ms` fields that are forwarded to Daily's `send_dtmf` as `tones`, `sessionId` and `digitDurationMs`. When the Daily transport processes these frames, the inherited `button` and `transport_destination` fields are ignored. +- Added `tones` field to `OutputDTMFFrame` and `OutputDTMFUrgentFrame` for sending multi-digit DTMF sequences (e.g. `"123#"`). Valid characters are the values of `KeypadEntry`. +- Added `DailyOutputDTMFFrame` and `DailyOutputDTMFUrgentFrame` frames. In addition to the inherited `tones`, they accept `session_id` and `digit_duration_ms`, which are forwarded to Daily's `send_dtmf` as `sessionId` and `digitDurationMs`. diff --git a/changelog/4313.deprecated.md b/changelog/4313.deprecated.md new file mode 100644 index 000000000..fdf9249d4 --- /dev/null +++ b/changelog/4313.deprecated.md @@ -0,0 +1 @@ +- Deprecated the `button` field on `OutputDTMFFrame` and `OutputDTMFUrgentFrame`. Use the new `tones` field instead. When only `button` is set, `button.value` is used as a single-tone `tones` string for backward compatibility. diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 00f38cab8..d02bfcd90 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -730,13 +730,14 @@ class OutputTransportMessageFrame(DataFrame): @dataclass class DTMFFrame: - """Base class for DTMF (Dual-Tone Multi-Frequency) keypad frames. + """Marker base class for DTMF (Dual-Tone Multi-Frequency) keypad frames. - Parameters: - button: The DTMF keypad entry that was pressed. + Used only as a shared tag so that both input and output DTMF frames can + be identified via ``isinstance(frame, DTMFFrame)``. The concrete frames + define their own fields. """ - button: KeypadEntry + pass @dataclass @@ -744,12 +745,32 @@ class OutputDTMFFrame(DTMFFrame, DataFrame): """DTMF keypress output frame for transport queuing. A DTMF keypress output that will be queued. If your transport supports - multiple dial-out destinations, use the `transport_destination` field to - specify where the DTMF keypress should be sent. + multiple dial-out destinations, use the ``transport_destination`` field + to specify where the DTMF keypress should be sent. + + Parameters: + tones: String of one or more DTMF tones to send (e.g. ``"1"`` or + ``"123#"``). Valid characters are the values of + :class:`~pipecat.audio.dtmf.types.KeypadEntry`. + button: A single DTMF keypad entry to send. + + .. deprecated:: 1.1.0 + Use ``tones`` instead. When only ``button`` is set, + ``button.value`` is used as a single-tone ``tones`` string. """ + button: Optional[KeypadEntry] = None + tones: Optional[str] = None + + def __post_init__(self): + super().__post_init__() + if self.tones is None and self.button is not None: + self.tones = self.button.value + if not self.tones: + raise ValueError(f"{self.__class__.__name__} requires `tones` or `button` to be set") + def __str__(self): - return f"{self.name}(tone: {self.button})" + return f"{self.name}(tones: {self.tones})" # @@ -1232,7 +1253,13 @@ class AssistantImageRawFrame(OutputImageRawFrame): @dataclass class InputDTMFFrame(DTMFFrame, SystemFrame): - """DTMF keypress input frame from transport.""" + """DTMF keypress input frame from transport. + + Parameters: + button: The DTMF keypad entry that was pressed. + """ + + button: KeypadEntry def __str__(self): return f"{self.name}(tone: {self.button.value})" @@ -1243,11 +1270,32 @@ class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame): """DTMF keypress output frame for immediate sending. A DTMF keypress output that will be sent right away. If your transport - supports multiple dial-out destinations, use the `transport_destination` + supports multiple dial-out destinations, use the ``transport_destination`` field to specify where the DTMF keypress should be sent. + + Parameters: + tones: String of one or more DTMF tones to send (e.g. ``"1"`` or + ``"123#"``). Valid characters are the values of + :class:`~pipecat.audio.dtmf.types.KeypadEntry`. + button: A single DTMF keypad entry to send. + + .. deprecated:: 1.1.0 + Use ``tones`` instead. When only ``button`` is set, + ``button.value`` is used as a single-tone ``tones`` string. """ - pass + button: Optional[KeypadEntry] = None + tones: Optional[str] = None + + def __post_init__(self): + super().__post_init__() + if self.tones is None and self.button is not None: + self.tones = self.button.value + if not self.tones: + raise ValueError(f"{self.__class__.__name__} requires `tones` or `button` to be set") + + def __str__(self): + return f"{self.name}(tones: {self.tones})" @dataclass diff --git a/src/pipecat/transports/base_output.py b/src/pipecat/transports/base_output.py index d14daecbd..8a6e41331 100644 --- a/src/pipecat/transports/base_output.py +++ b/src/pipecat/transports/base_output.py @@ -19,6 +19,7 @@ from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional from loguru import logger from PIL import Image +from pipecat.audio.dtmf.types import KeypadEntry from pipecat.audio.dtmf.utils import load_dtmf_audio from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer from pipecat.audio.utils import create_stream_resampler, is_silence @@ -275,11 +276,19 @@ class BaseOutputTransport(FrameProcessor): Args: frame: The DTMF frame to write. """ - dtmf_audio = await load_dtmf_audio(frame.button, sample_rate=self._sample_rate) - dtmf_audio_frame = OutputAudioRawFrame( - audio=dtmf_audio, sample_rate=self._sample_rate, num_channels=1 - ) - await self.write_audio_frame(dtmf_audio_frame) + if not frame.tones: + return + for char in frame.tones: + try: + keypad_entry = KeypadEntry(char) + except ValueError: + logger.warning(f"Skipping invalid DTMF tone: {char!r}") + continue + dtmf_audio = await load_dtmf_audio(keypad_entry, sample_rate=self._sample_rate) + dtmf_audio_frame = OutputAudioRawFrame( + audio=dtmf_audio, sample_rate=self._sample_rate, num_channels=1 + ) + await self.write_audio_frame(dtmf_audio_frame) async def send_audio(self, frame: OutputAudioRawFrame): """Send an audio frame downstream. diff --git a/src/pipecat/transports/daily/transport.py b/src/pipecat/transports/daily/transport.py index 31b96ae97..739d47b59 100644 --- a/src/pipecat/transports/daily/transport.py +++ b/src/pipecat/transports/daily/transport.py @@ -158,72 +158,39 @@ class DailyOutputDTMFFrame(OutputDTMFFrame): """DTMF output frame with Daily-specific options for transport queuing. A DTMF keypress output that will be queued after any preceding audio has - finished playing. When this frame is processed by the Daily transport, - the inherited ``button`` and ``transport_destination`` fields are ignored - in favor of the explicit ``tones``, ``session_id`` and - ``digit_duration_ms`` fields below. + finished playing. Inherits ``tones`` from :class:`OutputDTMFFrame`; the + two extra fields are forwarded to Daily's ``send_dtmf`` as ``sessionId`` + and ``digitDurationMs``. Parameters: - tones: String of one or more DTMF tones to send (e.g. ``"1"`` or - ``"123#"``). Forwarded to Daily's ``send_dtmf`` as ``tones``. - session_id: Target participant session id. Forwarded to Daily's - ``send_dtmf`` as ``sessionId``. When ``None``, Daily sends the - tones to the default destination for the call. + session_id: Target participant session id. When ``None``, Daily + sends the tones to the default destination for the call. digit_duration_ms: Duration of each DTMF digit in milliseconds. - Forwarded to Daily's ``send_dtmf`` as ``digitDurationMs``. When - ``None``, Daily's default duration is used. + When ``None``, Daily's default duration is used. """ - # Override the inherited `button` to be optional: Daily's send_dtmf takes - # a multi-character `tones` string, so a single KeypadEntry is not - # required here. - button: Optional[KeypadEntry] = None # pyright: ignore[reportIncompatibleVariableOverride] - tones: Optional[str] = None session_id: Optional[str] = None digit_duration_ms: Optional[int] = None - def __post_init__(self): - super().__post_init__() - if not self.tones: - raise ValueError(f"{self.__class__.__name__} requires `tones` to be set") - - def __str__(self): - return f"{self.name}(tones: {self.tones})" - @dataclass class DailyOutputDTMFUrgentFrame(OutputDTMFUrgentFrame): """DTMF output frame with Daily-specific options for immediate sending. - A DTMF keypress output that will be sent right away. When this frame is - processed by the Daily transport, the inherited ``button`` and - ``transport_destination`` fields are ignored in favor of the explicit - ``tones``, ``session_id`` and ``digit_duration_ms`` fields below. + A DTMF keypress output that will be sent right away. Inherits ``tones`` + from :class:`OutputDTMFUrgentFrame`; the two extra fields are forwarded + to Daily's ``send_dtmf`` as ``sessionId`` and ``digitDurationMs``. Parameters: - tones: String of one or more DTMF tones to send (e.g. ``"1"`` or - ``"123#"``). Forwarded to Daily's ``send_dtmf`` as ``tones``. - session_id: Target participant session id. Forwarded to Daily's - ``send_dtmf`` as ``sessionId``. When ``None``, Daily sends the - tones to the default destination for the call. + session_id: Target participant session id. When ``None``, Daily + sends the tones to the default destination for the call. digit_duration_ms: Duration of each DTMF digit in milliseconds. - Forwarded to Daily's ``send_dtmf`` as ``digitDurationMs``. When - ``None``, Daily's default duration is used. + When ``None``, Daily's default duration is used. """ - button: Optional[KeypadEntry] = None # pyright: ignore[reportIncompatibleVariableOverride] - tones: Optional[str] = None session_id: Optional[str] = None digit_duration_ms: Optional[int] = None - def __post_init__(self): - super().__post_init__() - if not self.tones: - raise ValueError(f"{self.__class__.__name__} requires `tones` to be set") - - def __str__(self): - return f"{self.name}(tones: {self.tones})" - class WebRTCVADAnalyzer(VADAnalyzer): """Voice Activity Detection analyzer using WebRTC. @@ -2211,25 +2178,23 @@ class DailyOutputTransport(BaseOutputTransport): Args: frame: The DTMF frame to write. When it is a :class:`DailyOutputDTMFFrame` or - :class:`DailyOutputDTMFUrgentFrame`, the explicit ``tones``, - ``session_id`` and ``digit_duration_ms`` fields are forwarded - to the Daily call client (and the inherited ``button`` / - ``transport_destination`` fields are ignored). + :class:`DailyOutputDTMFUrgentFrame`, the ``session_id`` and + ``digit_duration_ms`` fields are also forwarded to the Daily + call client. """ + if not frame.tones: + return + + settings: Dict[str, Any] = {"tones": frame.tones} if isinstance(frame, (DailyOutputDTMFFrame, DailyOutputDTMFUrgentFrame)): - settings: Dict[str, Any] = {"tones": frame.tones} if frame.session_id is not None: settings["sessionId"] = frame.session_id if frame.digit_duration_ms is not None: settings["digitDurationMs"] = frame.digit_duration_ms - await self._client.send_dtmf(settings) - else: - await self._client.send_dtmf( - { - "sessionId": frame.transport_destination, - "tones": frame.button.value, - } - ) + elif frame.transport_destination is not None: + settings["sessionId"] = frame.transport_destination + + await self._client.send_dtmf(settings) class DailyTransport(BaseTransport): diff --git a/src/pipecat/transports/livekit/transport.py b/src/pipecat/transports/livekit/transport.py index f3b0574b8..0e904066a 100644 --- a/src/pipecat/transports/livekit/transport.py +++ b/src/pipecat/transports/livekit/transport.py @@ -898,10 +898,15 @@ class LiveKitOutputTransport(BaseOutputTransport): async def _write_dtmf_native(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame): """Use LiveKit's native publish_dtmf method for telephone events. + LiveKit's DTMF API sends a single tone per call, so when ``frame.tones`` + contains multiple characters only the first one is sent. + Args: frame: The DTMF frame to write. """ - await self._client.send_dtmf(frame.button.value) + if not frame.tones: + return + await self._client.send_dtmf(frame.tones[0]) def _convert_pipecat_audio_to_livekit(self, pipecat_audio: bytes) -> rtc.AudioFrame: """Convert Pipecat audio data to LiveKit audio frame."""