- Added context_id field to all TTS-related frames (TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, AggregatedTextFrame, TTSTextFrame)

- Added append_to_context parameter to TTSSpeakFrame for conditional LLM context addition
2026-02-10 11:22:26 -03:00
parent 83039a1a35
commit f206aaa28d
1 changed files with 29 additions and 6 deletions
--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -279,9 +279,12 @@ class TTSAudioRawFrame(OutputAudioRawFrame):
    """Audio data frame generated by Text-to-Speech services.

    A chunk of output audio generated by a TTS service, ready for playback.
+
+    Parameters:
+        context_id: Unique identifier for the TTS context that generated this audio.
    """

-    pass
+    context_id: Optional[str] = None


@dataclass
@@ -343,6 +346,11 @@ class TextFrame(DataFrame):

    Parameters:
        text: The text content.
+        skip_tts: Whether this text should be skipped by the TTS service.
+        includes_inter_frame_spaces: Whether any necessary inter-frame (leading/trailing) spaces are already
+            included in the text.
+        append_to_context: Whether this text should be appended to the LLM context.
+            Defaults to True.
    """

    text: str
@@ -397,9 +405,11 @@ class AggregatedTextFrame(TextFrame):

    Parameters:
        aggregated_by: Method used to aggregate the text frames.
+        context_id: Unique identifier for the TTS context that generated this text.
    """

    aggregated_by: AggregationType | str
+    context_id: Optional[str] = None


@dataclass
@@ -411,9 +421,13 @@ class VisionTextFrame(LLMTextFrame):

@dataclass
 class TTSTextFrame(AggregatedTextFrame):
-    """Text frame generated by Text-to-Speech services."""
+    """Text frame generated by Text-to-Speech services.

-    pass
+    Parameters:
+        context_id: Unique identifier for the TTS context that generated this text.
+    """
+
+    context_id: Optional[str] = None


@dataclass
@@ -923,9 +937,11 @@ class TTSSpeakFrame(DataFrame):

    Parameters:
        text: The text to be spoken.
+        append_to_context: Whether to append the text to the LLM context.
    """

    text: str
+    append_to_context: Optional[bool] = None


@dataclass
@@ -2023,16 +2039,23 @@ class TTSStartedFrame(ControlFrame):
    TTSStoppedFrame. These frames can be used for aggregating audio frames in a
    transport to optimize the size of frames sent to the session, without
    needing to control this in the TTS service.
+
+    Parameters:
+        context_id: Unique identifier for this TTS context.
    """

-    pass
+    context_id: Optional[str] = None


@dataclass
 class TTSStoppedFrame(ControlFrame):
-    """Frame indicating the end of a TTS response."""
+    """Frame indicating the end of a TTS response.

-    pass
+    Parameters:
+        context_id: Unique identifier for this TTS context.
+    """
+
+    context_id: Optional[str] = None


@dataclass