- Added context_id field to all TTS-related frames (TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, AggregatedTextFrame, TTSTextFrame)

- Added append_to_context parameter to TTSSpeakFrame to control whether the spoken text is appended to the LLM context
This commit is contained in:
filipi87
2026-02-10 11:22:26 -03:00
parent 83039a1a35
commit f206aaa28d

View File

@@ -279,9 +279,12 @@ class TTSAudioRawFrame(OutputAudioRawFrame):
"""Audio data frame generated by Text-to-Speech services.
A chunk of output audio generated by a TTS service, ready for playback.
Parameters:
context_id: Unique identifier for the TTS context that generated this audio.
"""
pass
context_id: Optional[str] = None
@dataclass
@@ -343,6 +346,11 @@ class TextFrame(DataFrame):
Parameters:
text: The text content.
skip_tts: Whether this text should be skipped by the TTS service.
includes_inter_frame_spaces: Whether any necessary inter-frame (leading/trailing) spaces are already
included in the text.
append_to_context: Whether this text should be appended to the LLM context.
Defaults to True.
"""
text: str
@@ -397,9 +405,11 @@ class AggregatedTextFrame(TextFrame):
Parameters:
aggregated_by: Method used to aggregate the text frames.
context_id: Unique identifier for the TTS context that generated this text.
"""
aggregated_by: AggregationType | str
context_id: Optional[str] = None
@dataclass
@@ -411,9 +421,13 @@ class VisionTextFrame(LLMTextFrame):
@dataclass
class TTSTextFrame(AggregatedTextFrame):
"""Text frame generated by Text-to-Speech services."""
"""Text frame generated by Text-to-Speech services.
pass
Parameters:
context_id: Unique identifier for the TTS context that generated this text.
"""
context_id: Optional[str] = None
@dataclass
@@ -923,9 +937,11 @@ class TTSSpeakFrame(DataFrame):
Parameters:
text: The text to be spoken.
append_to_context: Whether to append the text to the context.
"""
text: str
append_to_context: Optional[bool] = None
@dataclass
@@ -2023,16 +2039,23 @@ class TTSStartedFrame(ControlFrame):
TTSStoppedFrame. These frames can be used for aggregating audio frames in a
transport to optimize the size of frames sent to the session, without
needing to control this in the TTS service.
Parameters:
context_id: Unique identifier for this TTS context.
"""
pass
context_id: Optional[str] = None
@dataclass
class TTSStoppedFrame(ControlFrame):
"""Frame indicating the end of a TTS response."""
"""Frame indicating the end of a TTS response.
pass
Parameters:
context_id: Unique identifier for this TTS context.
"""
context_id: Optional[str] = None
@dataclass