- Added an optional context_id field (defaults to None) to all TTS-related frames (TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, AggregatedTextFrame, TTSTextFrame)
- Added append_to_context parameter to TTSSpeakFrame to control whether the spoken text is appended to the LLM context
This commit is contained in:
@@ -279,9 +279,12 @@ class TTSAudioRawFrame(OutputAudioRawFrame):
|
||||
"""Audio data frame generated by Text-to-Speech services.
|
||||
|
||||
A chunk of output audio generated by a TTS service, ready for playback.
|
||||
|
||||
Parameters:
|
||||
context_id: Unique identifier for the TTS context that generated this audio.
|
||||
"""
|
||||
|
||||
pass
|
||||
context_id: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -343,6 +346,11 @@ class TextFrame(DataFrame):
|
||||
|
||||
Parameters:
|
||||
text: The text content.
|
||||
skip_tts: Whether this text should be skipped by the TTS service.
|
||||
includes_inter_frame_spaces: Whether any necessary inter-frame (leading/trailing) spaces are already
|
||||
included in the text.
|
||||
append_to_context: Whether this text should be appended to the LLM context.
|
||||
Defaults to True.
|
||||
"""
|
||||
|
||||
text: str
|
||||
@@ -397,9 +405,11 @@ class AggregatedTextFrame(TextFrame):
|
||||
|
||||
Parameters:
|
||||
aggregated_by: Method used to aggregate the text frames.
|
||||
context_id: Unique identifier for the TTS context that generated this text.
|
||||
"""
|
||||
|
||||
aggregated_by: AggregationType | str
|
||||
context_id: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -411,9 +421,13 @@ class VisionTextFrame(LLMTextFrame):
|
||||
|
||||
@dataclass
|
||||
class TTSTextFrame(AggregatedTextFrame):
|
||||
"""Text frame generated by Text-to-Speech services."""
|
||||
"""Text frame generated by Text-to-Speech services.
|
||||
|
||||
pass
|
||||
Parameters:
|
||||
context_id: Unique identifier for the TTS context that generated this text.
|
||||
"""
|
||||
|
||||
context_id: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -923,9 +937,11 @@ class TTSSpeakFrame(DataFrame):
|
||||
|
||||
Parameters:
|
||||
text: The text to be spoken.
|
||||
append_to_context: Whether to append the text to the context.
|
||||
"""
|
||||
|
||||
text: str
|
||||
append_to_context: Optional[bool] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -2023,16 +2039,23 @@ class TTSStartedFrame(ControlFrame):
|
||||
TTSStoppedFrame. These frames can be used for aggregating audio frames in a
|
||||
transport to optimize the size of frames sent to the session, without
|
||||
needing to control this in the TTS service.
|
||||
|
||||
Parameters:
|
||||
context_id: Unique identifier for this TTS context.
|
||||
"""
|
||||
|
||||
pass
|
||||
context_id: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class TTSStoppedFrame(ControlFrame):
|
||||
"""Frame indicating the end of a TTS response."""
|
||||
"""Frame indicating the end of a TTS response.
|
||||
|
||||
pass
|
||||
Parameters:
|
||||
context_id: Unique identifier for this TTS context.
|
||||
"""
|
||||
|
||||
context_id: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
Reference in New Issue
Block a user