rename destination to transport_destination (and source to transport_source)

This commit is contained in:
Aleix Conchillo Flaqué
2025-04-30 18:36:56 -07:00
parent 5ad0058303
commit a6781b7352
6 changed files with 35 additions and 35 deletions

View File

@@ -13,20 +13,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
implementation supports it (e.g. Daily's custom tracks). With multiple
destinations it is possible to send different audio or video tracks with a
single transport simultaneously. To do that, you need to set the new
`Frame.destination` field with your desired transport destination (e.g. custom
track name), tell the transport you want a new destination with
`Frame.transport_destination` field with your desired transport destination
(e.g. custom track name), tell the transport you want a new destination with
`TransportParams.audio_out_destinations` or
`TransportParams.video_out_destinations` and the transport should take care of
the rest.
- Similarly to the new `Frame.destination`, there's a new `Frame.source` field
which is set by the `BaseInputTransport` if the incoming data comes from a
non-default source (e.g. custom tracks).
- Similarly to the new `Frame.transport_destination`, there's a new
`Frame.transport_source` field which is set by the `BaseInputTransport` if the
incoming data comes from a non-default source (e.g. custom tracks).
- `TTSService` has a new `destination` constructor parameter. This parameter
will be used to update the `Frame.destination` field for each generated
`TTSAudioRawFrame`. This allows sending multiple bots' audio to multiple
destinations in the same pipeline.
- `TTSService` has a new `transport_destination` constructor parameter. This
parameter will be used to update the `Frame.transport_destination` field for
each generated `TTSAudioRawFrame`. This allows sending multiple bots' audio to
multiple destinations in the same pipeline.
- Added `RTVIObserverParams` which allows you to configure what RTVI messages
are sent to the clients.

View File

@@ -66,17 +66,17 @@ async def main():
tts_spanish = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="cefcb124-080b-4655-b31f-932f3ee743de",
destination="spanish",
transport_destination="spanish",
)
tts_french = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="8832a0b5-47b2-4751-bb22-6a8e2149303d",
destination="french",
transport_destination="french",
)
tts_german = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),
voice_id="38aabb6a-f52b-4fb0-a3d1-988518f4dc06",
destination="german",
transport_destination="german",
)
messages_spanish = [

View File

@@ -60,16 +60,16 @@ class Frame:
name: str = field(init=False)
pts: Optional[int] = field(init=False)
metadata: Dict[str, Any] = field(init=False)
source: Optional[str] = field(init=False)
destination: Optional[str] = field(init=False)
transport_source: Optional[str] = field(init=False)
transport_destination: Optional[str] = field(init=False)
def __post_init__(self):
self.id: int = obj_id()
self.name: str = f"{self.__class__.__name__}#{obj_count(self)}"
self.pts: Optional[int] = None
self.metadata: Dict[str, Any] = {}
self.source: Optional[str] = None
self.destination: Optional[str] = None
self.transport_source: Optional[str] = None
self.transport_destination: Optional[str] = None
def __str__(self):
return self.name
@@ -152,7 +152,7 @@ class OutputAudioRawFrame(DataFrame, AudioRawFrame):
def __str__(self):
pts = format_pts(self.pts)
return f"{self.name}(pts: {pts}, destination: {self.destination}, size: {len(self.audio)}, frames: {self.num_frames}, sample_rate: {self.sample_rate}, channels: {self.num_channels})"
return f"{self.name}(pts: {pts}, destination: {self.transport_destination}, size: {len(self.audio)}, frames: {self.num_frames}, sample_rate: {self.sample_rate}, channels: {self.num_channels})"
@dataclass
@@ -734,7 +734,7 @@ class InputAudioRawFrame(SystemFrame, AudioRawFrame):
def __str__(self):
pts = format_pts(self.pts)
return f"{self.name}(pts: {pts}, source: {self.source}, size: {len(self.audio)}, frames: {self.num_frames}, sample_rate: {self.sample_rate}, channels: {self.num_channels})"
return f"{self.name}(pts: {pts}, source: {self.transport_source}, size: {len(self.audio)}, frames: {self.num_frames}, sample_rate: {self.sample_rate}, channels: {self.num_channels})"
@dataclass
@@ -747,7 +747,7 @@ class InputImageRawFrame(SystemFrame, ImageRawFrame):
def __str__(self):
pts = format_pts(self.pts)
return f"{self.name}(pts: {pts}, source: {self.source}, size: {self.size}, format: {self.format})"
return f"{self.name}(pts: {pts}, source: {self.transport_source}, size: {self.size}, format: {self.format})"
@dataclass
@@ -758,7 +758,7 @@ class UserAudioRawFrame(InputAudioRawFrame):
def __str__(self):
pts = format_pts(self.pts)
return f"{self.name}(pts: {pts}, user: {self.user_id}, source: {self.source}, size: {len(self.audio)}, frames: {self.num_frames}, sample_rate: {self.sample_rate}, channels: {self.num_channels})"
return f"{self.name}(pts: {pts}, user: {self.user_id}, source: {self.transport_source}, size: {len(self.audio)}, frames: {self.num_frames}, sample_rate: {self.sample_rate}, channels: {self.num_channels})"
@dataclass
@@ -770,7 +770,7 @@ class UserImageRawFrame(InputImageRawFrame):
def __str__(self):
pts = format_pts(self.pts)
return f"{self.name}(pts: {pts}, user: {self.user_id}, source: {self.source}, size: {self.size}, format: {self.format}, request: {self.request})"
return f"{self.name}(pts: {pts}, user: {self.user_id}, source: {self.transport_source}, size: {self.size}, format: {self.format}, request: {self.request})"
@dataclass

View File

@@ -66,8 +66,8 @@ class TTSService(AIService):
# Text filter executed after text has been aggregated.
text_filters: Sequence[BaseTextFilter] = [],
text_filter: Optional[BaseTextFilter] = None,
# Audio destination of the generated frames.
destination: Optional[str] = None,
# Audio transport destination of the generated frames.
transport_destination: Optional[str] = None,
**kwargs,
):
super().__init__(**kwargs)
@@ -84,7 +84,7 @@ class TTSService(AIService):
self._settings: Dict[str, Any] = {}
self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator()
self._text_filters: Sequence[BaseTextFilter] = text_filters
self._destination: Optional[str] = destination
self._transport_destination: Optional[str] = transport_destination
if text_filter:
import warnings
@@ -216,11 +216,11 @@ class TTSService(AIService):
sample_rate=self.sample_rate,
num_channels=1,
)
silence_frame.destination = self._destination
silence_frame.transport_destination = self._transport_destination
await self.push_frame(silence_frame)
if isinstance(frame, TTSAudioRawFrame):
frame.destination = self._destination
frame.transport_destination = self._transport_destination
await super().push_frame(frame, direction)

View File

@@ -168,13 +168,13 @@ class BaseOutputTransport(FrameProcessor):
await self._handle_frame(frame)
async def _handle_frame(self, frame: Frame):
if frame.destination not in self._media_senders:
if frame.transport_destination not in self._media_senders:
logger.warning(
f"{self} destination [{frame.destination}] not registered for frame {frame}"
f"{self} destination [{frame.transport_destination}] not registered for frame {frame}"
)
return
sender = self._media_senders[frame.destination]
sender = self._media_senders[frame.transport_destination]
if isinstance(frame, StartInterruptionFrame):
await sender.handle_interruptions(frame)
@@ -371,9 +371,9 @@ class BaseOutputTransport(FrameProcessor):
logger.debug(f"Bot [{self._destination}] started speaking")
downstream_frame = BotStartedSpeakingFrame()
downstream_frame.destination = self._destination
downstream_frame.transport_destination = self._destination
upstream_frame = BotStartedSpeakingFrame()
upstream_frame.destination = self._destination
upstream_frame.transport_destination = self._destination
await self._transport.push_frame(downstream_frame)
await self._transport.push_frame(upstream_frame, FrameDirection.UPSTREAM)
@@ -384,9 +384,9 @@ class BaseOutputTransport(FrameProcessor):
logger.debug(f"Bot [{self._destination}] stopped speaking")
downstream_frame = BotStoppedSpeakingFrame()
downstream_frame.destination = self._destination
downstream_frame.transport_destination = self._destination
upstream_frame = BotStoppedSpeakingFrame()
upstream_frame.destination = self._destination
upstream_frame.transport_destination = self._destination
await self._transport.push_frame(downstream_frame)
await self._transport.push_frame(upstream_frame, FrameDirection.UPSTREAM)

View File

@@ -1010,7 +1010,7 @@ class DailyInputTransport(BaseInputTransport):
sample_rate=self._client.out_sample_rate,
num_channels=audio.num_channels,
)
frame.source = audio_source
frame.transport_source = audio_source
await self.push_frame(frame)
async def _audio_in_task_handler(self):
@@ -1076,7 +1076,7 @@ class DailyInputTransport(BaseInputTransport):
size=(video_frame.width, video_frame.height),
format=video_frame.color_format,
)
frame.source = video_source
frame.transport_source = video_source
await self.push_frame(frame)
self._video_renderers[participant_id][video_source]["timestamp"] = curr_time