From b6007bb3d66cbd65307b74c4fb1b98e033c8c2dd Mon Sep 17 00:00:00 2001 From: Filipi Fuchter Date: Thu, 27 Mar 2025 17:26:03 -0300 Subject: [PATCH] Added support to ProtobufFrameSerializer to send the transport messages --- CHANGELOG.md | 2 ++ src/pipecat/frames/frames.proto | 5 +++++ src/pipecat/frames/protobufs/frames_pb2.py | 24 ++++++++++++++++------ src/pipecat/serializers/protobuf.py | 16 +++++++++++++++ 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0263bae47..ef26f0266 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added support to `ProtobufFrameSerializer` to send the messages from `TransportMessageFrame` and `TransportMessageUrgentFrame`. + - Added support for a new TTS service, `PiperTTSService`. (see https://github.com/rhasspy/piper/) diff --git a/src/pipecat/frames/frames.proto b/src/pipecat/frames/frames.proto index 98dc014db..ebdb16fcc 100644 --- a/src/pipecat/frames/frames.proto +++ b/src/pipecat/frames/frames.proto @@ -35,10 +35,15 @@ message TranscriptionFrame { string timestamp = 5; } +message MessageFrame { + string data = 1; +} + message Frame { oneof frame { TextFrame text = 1; AudioRawFrame audio = 2; TranscriptionFrame transcription = 3; + MessageFrame message = 4; } } diff --git a/src/pipecat/frames/protobufs/frames_pb2.py b/src/pipecat/frames/protobufs/frames_pb2.py index d58bc8baa..7884c6ccc 100644 --- a/src/pipecat/frames/protobufs/frames_pb2.py +++ b/src/pipecat/frames/protobufs/frames_pb2.py @@ -1,12 +1,22 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE # source: frames.proto -# Protobuf Python Version: 4.25.1 +# Protobuf Python Version: 5.27.2 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import builder as _builder +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, + 5, + 27, + 2, + '', + 'frames.proto' +) # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -14,19 +24,21 @@ _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0c\x66rames.proto\x12\x07pipecat\"3\n\tTextFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\"}\n\rAudioRawFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\r\n\x05\x61udio\x18\x03 \x01(\x0c\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12\x14\n\x0cnum_channels\x18\x05 \x01(\r\x12\x10\n\x03pts\x18\x06 \x01(\x04H\x00\x88\x01\x01\x42\x06\n\x04_pts\"`\n\x12TranscriptionFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x0f\n\x07user_id\x18\x04 \x01(\t\x12\x11\n\ttimestamp\x18\x05 \x01(\t\"\x93\x01\n\x05\x46rame\x12\"\n\x04text\x18\x01 \x01(\x0b\x32\x12.pipecat.TextFrameH\x00\x12\'\n\x05\x61udio\x18\x02 \x01(\x0b\x32\x16.pipecat.AudioRawFrameH\x00\x12\x34\n\rtranscription\x18\x03 \x01(\x0b\x32\x1b.pipecat.TranscriptionFrameH\x00\x42\x07\n\x05\x66rameb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0c\x66rames.proto\x12\x07pipecat\"3\n\tTextFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\"}\n\rAudioRawFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 
\x01(\t\x12\r\n\x05\x61udio\x18\x03 \x01(\x0c\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12\x14\n\x0cnum_channels\x18\x05 \x01(\r\x12\x10\n\x03pts\x18\x06 \x01(\x04H\x00\x88\x01\x01\x42\x06\n\x04_pts\"`\n\x12TranscriptionFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x0f\n\x07user_id\x18\x04 \x01(\t\x12\x11\n\ttimestamp\x18\x05 \x01(\t\"\x1c\n\x0cMessageFrame\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\t\"\xbd\x01\n\x05\x46rame\x12\"\n\x04text\x18\x01 \x01(\x0b\x32\x12.pipecat.TextFrameH\x00\x12\'\n\x05\x61udio\x18\x02 \x01(\x0b\x32\x16.pipecat.AudioRawFrameH\x00\x12\x34\n\rtranscription\x18\x03 \x01(\x0b\x32\x1b.pipecat.TranscriptionFrameH\x00\x12(\n\x07message\x18\x04 \x01(\x0b\x32\x15.pipecat.MessageFrameH\x00\x42\x07\n\x05\x66rameb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'frames_pb2', _globals) -if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None +if not _descriptor._USE_C_DESCRIPTORS: + DESCRIPTOR._loaded_options = None _globals['_TEXTFRAME']._serialized_start=25 _globals['_TEXTFRAME']._serialized_end=76 _globals['_AUDIORAWFRAME']._serialized_start=78 _globals['_AUDIORAWFRAME']._serialized_end=203 _globals['_TRANSCRIPTIONFRAME']._serialized_start=205 _globals['_TRANSCRIPTIONFRAME']._serialized_end=301 - _globals['_FRAME']._serialized_start=304 - _globals['_FRAME']._serialized_end=451 + _globals['_MESSAGEFRAME']._serialized_start=303 + _globals['_MESSAGEFRAME']._serialized_end=331 + _globals['_FRAME']._serialized_start=334 + _globals['_FRAME']._serialized_end=523 # @@protoc_insertion_point(module_scope) diff --git a/src/pipecat/serializers/protobuf.py b/src/pipecat/serializers/protobuf.py index 125f2037f..c3b6d86af 100644 --- a/src/pipecat/serializers/protobuf.py +++ b/src/pipecat/serializers/protobuf.py @@ -5,6 +5,7 @@ # import dataclasses +import json from loguru import logger 
@@ -15,15 +16,24 @@ from pipecat.frames.frames import ( OutputAudioRawFrame, TextFrame, TranscriptionFrame, + TransportMessageFrame, + TransportMessageUrgentFrame, ) from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType +# Data class for converting transport messages into Protobuf format. +@dataclasses.dataclass +class MessageFrame: + data: str + + class ProtobufFrameSerializer(FrameSerializer): SERIALIZABLE_TYPES = { TextFrame: "text", OutputAudioRawFrame: "audio", TranscriptionFrame: "transcription", + MessageFrame: "message", } SERIALIZABLE_FIELDS = {v: k for k, v in SERIALIZABLE_TYPES.items()} @@ -42,6 +52,12 @@ class ProtobufFrameSerializer(FrameSerializer): return FrameSerializerType.BINARY async def serialize(self, frame: Frame) -> str | bytes | None: + # Wrap transport messages in a MessageFrame (JSON-encoded payload) so they can be serialized + if isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)): + frame = MessageFrame( + data=json.dumps(frame.message), + ) + proto_frame = frame_protos.Frame() if type(frame) not in self.SERIALIZABLE_TYPES: logger.warning(f"Frame type {type(frame)} is not serializable")