Added support to ProtobufFrameSerializer to send the transport messages

This commit is contained in:
Filipi Fuchter
2025-03-27 17:26:03 -03:00
parent 1d4d7f28a1
commit b6007bb3d6
4 changed files with 41 additions and 6 deletions

View File

@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Added support in `ProtobufFrameSerializer` for sending the messages carried by `TransportMessageFrame` and `TransportMessageUrgentFrame`.
- Added support for a new TTS service, `PiperTTSService`.
(see https://github.com/rhasspy/piper/)

View File

@@ -35,10 +35,15 @@ message TranscriptionFrame {
string timestamp = 5;
}
// Carries a transport message as a single string. The Python serializer
// fills `data` with the JSON-encoded message payload.
message MessageFrame {
string data = 1;
}
// Top-level envelope sent over the wire: exactly one of the frame kinds
// below is set per message.
message Frame {
oneof frame {
TextFrame text = 1;
AudioRawFrame audio = 2;
TranscriptionFrame transcription = 3;
// Transport messages, wrapped as a MessageFrame.
MessageFrame message = 4;
}
}

View File

@@ -1,12 +1,22 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# NO CHECKED-IN PROTOBUF GENCODE
# source: frames.proto
# Protobuf Python Version: 4.25.1
# Protobuf Python Version: 5.27.2
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import runtime_version as _runtime_version
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
_runtime_version.ValidateProtobufRuntimeVersion(
_runtime_version.Domain.PUBLIC,
5,
27,
2,
'',
'frames.proto'
)
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
@@ -14,19 +24,21 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0c\x66rames.proto\x12\x07pipecat\"3\n\tTextFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\"}\n\rAudioRawFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\r\n\x05\x61udio\x18\x03 \x01(\x0c\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12\x14\n\x0cnum_channels\x18\x05 \x01(\r\x12\x10\n\x03pts\x18\x06 \x01(\x04H\x00\x88\x01\x01\x42\x06\n\x04_pts\"`\n\x12TranscriptionFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x0f\n\x07user_id\x18\x04 \x01(\t\x12\x11\n\ttimestamp\x18\x05 \x01(\t\"\x93\x01\n\x05\x46rame\x12\"\n\x04text\x18\x01 \x01(\x0b\x32\x12.pipecat.TextFrameH\x00\x12\'\n\x05\x61udio\x18\x02 \x01(\x0b\x32\x16.pipecat.AudioRawFrameH\x00\x12\x34\n\rtranscription\x18\x03 \x01(\x0b\x32\x1b.pipecat.TranscriptionFrameH\x00\x42\x07\n\x05\x66rameb\x06proto3')
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0c\x66rames.proto\x12\x07pipecat\"3\n\tTextFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\"}\n\rAudioRawFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\r\n\x05\x61udio\x18\x03 \x01(\x0c\x12\x13\n\x0bsample_rate\x18\x04 \x01(\r\x12\x14\n\x0cnum_channels\x18\x05 \x01(\r\x12\x10\n\x03pts\x18\x06 \x01(\x04H\x00\x88\x01\x01\x42\x06\n\x04_pts\"`\n\x12TranscriptionFrame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x0f\n\x07user_id\x18\x04 \x01(\t\x12\x11\n\ttimestamp\x18\x05 \x01(\t\"\x1c\n\x0cMessageFrame\x12\x0c\n\x04\x64\x61ta\x18\x01 \x01(\t\"\xbd\x01\n\x05\x46rame\x12\"\n\x04text\x18\x01 \x01(\x0b\x32\x12.pipecat.TextFrameH\x00\x12\'\n\x05\x61udio\x18\x02 \x01(\x0b\x32\x16.pipecat.AudioRawFrameH\x00\x12\x34\n\rtranscription\x18\x03 \x01(\x0b\x32\x1b.pipecat.TranscriptionFrameH\x00\x12(\n\x07message\x18\x04 \x01(\x0b\x32\x15.pipecat.MessageFrameH\x00\x42\x07\n\x05\x66rameb\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'frames_pb2', _globals)
if _descriptor._USE_C_DESCRIPTORS == False:
DESCRIPTOR._options = None
if not _descriptor._USE_C_DESCRIPTORS:
DESCRIPTOR._loaded_options = None
_globals['_TEXTFRAME']._serialized_start=25
_globals['_TEXTFRAME']._serialized_end=76
_globals['_AUDIORAWFRAME']._serialized_start=78
_globals['_AUDIORAWFRAME']._serialized_end=203
_globals['_TRANSCRIPTIONFRAME']._serialized_start=205
_globals['_TRANSCRIPTIONFRAME']._serialized_end=301
_globals['_FRAME']._serialized_start=304
_globals['_FRAME']._serialized_end=451
_globals['_MESSAGEFRAME']._serialized_start=303
_globals['_MESSAGEFRAME']._serialized_end=331
_globals['_FRAME']._serialized_start=334
_globals['_FRAME']._serialized_end=523
# @@protoc_insertion_point(module_scope)

View File

@@ -5,6 +5,7 @@
#
import dataclasses
import json
from loguru import logger
@@ -15,15 +16,24 @@ from pipecat.frames.frames import (
OutputAudioRawFrame,
TextFrame,
TranscriptionFrame,
TransportMessageFrame,
TransportMessageUrgentFrame,
)
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
# Intermediate data class used to convert transport messages into Protobuf
# format; the serializer maps it to the "message" field of the proto Frame.
@dataclasses.dataclass
class MessageFrame:
# JSON-encoded payload of the original transport message.
data: str
class ProtobufFrameSerializer(FrameSerializer):
SERIALIZABLE_TYPES = {
TextFrame: "text",
OutputAudioRawFrame: "audio",
TranscriptionFrame: "transcription",
MessageFrame: "message",
}
SERIALIZABLE_FIELDS = {v: k for k, v in SERIALIZABLE_TYPES.items()}
@@ -42,6 +52,12 @@ class ProtobufFrameSerializer(FrameSerializer):
return FrameSerializerType.BINARY
async def serialize(self, frame: Frame) -> str | bytes | None:
# Wrap transport messages in a MessageFrame, JSON-encoding the payload, so they can be serialized.
if isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
frame = MessageFrame(
data=json.dumps(frame.message),
)
proto_frame = frame_protos.Frame()
if type(frame) not in self.SERIALIZABLE_TYPES:
logger.warning(f"Frame type {type(frame)} is not serializable")