Merge pull request #593 from pipecat-ai/aleix/bot-transcription-processor

rtvi: add RTVIBotTranscriptionProcessor to send `bot-transcription`
This commit is contained in:
Aleix Conchillo Flaqué
2024-10-15 10:03:39 -07:00
committed by GitHub
2 changed files with 31 additions and 3 deletions

View File

@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Added `RTVIBotTranscriptionProcessor`, which sends the RTVI
`bot-transcription` protocol message. Each message carries TTS text
aggregated into sentences.
- Added new input params to the `MarkdownTextFilter` utility. You can set
`filter_code` to filter code from text and `filter_tables` to filter tables
from text.

View File

@@ -42,6 +42,7 @@ from pipecat.processors.aggregators.openai_llm_context import (
OpenAILLMContextFrame,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.utils.string import match_endofsentence
RTVI_PROTOCOL_VERSION = "0.2"
@@ -275,6 +276,12 @@ class RTVITextMessageData(BaseModel):
text: str
class RTVIBotTranscriptionMessage(BaseModel):
    """RTVI `bot-transcription` protocol message.

    Wraps a text payload together with the fixed `label` and `type`
    discriminators that identify the message.
    """

    # Fixed label; the Literal type rejects any other value.
    label: Literal["rtvi-ai"] = "rtvi-ai"
    # Fixed message type discriminator.
    type: Literal["bot-transcription"] = "bot-transcription"
    # Text payload of the message.
    data: RTVITextMessageData
class RTVIBotLLMTextMessage(BaseModel):
label: Literal["rtvi-ai"] = "rtvi-ai"
type: Literal["bot-llm-text"] = "bot-llm-text"
@@ -437,16 +444,33 @@ class RTVIUserLLMTextProcessor(RTVIFrameProcessor):
if message["role"] == "user":
content = message["content"]
if isinstance(content, list):
print("LIST")
text = " ".join(item["text"] for item in content if "text" in item)
else:
print("STRING")
text = content
rtvi_message = RTVIUserLLMTextMessage(data=RTVITextMessageData(text=text))
await self._push_transport_message_urgent(rtvi_message)
class RTVIBotTranscriptionProcessor(RTVIFrameProcessor):
    """Send aggregated bot text as RTVI `bot-transcription` messages.

    Text from incoming `TextFrame`s is accumulated until
    `match_endofsentence` returns a truthy result for the accumulated text.
    The accumulated text is then sent as one `RTVIBotTranscriptionMessage`
    and the buffer is cleared. Every frame is forwarded unchanged.

    Args:
        **kwargs: Forwarded to the parent processor's constructor, as
            `RTVIBotLLMProcessor` does.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Text received since the last message was sent.
        self._aggregation = ""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Forward `frame` and, for text frames, update the aggregation.

        Args:
            frame: The frame to process.
            direction: The direction the frame is travelling in; the frame
                is pushed onwards in this same direction.
        """
        await super().process_frame(frame, direction)

        # Forward the frame before aggregating, so it is pushed on ahead of
        # any transcription message built from it.
        await self.push_frame(frame, direction)

        if isinstance(frame, TextFrame):
            self._aggregation += frame.text
            if match_endofsentence(self._aggregation):
                message = RTVIBotTranscriptionMessage(
                    data=RTVITextMessageData(text=self._aggregation)
                )
                await self._push_transport_message_urgent(message)
                # Start a fresh buffer for the next message.
                self._aggregation = ""
class RTVIBotLLMProcessor(RTVIFrameProcessor):
def __init__(self, **kwargs):
super().__init__(**kwargs)