Code review changes

This commit is contained in:
Mark Backman
2024-12-16 10:17:33 -05:00
parent b5bd662fe1
commit 1f8a217cd1
6 changed files with 57 additions and 79 deletions

View File

@@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
format.
- New examples: `28a-transcription-processor-openai.py`,
`28b-transcription-processor-anthropic.py`, and
`28c-transcription-processor-gemini.py`
`28c-transcription-processor-gemini.py`.
- Add support for more languages to ElevenLabs (Arabic, Croatian, Filipino,
Tamil) and PlayHT (Afrikaans, Albanian, Amharic, Arabic, Bengali, Croatian,

View File

@@ -7,7 +7,6 @@
import asyncio
import os
import sys
from typing import List
import aiohttp
from dotenv import load_dotenv
@@ -15,13 +14,12 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame, LLMMessagesFrame
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.anthropic import AnthropicLLMContext, AnthropicLLMService
from pipecat.services.anthropic import AnthropicLLMService
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.transports.services.daily import DailyParams, DailyTransport
@@ -31,28 +29,6 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class TestAnthropicLLMService(AnthropicLLMService):
async def process_frame(self, frame: Frame, direction: FrameDirection):
if isinstance(frame, LLMMessagesFrame):
logger.info("Original OpenAI format messages:")
logger.info(frame.messages)
# Convert to Anthropic format
context = AnthropicLLMContext.from_messages(frame.messages)
logger.info("Converted to Anthropic format:")
logger.info(context.messages)
# Convert back to OpenAI format
openai_messages = []
for msg in context.messages:
converted = context.to_standard_messages(msg)
openai_messages.extend(converted)
logger.info("Converted back to OpenAI format:")
logger.info(openai_messages)
await super().process_frame(frame, direction)
async def main():
async with aiohttp.ClientSession() as session:
(room_url, token) = await configure(session)
@@ -74,24 +50,18 @@ async def main():
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady
)
llm = TestAnthropicLLMService(
llm = AnthropicLLMService(
api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-opus-20240229"
)
# Test messages including various formats
# todo: think more about how to handle system prompts in a more general way. OpenAI,
# Google, and Anthropic all have slightly different approaches to providing a system
# prompt.
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative, helpful, and brief way. Say hello.",
},
{
"role": "assistant",
"content": [
{"type": "text", "text": "Hello! How can I help you today?"},
{"type": "text", "text": "I'm ready to assist."},
],
},
{"role": "user", "content": "Hi there!"},
]
context = OpenAILLMContext(messages)

View File

@@ -127,7 +127,7 @@ async def main():
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
await task.queue_frames([LLMMessagesFrame(messages)])
await task.queue_frames([context_aggregator.user().get_context_frame()])
runner = PipelineRunner()

View File

@@ -127,7 +127,7 @@ async def main():
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# Kick off the conversation.
await task.queue_frames([LLMMessagesFrame(messages)])
await task.queue_frames([context_aggregator.user().get_context_frame()])
runner = PipelineRunner()

View File

@@ -5,7 +5,7 @@
#
from dataclasses import dataclass, field
from typing import Any, List, Literal, Mapping, Optional, Tuple, TypeAlias
from typing import Any, List, Literal, Mapping, Optional, Tuple
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.clocks.base_clock import BaseClock
@@ -240,6 +240,34 @@ class TranscriptionUpdateFrame(DataFrame):
This frame is emitted when new messages are added to the conversation history,
containing only the newly added messages rather than the full transcript.
Messages have normalized roles (user/assistant) regardless of the LLM service used.
Messages are always in the OpenAI standard message format, which supports both:
Simple format:
[
{
"role": "user",
"content": "Hi, how are you?"
},
{
"role": "assistant",
"content": "Great! And you?"
}
]
Content list format:
[
{
"role": "user",
"content": [{"type": "text", "text": "Hi, how are you?"}]
},
{
"role": "assistant",
"content": [{"type": "text", "text": "Great! And you?"}]
}
]
OpenAI supports both formats. Anthropic and Google messages are converted to the
content list format.
"""
messages: List[TranscriptionMessage]

View File

@@ -112,59 +112,39 @@ class OpenAILLMContext:
msgs.append(msg)
return json.dumps(msgs)
def from_standard_message(self, message) -> dict:
"""Convert standard format message to OpenAI format.
def from_standard_message(self, message):
"""Convert from OpenAI message format to OpenAI message format (passthrough).
Converts structured content back to OpenAI's simple string format.
OpenAI's format allows both simple string content and structured content:
- Simple: {"role": "user", "content": "Hello"}
- Structured: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
Since OpenAI is our standard format, this is a passthrough function.
Args:
message: Message in standard format:
{
"role": "user/assistant",
"content": [{"type": "text", "text": str}]
}
message (dict): Message in OpenAI format
Returns:
Message in OpenAI format:
{
"role": "user/assistant",
"content": str
}
dict: Same message, unchanged
"""
# If content is already a string, return as-is
if isinstance(message.get("content"), str):
return message
# Convert structured content to string
if isinstance(message.get("content"), list):
text_parts = []
for part in message["content"]:
if part.get("type") == "text":
text_parts.append(part["text"])
return {"role": message["role"], "content": " ".join(text_parts) if text_parts else ""}
return message
def to_standard_messages(self, obj) -> list:
"""Convert OpenAI message to standard structured format.
"""Convert from OpenAI message format to OpenAI message format (passthrough).
OpenAI's format is our standard format throughout Pipecat. This function
returns a list containing the original message to maintain consistency with
other LLM services that may need to return multiple messages.
Args:
obj: Message in OpenAI format {"role": "user", "content": "text"}
obj (dict): Message in OpenAI format with either:
- Simple content: {"role": "user", "content": "Hello"}
- List content: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
Returns:
List containing message with structured content:
[{"role": "user", "content": [{"type": "text", "text": "message"}]}]
list: List containing the original message, preserving whether
the content was in simple string or structured list format
"""
# Skip messages without content
if not obj.get("content"):
return []
# Convert simple string content to structured format
if isinstance(obj["content"], str):
return [{"role": obj["role"], "content": [{"type": "text", "text": obj["content"]}]}]
# Return original message if content is already structured
return [obj]
def get_messages_for_initializing_history(self):