Code review changes

2024-12-16 10:17:33 -05:00
parent b5bd662fe1
commit 1f8a217cd1
6 changed files with 57 additions and 79 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
    format.
  - New examples: `28a-transcription-processor-openai.py`,
    `28b-transcription-processor-anthropic.py`, and
-    `28c-transcription-processor-gemini.py`
+    `28c-transcription-processor-gemini.py`.

 - Add support for more languages to ElevenLabs (Arabic, Croatian, Filipino,
  Tamil) and PlayHT (Afrikans, Albanian, Amharic, Arabic, Bengali, Croatian,
--- a/examples/foundational/07a-interruptible-anthropic.py
+++ b/examples/foundational/07a-interruptible-anthropic.py
@@ -7,7 +7,6 @@
 import asyncio
 import os
 import sys
-from typing import List

 import aiohttp
 from dotenv import load_dotenv
@@ -15,13 +14,12 @@ from loguru import logger
 from runner import configure

 from pipecat.audio.vad.silero import SileroVADAnalyzer
-from pipecat.frames.frames import Frame, LLMMessagesFrame
+from pipecat.frames.frames import LLMMessagesFrame
 from pipecat.pipeline.pipeline import Pipeline
 from pipecat.pipeline.runner import PipelineRunner
 from pipecat.pipeline.task import PipelineParams, PipelineTask
 from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
-from pipecat.processors.frame_processor import FrameDirection
-from pipecat.services.anthropic import AnthropicLLMContext, AnthropicLLMService
+from pipecat.services.anthropic import AnthropicLLMService
 from pipecat.services.cartesia import CartesiaTTSService
 from pipecat.transports.services.daily import DailyParams, DailyTransport

@@ -31,28 +29,6 @@ logger.remove(0)
 logger.add(sys.stderr, level="DEBUG")


-class TestAnthropicLLMService(AnthropicLLMService):
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        if isinstance(frame, LLMMessagesFrame):
-            logger.info("Original OpenAI format messages:")
-            logger.info(frame.messages)
-
-            # Convert to Anthropic format
-            context = AnthropicLLMContext.from_messages(frame.messages)
-            logger.info("Converted to Anthropic format:")
-            logger.info(context.messages)
-
-            # Convert back to OpenAI format
-            openai_messages = []
-            for msg in context.messages:
-                converted = context.to_standard_messages(msg)
-                openai_messages.extend(converted)
-            logger.info("Converted back to OpenAI format:")
-            logger.info(openai_messages)
-
-        await super().process_frame(frame, direction)
-
-
 async def main():
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)
@@ -74,24 +50,18 @@ async def main():
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

-        llm = TestAnthropicLLMService(
+        llm = AnthropicLLMService(
            api_key=os.getenv("ANTHROPIC_API_KEY"), model="claude-3-opus-20240229"
        )

-        # Test messages including various formats
+        # TODO: think more about how to handle system prompts in a more general way. OpenAI,
+        # Google, and Anthropic all have slightly different approaches to providing a system
+        # prompt.
        messages = [
            {
                "role": "system",
                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative, helpful, and brief way. Say hello.",
            },
-            {
-                "role": "assistant",
-                "content": [
-                    {"type": "text", "text": "Hello! How can I help you today?"},
-                    {"type": "text", "text": "I'm ready to assist."},
-                ],
-            },
-            {"role": "user", "content": "Hi there!"},
        ]

        context = OpenAILLMContext(messages)
--- a/examples/foundational/28a-transcription-processor-openai.py
+++ b/examples/foundational/28a-transcription-processor-openai.py
@@ -127,7 +127,7 @@ async def main():
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
-            await task.queue_frames([LLMMessagesFrame(messages)])
+            await task.queue_frames([context_aggregator.user().get_context_frame()])

        runner = PipelineRunner()

--- a/examples/foundational/28b-transcript-processor-anthropic.py
+++ b/examples/foundational/28b-transcript-processor-anthropic.py
@@ -127,7 +127,7 @@ async def main():
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
-            await task.queue_frames([LLMMessagesFrame(messages)])
+            await task.queue_frames([context_aggregator.user().get_context_frame()])

        runner = PipelineRunner()

--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -5,7 +5,7 @@
 #

 from dataclasses import dataclass, field
-from typing import Any, List, Literal, Mapping, Optional, Tuple, TypeAlias
+from typing import Any, List, Literal, Mapping, Optional, Tuple

 from pipecat.audio.vad.vad_analyzer import VADParams
 from pipecat.clocks.base_clock import BaseClock
@@ -240,6 +240,34 @@ class TranscriptionUpdateFrame(DataFrame):
    This frame is emitted when new messages are added to the conversation history,
    containing only the newly added messages rather than the full transcript.
    Messages have normalized roles (user/assistant) regardless of the LLM service used.
+    Messages are always in the OpenAI standard message format, which supports both:
+
+    Simple format:
+    [
+        {
+            "role": "user",
+            "content": "Hi, how are you?"
+        },
+        {
+            "role": "assistant",
+            "content": "Great! And you?"
+        }
+    ]
+
+    Content list format:
+    [
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": "Hi, how are you?"}]
+        },
+        {
+            "role": "assistant",
+            "content": [{"type": "text", "text": "Great! And you?"}]
+        }
+    ]
+
+    OpenAI supports both formats. Anthropic and Google messages are converted to the
+    content list format.
    """

    messages: List[TranscriptionMessage]
--- a/src/pipecat/processors/aggregators/openai_llm_context.py
+++ b/src/pipecat/processors/aggregators/openai_llm_context.py
@@ -112,59 +112,39 @@ class OpenAILLMContext:
            msgs.append(msg)
        return json.dumps(msgs)

-    def from_standard_message(self, message) -> dict:
-        """Convert standard format message to OpenAI format.
+    def from_standard_message(self, message):
+        """Convert from OpenAI message format to OpenAI message format (passthrough).

-        Converts structured content back to OpenAI's simple string format.
+        OpenAI's format allows both simple string content and structured content:
+        - Simple: {"role": "user", "content": "Hello"}
+        - Structured: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
+
+        Since OpenAI is our standard format, this is a passthrough function.

        Args:
-            message: Message in standard format:
-                {
-                    "role": "user/assistant",
-                    "content": [{"type": "text", "text": str}]
-                }
+            message (dict): Message in OpenAI format

        Returns:
-            Message in OpenAI format:
-            {
-                "role": "user/assistant",
-                "content": str
-            }
+            dict: Same message, unchanged
        """
-        # If content is already a string, return as-is
-        if isinstance(message.get("content"), str):
-            return message
-
-        # Convert structured content to string
-        if isinstance(message.get("content"), list):
-            text_parts = []
-            for part in message["content"]:
-                if part.get("type") == "text":
-                    text_parts.append(part["text"])
-
-            return {"role": message["role"], "content": " ".join(text_parts) if text_parts else ""}
-
        return message

    def to_standard_messages(self, obj) -> list:
-        """Convert OpenAI message to standard structured format.
+        """Wrap an OpenAI-format message in a list without modifying it (passthrough).
+
+        OpenAI's format is our standard format throughout Pipecat. This function
+        returns a list containing the original message to maintain consistency with
+        other LLM services that may need to return multiple messages.

        Args:
-            obj: Message in OpenAI format {"role": "user", "content": "text"}
+            obj (dict): Message in OpenAI format with either:
+                - Simple content: {"role": "user", "content": "Hello"}
+                - List content: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}

        Returns:
-            List containing message with structured content:
-            [{"role": "user", "content": [{"type": "text", "text": "message"}]}]
+            list: Single-element list containing the original message, preserving whether
+                the content was in simple string or structured list format
        """
-        # Skip messages without content
-        if not obj.get("content"):
-            return []
-
-        # Convert simple string content to structured format
-        if isinstance(obj["content"], str):
-            return [{"role": obj["role"], "content": [{"type": "text", "text": obj["content"]}]}]
-
-        # Return original message if content is already structured
        return [obj]

    def get_messages_for_initializing_history(self):