From 394599d0310ad65fae53d2f3f0ee2ae1851455d2 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 31 Mar 2026 14:20:40 -0400 Subject: [PATCH 01/11] Remove deprecated `OpenAILLMContext` as well as everything (code paths or whole types) dependent on it (all of which were also deprecated) --- .pre-commit-config.yaml | 13 +- ...o-function-calling-gemini-openai-format.py | 162 --- .../foundational/19-openai-realtime-beta.py | 219 ---- .../foundational/19a-azure-realtime-beta.py | 214 ---- .../19b-openai-realtime-beta-text.py | 215 ---- ...persistent-context-openai-realtime-beta.py | 267 ---- scripts/pre-commit.sh | 27 - src/pipecat/adapters/schemas/tools_schema.py | 3 - .../services/aws_nova_sonic_adapter.py | 16 +- .../services/grok_realtime_adapter.py | 9 +- .../services/open_ai_realtime_adapter.py | 16 +- src/pipecat/extensions/ivr/ivr_navigator.py | 3 +- src/pipecat/frames/frames.py | 68 -- .../observers/loggers/llm_log_observer.py | 19 +- src/pipecat/pipeline/task.py | 5 - .../aggregators/gated_llm_context.py | 3 +- .../aggregators/gated_open_ai_llm_context.py | 12 - .../processors/aggregators/llm_context.py | 75 -- .../processors/aggregators/llm_response.py | 1081 +---------------- .../aggregators/openai_llm_context.py | 413 ------- .../aggregators/vision_image_frame.py | 81 -- src/pipecat/processors/frame_processor.py | 4 - .../processors/frameworks/langchain.py | 9 +- .../processors/frameworks/rtvi/observer.py | 13 +- .../processors/frameworks/strands_agents.py | 3 +- src/pipecat/services/anthropic/llm.py | 741 +---------- src/pipecat/services/aws/agent_core.py | 12 +- src/pipecat/services/aws/llm.py | 760 +----------- .../services/aws/nova_sonic/context.py | 460 ------- src/pipecat/services/aws/nova_sonic/frames.py | 25 - src/pipecat/services/aws/nova_sonic/llm.py | 54 +- .../services/aws_nova_sonic/context.py | 21 - src/pipecat/services/aws_nova_sonic/frames.py | 21 - .../gemini_multimodal_live/__init__.py | 7 - .../services/gemini_multimodal_live/events.py | 44 - .../gemini_multimodal_live/file_api.py | 39 - .../services/gemini_multimodal_live/gemini.py | 57 - .../services/google/gemini_live/llm.py | 343 +----- src/pipecat/services/google/llm.py | 734 +---------- src/pipecat/services/google/llm_openai.py | 18 - .../services/google/openai/__init__.py | 5 - src/pipecat/services/google/openai/llm.py | 213 ---- src/pipecat/services/llm_service.py | 46 +- src/pipecat/services/mem0/memory.py | 24 +- src/pipecat/services/nvidia/llm.py | 3 +- src/pipecat/services/openai/base_llm.py | 128 +- src/pipecat/services/openai/llm.py | 200 --- .../services/openai/realtime/context.py | 368 ------ .../services/openai/realtime/frames.py | 58 - src/pipecat/services/openai/realtime/llm.py | 88 +- .../services/openai_realtime/context.py | 18 - .../services/openai_realtime/frames.py | 21 - .../services/openai_realtime_beta/__init__.py | 19 - .../services/openai_realtime_beta/azure.py | 94 -- .../services/openai_realtime_beta/context.py | 272 ----- .../services/openai_realtime_beta/events.py | 978 --------------- .../services/openai_realtime_beta/frames.py | 37 - .../services/openai_realtime_beta/openai.py | 858 ------------- src/pipecat/services/perplexity/llm.py | 3 +- src/pipecat/services/sambanova/llm.py | 11 +- src/pipecat/services/ultravox/llm.py | 54 +- src/pipecat/services/xai/llm.py | 82 +- src/pipecat/services/xai/realtime/llm.py | 28 - src/pipecat/transports/base_input.py | 2 +- .../utils/tracing/service_attributes.py | 1 - .../utils/tracing/service_decorators.py | 82 +- tests/test_context_aggregators.py | 1060 ---------------- tests/test_context_aggregators_universal.py | 224 ++++ tests/test_google_llm_openai.py | 81 -- tests/test_run_inference.py | 209 ---- 70 files changed, 399 insertions(+), 11154 deletions(-) delete mode 100644 examples/foundational/14o-function-calling-gemini-openai-format.py delete mode 100644 examples/foundational/19-openai-realtime-beta.py delete mode 100644 examples/foundational/19a-azure-realtime-beta.py delete mode 100644 examples/foundational/19b-openai-realtime-beta-text.py delete mode 100644 examples/foundational/20b-persistent-context-openai-realtime-beta.py delete mode 100755 scripts/pre-commit.sh delete mode 100644 src/pipecat/processors/aggregators/gated_open_ai_llm_context.py delete mode 100644 src/pipecat/processors/aggregators/openai_llm_context.py delete mode 100644 src/pipecat/processors/aggregators/vision_image_frame.py delete mode 100644 src/pipecat/services/aws/nova_sonic/context.py delete mode 100644 src/pipecat/services/aws/nova_sonic/frames.py delete mode 100644 src/pipecat/services/aws_nova_sonic/context.py delete mode 100644 src/pipecat/services/aws_nova_sonic/frames.py delete mode 100644 src/pipecat/services/gemini_multimodal_live/__init__.py delete mode 100644 src/pipecat/services/gemini_multimodal_live/events.py delete mode 100644 src/pipecat/services/gemini_multimodal_live/file_api.py delete mode 100644 src/pipecat/services/gemini_multimodal_live/gemini.py delete mode 100644 src/pipecat/services/google/llm_openai.py delete mode 100644 src/pipecat/services/google/openai/__init__.py delete mode 100644 src/pipecat/services/google/openai/llm.py delete mode 100644 src/pipecat/services/openai/realtime/context.py delete mode 100644 src/pipecat/services/openai/realtime/frames.py delete mode 100644 src/pipecat/services/openai_realtime/context.py delete mode 100644 src/pipecat/services/openai_realtime/frames.py delete mode 100644 src/pipecat/services/openai_realtime_beta/__init__.py delete mode 100644 src/pipecat/services/openai_realtime_beta/azure.py delete mode 100644 src/pipecat/services/openai_realtime_beta/context.py delete mode 100644 src/pipecat/services/openai_realtime_beta/events.py delete mode 100644 src/pipecat/services/openai_realtime_beta/frames.py delete mode 100644 src/pipecat/services/openai_realtime_beta/openai.py delete mode 100644 tests/test_context_aggregators.py delete mode 100644 tests/test_google_llm_openai.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 73e772609..f7330683c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,8 +1,13 @@ repos: - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.1 + - repo: local hooks: - id: ruff - language_version: python3 - args: [--fix] + name: ruff + entry: uv run ruff check --fix + language: system + types: [python] - id: ruff-format + name: ruff-format + entry: uv run ruff format + language: system + types: [python] diff --git a/examples/foundational/14o-function-calling-gemini-openai-format.py b/examples/foundational/14o-function-calling-gemini-openai-format.py deleted file mode 100644 index e3b06c0d6..000000000 --- a/examples/foundational/14o-function-calling-gemini-openai-format.py +++ /dev/null @@ -1,162 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - - -import os - -from dotenv import load_dotenv -from loguru import logger - -from pipecat.adapters.schemas.function_schema import FunctionSchema -from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.llm_response_universal import ( - LLMContextAggregatorPair, - LLMUserAggregatorParams, -) -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.services.elevenlabs.tts import ElevenLabsTTSService -from pipecat.services.google.openai.llm import GoogleLLMOpenAIBetaService -from pipecat.services.llm_service import FunctionCallParams -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - - -async def fetch_weather_from_api(params: FunctionCallParams): - await params.result_callback({"conditions": "nice", "temperature": "75"}) - - -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. -transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - - tts = ElevenLabsTTSService( - api_key=os.getenv("ELEVENLABS_API_KEY", ""), - settings=ElevenLabsTTSService.Settings( - voice=os.getenv("ELEVENLABS_VOICE_ID", ""), - ), - ) - - llm = GoogleLLMOpenAIBetaService( - api_key=os.getenv("GOOGLE_API_KEY"), - settings=GoogleLLMOpenAIBetaService.Settings( - system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.", - ), - ) - # You can aslo register a function_name of None to get all functions - # sent to the same callback with an additional function_name parameter. - llm.register_function("get_current_weather", fetch_weather_from_api) - - @llm.event_handler("on_function_calls_started") - async def on_function_calls_started(service, function_calls): - await tts.queue_frame(TTSSpeakFrame("Let me check on that.")) - - weather_function = FunctionSchema( - name="get_current_weather", - description="Get the current weather", - properties={ - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "format": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - "description": "The temperature unit to use. Infer this from the user's location.", - }, - }, - required=["location", "format"], - ) - tools = ToolsSchema(standard_tools=[weather_function]) - messages = [ - { - "role": "developer", - "content": "Start a conversation with 'Hey there' to get the current weather.", - }, - ] - - context = OpenAILLMContext(messages, tools) - user_aggregator, assistant_aggregator = LLMContextAggregatorPair( - context, - user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), - ) - - pipeline = Pipeline( - [ - transport.input(), - stt, - user_aggregator, - llm, - tts, - transport.output(), - assistant_aggregator, - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - # Kick off the conversation. - await task.queue_frames([LLMRunFrame()]) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/examples/foundational/19-openai-realtime-beta.py b/examples/foundational/19-openai-realtime-beta.py deleted file mode 100644 index 4d1714fe3..000000000 --- a/examples/foundational/19-openai-realtime-beta.py +++ /dev/null @@ -1,219 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - - -import os -from datetime import datetime - -from dotenv import load_dotenv -from loguru import logger - -from pipecat.adapters.schemas.function_schema import FunctionSchema -from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame, TranscriptionMessage -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.processors.transcript_processor import TranscriptProcessor -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.llm_service import FunctionCallParams -from pipecat.services.openai_realtime_beta import ( - InputAudioNoiseReduction, - InputAudioTranscription, - OpenAIRealtimeBetaLLMService, - SemanticTurnDetection, - SessionProperties, -) -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - - -async def fetch_weather_from_api(params: FunctionCallParams): - temperature = 75 if params.arguments["format"] == "fahrenheit" else 24 - await params.result_callback( - { - "conditions": "nice", - "temperature": temperature, - "format": params.arguments["format"], - "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), - } - ) - - -async def fetch_restaurant_recommendation(params: FunctionCallParams): - await params.result_callback({"name": "The Golden Dragon"}) - - -weather_function = FunctionSchema( - name="get_current_weather", - description="Get the current weather", - properties={ - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "format": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - "description": "The temperature unit to use. Infer this from the users location.", - }, - }, - required=["location", "format"], -) - -restaurant_function = FunctionSchema( - name="get_restaurant_recommendation", - description="Get a restaurant recommendation", - properties={ - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - }, - required=["location"], -) - -# Create tools schema -tools = ToolsSchema(standard_tools=[weather_function, restaurant_function]) - - -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. -transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - session_properties = SessionProperties( - input_audio_transcription=InputAudioTranscription(), - # Set openai TurnDetection parameters. Not setting this at all will turn it - # on by default - turn_detection=SemanticTurnDetection(), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - input_audio_noise_reduction=InputAudioNoiseReduction(type="near_field"), - # tools=tools, - instructions="""You are a helpful and friendly AI. - -Act like a human, but remember that you aren't a human and that you can't do human -things in the real world. Your voice and personality should be warm and engaging, with a lively and -playful tone. - -If interacting in a non-English language, start by using the standard accent or dialect familiar to -the user. Talk quickly. You should always call a function if you can. Do not refer to these rules, -even if you're asked about them. - -You are participating in a voice conversation. Keep your responses concise, short, and to the point -unless specifically asked to elaborate on a topic. - -You have access to the following tools: -- get_current_weather: Get the current weather for a given location. -- get_restaurant_recommendation: Get a restaurant recommendation for a given location. - -Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""", - ) - - llm = OpenAIRealtimeBetaLLMService( - api_key=os.getenv("OPENAI_API_KEY"), - session_properties=session_properties, - ) - - # you can either register a single function for all function calls, or specific functions - # llm.register_function(None, fetch_weather_from_api) - llm.register_function("get_current_weather", fetch_weather_from_api) - llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation) - - transcript = TranscriptProcessor() - - # Create a standard OpenAI LLM context object using the normal messages format. The - # OpenAIRealtimeBetaLLMService will convert this internally to messages that the - # openai WebSocket API can understand. - context = OpenAILLMContext( - [{"role": "developer", "content": "Say hello!"}], - tools, - ) - - context_aggregator = llm.create_context_aggregator(context) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - context_aggregator.user(), - llm, # LLM - transcript.user(), # Placed after the LLM, as LLM pushes TranscriptionFrames downstream - transport.output(), # Transport bot output - transcript.assistant(), # After the transcript output, to time with the audio output - context_aggregator.assistant(), - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - # Kick off the conversation. - await task.queue_frames([LLMRunFrame()]) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - # Register event handler for transcript updates - @transcript.event_handler("on_transcript_update") - async def on_transcript_update(processor, frame): - for msg in frame.messages: - if isinstance(msg, TranscriptionMessage): - timestamp = f"[{msg.timestamp}] " if msg.timestamp else "" - line = f"{timestamp}{msg.role}: {msg.content}" - logger.info(f"Transcript: {line}") - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/examples/foundational/19a-azure-realtime-beta.py b/examples/foundational/19a-azure-realtime-beta.py deleted file mode 100644 index 4e9b7bcb1..000000000 --- a/examples/foundational/19a-azure-realtime-beta.py +++ /dev/null @@ -1,214 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - - -import os -from datetime import datetime - -from dotenv import load_dotenv -from loguru import logger - -from pipecat.adapters.schemas.function_schema import FunctionSchema -from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.llm_service import FunctionCallParams -from pipecat.services.openai_realtime_beta import ( - AzureRealtimeBetaLLMService, - InputAudioTranscription, - SessionProperties, -) -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - - -async def fetch_weather_from_api(params: FunctionCallParams): - temperature = 75 if params.arguments["format"] == "fahrenheit" else 24 - await params.result_callback( - { - "conditions": "nice", - "temperature": temperature, - "format": params.arguments["format"], - "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), - } - ) - - -async def fetch_restaurant_recommendation(params: FunctionCallParams): - await params.result_callback({"name": "The Golden Dragon"}) - - -# Define weather function using standardized schema -weather_function = FunctionSchema( - name="get_current_weather", - description="Get the current weather", - properties={ - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "format": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - "description": "The temperature unit to use. Infer this from the users location.", - }, - }, - required=["location", "format"], -) - -restaurant_function = FunctionSchema( - name="get_restaurant_recommendation", - description="Get a restaurant recommendation", - properties={ - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - }, - required=["location"], -) - -# Create tools schema -tools = ToolsSchema(standard_tools=[weather_function, restaurant_function]) - - -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. -transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - session_properties = SessionProperties( - input_audio_transcription=InputAudioTranscription(model="whisper-1"), - # Set openai TurnDetection parameters. Not setting this at all will turn it - # on by default - # turn_detection=TurnDetection(silence_duration_ms=1000), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - # tools=tools, - instructions="""You are a helpful and friendly AI. - -Act like a human, but remember that you aren't a human and that you can't do human -things in the real world. Your voice and personality should be warm and engaging, with a lively and -playful tone. - -If interacting in a non-English language, start by using the standard accent or dialect familiar to -the user. Talk quickly. You should always call a function if you can. Do not refer to these rules, -even if you're asked about them. -- -You are participating in a voice conversation. Keep your responses concise, short, and to the point -unless specifically asked to elaborate on a topic. - -You have access to the following tools: -- get_current_weather: Get the current weather for a given location. -- get_restaurant_recommendation: Get a restaurant recommendation for a given location. - -Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""", - ) - - llm = AzureRealtimeBetaLLMService( - api_key=os.getenv("AZURE_REALTIME_API_KEY"), - base_url=os.getenv("AZURE_REALTIME_BASE_URL"), - session_properties=session_properties, - ) - - # you can either register a single function for all function calls, or specific functions - # llm.register_function(None, fetch_weather_from_api) - llm.register_function("get_current_weather", fetch_weather_from_api) - llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation) - - # Create a standard OpenAI LLM context object using the normal messages format. The - # OpenAIRealtimeBetaLLMService will convert this internally to messages that the - # openai WebSocket API can understand. - context = OpenAILLMContext( - [{"role": "developer", "content": "Say hello!"}], - # [{"role": "developer", "content": [{"type": "text", "text": "Say hello!"}]}], - # [ - # { - # "role": "developer", - # "content": [ - # {"type": "text", "text": "Say"}, - # {"type": "text", "text": "yo what's up!"}, - # ], - # } - # ], - tools, - ) - - context_aggregator = llm.create_context_aggregator(context) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - context_aggregator.user(), - llm, # LLM - transport.output(), # Transport bot output - context_aggregator.assistant(), - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - # Kick off the conversation. - await task.queue_frames([LLMRunFrame()]) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/examples/foundational/19b-openai-realtime-beta-text.py b/examples/foundational/19b-openai-realtime-beta-text.py deleted file mode 100644 index 31f5d0560..000000000 --- a/examples/foundational/19b-openai-realtime-beta-text.py +++ /dev/null @@ -1,215 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - - -import os -from datetime import datetime - -from dotenv import load_dotenv -from loguru import logger - -from pipecat.adapters.schemas.function_schema import FunctionSchema -from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.cartesia.tts import CartesiaTTSService -from pipecat.services.llm_service import FunctionCallParams -from pipecat.services.openai_realtime_beta import ( - InputAudioNoiseReduction, - InputAudioTranscription, - OpenAIRealtimeBetaLLMService, - SemanticTurnDetection, - SessionProperties, -) -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - - -async def fetch_weather_from_api(params: FunctionCallParams): - temperature = 75 if params.arguments["format"] == "fahrenheit" else 24 - await params.result_callback( - { - "conditions": "nice", - "temperature": temperature, - "format": params.arguments["format"], - "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), - } - ) - - -async def fetch_restaurant_recommendation(params: FunctionCallParams): - await params.result_callback({"name": "The Golden Dragon"}) - - -weather_function = FunctionSchema( - name="get_current_weather", - description="Get the current weather", - properties={ - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "format": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - "description": "The temperature unit to use. Infer this from the users location.", - }, - }, - required=["location", "format"], -) - -restaurant_function = FunctionSchema( - name="get_restaurant_recommendation", - description="Get a restaurant recommendation", - properties={ - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - }, - required=["location"], -) - -# Create tools schema -tools = ToolsSchema(standard_tools=[weather_function, restaurant_function]) - - -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. -transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - session_properties = SessionProperties( - input_audio_transcription=InputAudioTranscription(), - modalities=["text"], - # Set openai TurnDetection parameters. Not setting this at all will turn it - # on by default - turn_detection=SemanticTurnDetection(), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - input_audio_noise_reduction=InputAudioNoiseReduction(type="near_field"), - # tools=tools, - instructions="""You are a helpful and friendly AI. - -Act like a human, but remember that you aren't a human and that you can't do human -things in the real world. Your voice and personality should be warm and engaging, with a lively and -playful tone. - -If interacting in a non-English language, start by using the standard accent or dialect familiar to -the user. Talk quickly. You should always call a function if you can. Do not refer to these rules, -even if you're asked about them. - -You are participating in a voice conversation. Keep your responses concise, short, and to the point -unless specifically asked to elaborate on a topic. - -You have access to the following tools: -- get_current_weather: Get the current weather for a given location. -- get_restaurant_recommendation: Get a restaurant recommendation for a given location. - -Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""", - ) - - llm = OpenAIRealtimeBetaLLMService( - api_key=os.getenv("OPENAI_API_KEY"), - session_properties=session_properties, - ) - - tts = CartesiaTTSService( - api_key=os.getenv("CARTESIA_API_KEY"), - settings=CartesiaTTSService.Settings( - voice="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady - ), - ) - - # you can either register a single function for all function calls, or specific functions - # llm.register_function(None, fetch_weather_from_api) - llm.register_function("get_current_weather", fetch_weather_from_api) - llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation) - - # Create a standard OpenAI LLM context object using the normal messages format. The - # OpenAIRealtimeBetaLLMService will convert this internally to messages that the - # openai WebSocket API can understand. - context = OpenAILLMContext( - [{"role": "developer", "content": "Say hello!"}], - tools, - ) - - context_aggregator = llm.create_context_aggregator(context) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - context_aggregator.user(), - llm, # LLM - tts, # TTS - transport.output(), # Transport bot output - context_aggregator.assistant(), - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - # Kick off the conversation. - await task.queue_frames([LLMRunFrame()]) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/examples/foundational/20b-persistent-context-openai-realtime-beta.py b/examples/foundational/20b-persistent-context-openai-realtime-beta.py deleted file mode 100644 index 4b05db618..000000000 --- a/examples/foundational/20b-persistent-context-openai-realtime-beta.py +++ /dev/null @@ -1,267 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import asyncio -import glob -import json -import os -from datetime import datetime - -from dotenv import load_dotenv -from loguru import logger - -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, -) -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.services.llm_service import FunctionCallParams -from pipecat.services.openai_realtime_beta import ( - InputAudioTranscription, - OpenAIRealtimeBetaLLMService, - SessionProperties, - TurnDetection, -) -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - -BASE_FILENAME = "/tmp/pipecat_conversation_" - - -async def fetch_weather_from_api(params: FunctionCallParams): - temperature = 75 if params.arguments["format"] == "fahrenheit" else 24 - await params.result_callback( - { - "conditions": "nice", - "temperature": temperature, - "format": params.arguments["format"], - "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), - } - ) - - -async def get_saved_conversation_filenames(params: FunctionCallParams): - # Construct the full pattern including the BASE_FILENAME - full_pattern = f"{BASE_FILENAME}*.json" - - # Use glob to find all matching files - matching_files = glob.glob(full_pattern) - logger.debug(f"matching files: {matching_files}") - - await params.result_callback({"filenames": matching_files}) - - -async def save_conversation(params: FunctionCallParams): - timestamp = datetime.now().strftime("%Y-%m-%d_%H:%M:%S") - filename = f"{BASE_FILENAME}{timestamp}.json" - logger.debug( - f"writing conversation to {filename}\n{json.dumps(params.context.messages, indent=4)}" - ) - try: - with open(filename, "w") as file: - messages = params.context.get_messages_for_persistent_storage() - # remove the last message, which is the instruction we just gave to save the conversation - messages.pop() - json.dump(messages, file, indent=2) - await params.result_callback({"success": True}) - except Exception as e: - await params.result_callback({"success": False, "error": str(e)}) - - -async def load_conversation(params: FunctionCallParams): - async def _reset(): - filename = params.arguments["filename"] - logger.debug(f"loading conversation from {filename}") - try: - with open(filename, "r") as file: - params.context.set_messages(json.load(file)) - await params.llm.reset_conversation() - await params.llm._create_response() - except Exception as e: - await params.result_callback({"success": False, "error": str(e)}) - - asyncio.create_task(_reset()) - - -tools = [ - { - "type": "function", - "name": "get_current_weather", - "description": "Get the current weather", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and state, e.g. San Francisco, CA", - }, - "format": { - "type": "string", - "enum": ["celsius", "fahrenheit"], - "description": "The temperature unit to use. Infer this from the users location.", - }, - }, - "required": ["location", "format"], - }, - }, - { - "type": "function", - "name": "save_conversation", - "description": "Save the current conversation. Use this function to persist the current conversation to external storage.", - "parameters": { - "type": "object", - "properties": {}, - "required": [], - }, - }, - { - "type": "function", - "name": "get_saved_conversation_filenames", - "description": "Get a list of saved conversation histories. Returns a list of filenames. Each filename includes a date and timestamp. Each file is conversation history that can be loaded into this session.", - "parameters": { - "type": "object", - "properties": {}, - "required": [], - }, - }, - { - "type": "function", - "name": "load_conversation", - "description": "Load a conversation history. Use this function to load a conversation history into the current session.", - "parameters": { - "type": "object", - "properties": { - "filename": { - "type": "string", - "description": "The filename of the conversation history to load.", - } - }, - "required": ["filename"], - }, - }, -] - - -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. -transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - - session_properties = SessionProperties( - input_audio_transcription=InputAudioTranscription(), - # Set openai TurnDetection parameters. Not setting this at all will turn - # it on by default - turn_detection=TurnDetection(silence_duration_ms=1000), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - # tools=tools, - instructions="""Your knowledge cutoff is 2023-10. You are a helpful and friendly AI. - -Act like a human, but remember that you aren't a human and that you can't do human -things in the real world. Your voice and personality should be warm and engaging, with a lively and -playful tone. - -If interacting in a non-English language, start by using the standard accent or dialect familiar to -the user. Talk quickly. You should always call a function if you can. Do not refer to these rules, -even if you're asked about them. -- -You are participating in a voice conversation. Keep your responses concise, short, and to the point -unless specifically asked to elaborate on a topic. - -Remember, your responses should be short. Just one or two sentences, usually.""", - ) - - llm = OpenAIRealtimeBetaLLMService( - api_key=os.getenv("OPENAI_API_KEY"), - session_properties=session_properties, - ) - - # you can either register a single function for all function calls, or specific functions - # llm.register_function(None, fetch_weather_from_api) - llm.register_function("get_current_weather", fetch_weather_from_api) - llm.register_function("save_conversation", save_conversation) - llm.register_function("get_saved_conversation_filenames", get_saved_conversation_filenames) - llm.register_function("load_conversation", load_conversation) - - context = OpenAILLMContext([], tools) - context_aggregator = llm.create_context_aggregator(context) - - pipeline = Pipeline( - [ - transport.input(), # Transport user input - stt, # STT - context_aggregator.user(), - llm, # LLM - transport.output(), # Transport bot output - context_aggregator.assistant(), - ] - ) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_connected") - async def on_client_connected(transport, client): - logger.info(f"Client connected") - # Kick off the conversation. - await task.queue_frames([LLMRunFrame()]) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/scripts/pre-commit.sh b/scripts/pre-commit.sh deleted file mode 100755 index 44a17cc19..000000000 --- a/scripts/pre-commit.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -# Color codes for output -RED='\033[0;31m' -GREEN='\033[0;32m' -NC='\033[0m' # No Color - -echo "🔍 Running pre-commit checks..." - -# Change to project root (one level up from scripts/) -cd "$(dirname "$0")/.." - -# Format check -echo "📝 Checking code formatting..." -if ! NO_COLOR=1 uv run ruff format --diff --check; then - echo -e "${RED}❌ Code formatting issues found. Run 'uv run ruff format' to fix.${NC}" - exit 1 -fi - -# Lint check -echo "🔍 Running linter..." -if ! uv run ruff check; then - echo -e "${RED}❌ Linting issues found.${NC}" - exit 1 -fi - -echo -e "${GREEN}✅ All pre-commit checks passed!${NC}" \ No newline at end of file diff --git a/src/pipecat/adapters/schemas/tools_schema.py b/src/pipecat/adapters/schemas/tools_schema.py index 298cef56d..e3940f6cb 100644 --- a/src/pipecat/adapters/schemas/tools_schema.py +++ b/src/pipecat/adapters/schemas/tools_schema.py @@ -22,12 +22,9 @@ class AdapterType(Enum): Parameters: GEMINI: Google Gemini adapter - currently the only service supporting custom tools. - SHIM: Backward compatibility shim for creating ToolsSchemas from lists of tools in - any format, used by LLMContext.from_openai_context. """ GEMINI = "gemini" # that is the only service where we are able to add custom tools for now - SHIM = "shim" # for use as backward compatibility shim for creating ToolsSchemas from list of tools in any format class ToolsSchema: diff --git a/src/pipecat/adapters/services/aws_nova_sonic_adapter.py b/src/pipecat/adapters/services/aws_nova_sonic_adapter.py index 492e02db6..e481f9971 100644 --- a/src/pipecat/adapters/services/aws_nova_sonic_adapter.py +++ b/src/pipecat/adapters/services/aws_nova_sonic_adapter.py @@ -222,18 +222,4 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]): List of dictionaries in AWS Nova Sonic function format. """ functions_schema = tools_schema.standard_tools - standard_tools = [ - self._to_aws_nova_sonic_function_format(func) for func in functions_schema - ] - - # For backward compatibility, AWS Nova Sonic can still be used with - # tools in dict format, even though it always uses `LLMContext` under - # the hood (via `LLMContext.from_openai_context()`). - # To support this behavior, we use "shimmed" custom tools here. - # (We maintain this backward compatibility because users aren't - # *knowingly* opting into the new `LLMContext`.) - shimmed_tools = [] - if tools_schema.custom_tools: - shimmed_tools = tools_schema.custom_tools.get(AdapterType.SHIM, []) - - return standard_tools + shimmed_tools + return [self._to_aws_nova_sonic_function_format(func) for func in functions_schema] diff --git a/src/pipecat/adapters/services/grok_realtime_adapter.py b/src/pipecat/adapters/services/grok_realtime_adapter.py index 4e5e2a8c5..1c1f7336d 100644 --- a/src/pipecat/adapters/services/grok_realtime_adapter.py +++ b/src/pipecat/adapters/services/grok_realtime_adapter.py @@ -256,11 +256,4 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter): """ # Convert standard function tools functions_schema = tools_schema.standard_tools - standard_tools = [self._to_grok_function_format(func) for func in functions_schema] - - # Support shimmed custom tools for backward compatibility - shimmed_tools = [] - if tools_schema.custom_tools: - shimmed_tools = tools_schema.custom_tools.get(AdapterType.SHIM, []) - - return standard_tools + shimmed_tools + return [self._to_grok_function_format(func) for func in functions_schema] diff --git a/src/pipecat/adapters/services/open_ai_realtime_adapter.py b/src/pipecat/adapters/services/open_ai_realtime_adapter.py index 3c394d99b..4732f70b7 100644 --- a/src/pipecat/adapters/services/open_ai_realtime_adapter.py +++ b/src/pipecat/adapters/services/open_ai_realtime_adapter.py @@ -236,18 +236,4 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter): List of function definitions in OpenAI Realtime format. """ functions_schema = tools_schema.standard_tools - standard_tools = [ - self._to_openai_realtime_function_format(func) for func in functions_schema - ] - - # For backward compatibility, OpenAI Realtime can still be used with - # tools in dict format, even though it always uses `LLMContext` under - # the hood (via `LLMContext.from_openai_context()`). - # To support this behavior, we use "shimmed" custom tools here. - # (We maintain this backward compatibility because users aren't - # *knowingly* opting into the new `LLMContext`.) - shimmed_tools = [] - if tools_schema.custom_tools: - shimmed_tools = tools_schema.custom_tools.get(AdapterType.SHIM, []) - - return standard_tools + shimmed_tools + return [self._to_openai_realtime_function_format(func) for func in functions_schema] diff --git a/src/pipecat/extensions/ivr/ivr_navigator.py b/src/pipecat/extensions/ivr/ivr_navigator.py index 64a6d0942..a2ff0cde5 100644 --- a/src/pipecat/extensions/ivr/ivr_navigator.py +++ b/src/pipecat/extensions/ivr/ivr_navigator.py @@ -31,7 +31,6 @@ from pipecat.frames.frames import ( VADParamsUpdateFrame, ) from pipecat.pipeline.pipeline import Pipeline -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.services.llm_service import LLMService from pipecat.utils.text.pattern_pair_aggregator import ( @@ -444,7 +443,7 @@ Remember: Respond with `NUMBER` (single or multiple for sequences), frame: The frame to process. direction: The direction of frame flow in the pipeline. """ - if isinstance(frame, (OpenAILLMContextFrame, LLMContextFrame)): + if isinstance(frame, LLMContextFrame): # Extract messages and pass to IVR processor all_messages = frame.context.get_messages() diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 29369f03a..bf5319405 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -451,36 +451,6 @@ class TranslationFrame(TextFrame): return f"{self.name}(user: {self.user_id}, text: [{self.text}], language: {self.language}, timestamp: {self.timestamp})" -@dataclass -class OpenAILLMContextAssistantTimestampFrame(DataFrame): - """Timestamp information for assistant messages in LLM context. - - .. deprecated:: 0.0.99 - `OpenAILLMContextAssistantTimestampFrame` is deprecated and will be removed in a future version. - Use `LLMContextAssistantTimestampFrame` with the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Parameters: - timestamp: Timestamp when the assistant message was created. - """ - - timestamp: str - - def __post_init__(self): - super().__post_init__() - import warnings - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "OpenAILLMContextAssistantTimestampFrame is deprecated and will be removed in a future version. " - "Use LLMContextAssistantTimestampFrame with the universal LLMContext and LLMContextAggregatorPair instead. " - "See OpenAILLMContext docstring for migration guide.", - DeprecationWarning, - stacklevel=2, - ) - - @dataclass class LLMContextAssistantTimestampFrame(DataFrame): """Timestamp information for assistant messages in LLM context. @@ -706,44 +676,6 @@ class LLMThoughtEndFrame(ControlFrame): return f"{self.name}(pts: {pts}, signature: {self.signature})" -@dataclass -class LLMMessagesFrame(DataFrame): - """Frame containing LLM messages for chat completion. - - .. deprecated:: 0.0.79 - This class is deprecated and will be removed in a future version. - Instead, use either: - - `LLMMessagesUpdateFrame` with `run_llm=True` - - `OpenAILLMContextFrame` with desired messages in a new context - - A frame containing a list of LLM messages. Used to signal that an LLM - service should run a chat completion and emit an LLMFullResponseStartFrame, - TextFrames and an LLMFullResponseEndFrame. Note that the `messages` - property in this class is mutable, and will be updated by various - aggregators. - - Parameters: - messages: List of message dictionaries in LLM format. - """ - - messages: List[dict] - - def __post_init__(self): - super().__post_init__() - import warnings - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "LLMMessagesFrame is deprecated and will be removed in a future version. " - "Instead, use either " - "`LLMMessagesUpdateFrame` with `run_llm=True`, or " - "`OpenAILLMContextFrame` with desired messages in a new context", - DeprecationWarning, - stacklevel=2, - ) - - @dataclass class LLMRunFrame(DataFrame): """Frame to trigger LLM processing with current context. diff --git a/src/pipecat/observers/loggers/llm_log_observer.py b/src/pipecat/observers/loggers/llm_log_observer.py index 07212e4c9..31fd0142e 100644 --- a/src/pipecat/observers/loggers/llm_log_observer.py +++ b/src/pipecat/observers/loggers/llm_log_observer.py @@ -14,11 +14,9 @@ from pipecat.frames.frames import ( LLMContextFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, - LLMMessagesFrame, LLMTextFrame, ) from pipecat.observers.base_observer import BaseObserver, FramePushed -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import LLMService @@ -32,8 +30,6 @@ class LLMLogObserver(BaseObserver): - LLMFullResponseEndFrame - LLMTextFrame - FunctionCallInProgressFrame - - LLMMessagesFrame - - OpenAILLMContextFrame This allows you to track when the LLM starts responding, what it generates, and when it finishes. @@ -74,18 +70,9 @@ class LLMLogObserver(BaseObserver): logger.debug( f"🧠 {src} {arrow} LLM FUNCTION CALL ({frame.tool_call_id}): {frame.function_name!r}({frame.arguments}) at {time_sec:.2f}s" ) - # Log LLMMessagesFrame (input) - elif isinstance(frame, LLMMessagesFrame): - logger.debug( - f"🧠 {arrow} {dst} LLM MESSAGES FRAME: {frame.messages} at {time_sec:.2f}s" - ) - # Log OpenAILLMContextFrame (input) - elif isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): - messages = ( - frame.context.messages - if isinstance(frame, OpenAILLMContextFrame) - else frame.context.get_messages() - ) + # Log LLMContextFrame (input) + elif isinstance(frame, LLMContextFrame): + messages = frame.context.get_messages() logger.debug(f"🧠 {arrow} {dst} LLM CONTEXT FRAME: {messages} at {time_sec:.2f}s") # Log function call result (input) elif isinstance(frame, FunctionCallResultFrame): diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index d33fcbd2c..e5a6ad7ff 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -48,7 +48,6 @@ from pipecat.pipeline.base_pipeline import BasePipeline from pipecat.pipeline.base_task import BasePipelineTask, PipelineTaskParams from pipecat.pipeline.pipeline import Pipeline, PipelineSink, PipelineSource from pipecat.pipeline.task_observer import TaskObserver -from pipecat.processors.aggregators.llm_response import LLMUserContextAggregator from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup from pipecat.processors.frameworks.rtvi import RTVIObserver, RTVIObserverParams, RTVIProcessor from pipecat.utils.asyncio.task_manager import BaseTaskManager, TaskManager, TaskManagerParams @@ -1028,10 +1027,6 @@ class PipelineTask(BasePipelineTask): """Build and return start metadata including user-provided values.""" start_metadata = {} - # NOTE(aleix): Remove when OpenAILLMContext/LLMUserContextAggregator is removed. - if self._find_processor(self._pipeline, LLMUserContextAggregator): - start_metadata["deprecated_openaillmcontext"] = True - # Update with user provided metadata. start_metadata.update(self._params.start_metadata) diff --git a/src/pipecat/processors/aggregators/gated_llm_context.py b/src/pipecat/processors/aggregators/gated_llm_context.py index cd1c587e9..fdcc9a025 100644 --- a/src/pipecat/processors/aggregators/gated_llm_context.py +++ b/src/pipecat/processors/aggregators/gated_llm_context.py @@ -7,7 +7,6 @@ """Gated LLM context aggregator for controlled message flow.""" from pipecat.frames.frames import CancelFrame, EndFrame, Frame, LLMContextFrame, StartFrame -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.utils.sync.base_notifier import BaseNotifier @@ -49,7 +48,7 @@ class GatedLLMContextAggregator(FrameProcessor): if isinstance(frame, (EndFrame, CancelFrame)): await self._stop() await self.push_frame(frame) - elif isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): + elif isinstance(frame, LLMContextFrame): if self._start_open: self._start_open = False await self.push_frame(frame, direction) diff --git a/src/pipecat/processors/aggregators/gated_open_ai_llm_context.py b/src/pipecat/processors/aggregators/gated_open_ai_llm_context.py deleted file mode 100644 index 0cdb366d3..000000000 --- a/src/pipecat/processors/aggregators/gated_open_ai_llm_context.py +++ /dev/null @@ -1,12 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Gated OpenAI LLM context aggregator for controlled message flow.""" - -from pipecat.processors.aggregators.gated_llm_context import GatedLLMContextAggregator - -# Alias for backward compatibility with the previous name -GatedOpenAILLMContextAggregator = GatedLLMContextAggregator diff --git a/src/pipecat/processors/aggregators/llm_context.py b/src/pipecat/processors/aggregators/llm_context.py index 1375b8297..eb98571cc 100644 --- a/src/pipecat/processors/aggregators/llm_context.py +++ b/src/pipecat/processors/aggregators/llm_context.py @@ -33,9 +33,6 @@ from PIL import Image from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema from pipecat.frames.frames import AudioRawFrame -if TYPE_CHECKING: - from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext - # "Re-export" types from OpenAI that we're using as universal context types. # NOTE: if universal message types need to someday diverge from OpenAI's, we # should consider managing our own definitions. But we should do so carefully, @@ -70,51 +67,6 @@ class LLMContext: and content formatting. """ - @staticmethod - def from_openai_context(openai_context: "OpenAILLMContext") -> "LLMContext": - """Create a universal LLM context from an OpenAI-specific context. - - NOTE: this should only be used internally, for facilitating migration - from OpenAILLMContext to LLMContext. New user code should use - LLMContext directly. - - .. deprecated:: 0.0.99 - `from_openai_context()` is deprecated and will be removed in a future version. - Directly use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Args: - openai_context: The OpenAI LLM context to convert. - - Returns: - New LLMContext instance with converted messages and settings. - """ - import warnings - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "from_openai_context() (likely invoked by create_context_aggregator()) is deprecated and will be removed in a future version. " - "Directly use the universal LLMContext and LLMContextAggregatorPair instead. " - "See OpenAILLMContext docstring for migration guide.", - DeprecationWarning, - stacklevel=2, - ) - - # Convert tools to ToolsSchema if needed. - # If the tools are already a ToolsSchema, this is a no-op. - # Otherwise, we wrap them in a shim ToolsSchema. - converted_tools = openai_context.tools - if isinstance(converted_tools, list): - converted_tools = ToolsSchema( - standard_tools=[], custom_tools={AdapterType.SHIM: converted_tools} - ) - return LLMContext( - messages=openai_context.get_messages(), - tools=converted_tools, - tool_choice=openai_context.tool_choice, - ) - def __init__( self, messages: Optional[List[LLMContextMessage]] = None, @@ -246,33 +198,6 @@ class LLMContext: """ return self.get_messages() - def get_messages_for_persistent_storage(self) -> List[LLMContextMessage]: - """Get messages suitable for persistent storage. - - NOTE: the only reason this method exists is because we're "silently" - switching from OpenAILLMContext to LLMContext under the hood in some - services and don't want to trip up users who may have been relying on - this method, which is part of the public API of OpenAILLMContext but - doesn't need to be for LLMContext. - - .. deprecated:: 0.0.92 - Use `get_messages()` instead. - - Returns: - List of conversation messages. - """ - import warnings - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "get_messages_for_persistent_storage() is deprecated, use get_messages() instead.", - DeprecationWarning, - stacklevel=2, - ) - - return self.get_messages() - def get_messages(self, llm_specific_filter: Optional[str] = None) -> List[LLMContextMessage]: """Get the current messages list. diff --git a/src/pipecat/processors/aggregators/llm_response.py b/src/pipecat/processors/aggregators/llm_response.py index 7c246b209..a1d4399cb 100644 --- a/src/pipecat/processors/aggregators/llm_response.py +++ b/src/pipecat/processors/aggregators/llm_response.py @@ -4,106 +4,17 @@ # SPDX-License-Identifier: BSD 2-Clause License # -"""LLM response aggregators for handling conversation context and message aggregation. +"""LLM response aggregator for collecting complete LLM responses.""" -This module provides aggregators that process and accumulate LLM responses, user inputs, -and conversation context. These aggregators handle the flow between speech-to-text, -LLM processing, and text-to-speech components in conversational AI pipelines. -""" - -import asyncio -import warnings -from abc import abstractmethod -from dataclasses import dataclass -from typing import Dict, List, Literal, Optional, Set - -from loguru import logger - -from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import ( - BotStartedSpeakingFrame, - BotStoppedSpeakingFrame, - CancelFrame, - EmulateUserStartedSpeakingFrame, - EmulateUserStoppedSpeakingFrame, - EndFrame, Frame, - FunctionCallCancelFrame, - FunctionCallInProgressFrame, - FunctionCallResultFrame, - FunctionCallsStartedFrame, - InputAudioRawFrame, - InterimTranscriptionFrame, InterruptionFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, - LLMMessagesAppendFrame, - LLMMessagesFrame, - LLMMessagesUpdateFrame, - LLMRunFrame, - LLMSetToolChoiceFrame, - LLMSetToolsFrame, LLMTextFrame, - OpenAILLMContextAssistantTimestampFrame, - SpeechControlParamsFrame, - StartFrame, TextFrame, - TranscriptionFrame, - UserImageRawFrame, - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, -) -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, ) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor -from pipecat.utils.time import time_now_iso8601 - - -@dataclass -class LLMUserAggregatorParams: - """Parameters for configuring LLM user aggregation behavior. - - .. deprecated:: 0.0.99 - This class is deprecated, use the new universal `LLMContext` and - `LLMContextAggregatorPair`. - - Parameters: - aggregation_timeout: Maximum time in seconds to wait for additional - transcription content before pushing aggregated result. This - timeout is used only when the transcription is slow to arrive. - turn_emulated_vad_timeout: Maximum time in seconds to wait for emulated - VAD when using turn-based analysis. Applied when transcription is - received but VAD didn't detect speech (e.g., whispered utterances). - enable_emulated_vad_interruptions: When True, allows emulated VAD events - to interrupt the bot when it's speaking. When False, emulated speech - is ignored while the bot is speaking. - """ - - aggregation_timeout: float = 0.5 - turn_emulated_vad_timeout: float = 0.8 - enable_emulated_vad_interruptions: bool = False - - -@dataclass -class LLMAssistantAggregatorParams: - """Parameters for configuring LLM assistant aggregation behavior. - - .. deprecated:: 0.0.99 - This class is deprecated, use the new universal `LLMContext` and - `LLMContextAggregatorPair`. - - Parameters: - expect_stripped_words: Whether to expect and handle stripped words - in text frames by adding spaces between tokens. This parameter is - ignored when used with the newer LLMAssistantAggregator, which - handles word spacing automatically. - """ - - expect_stripped_words: bool = True class LLMFullResponseAggregator(FrameProcessor): @@ -173,993 +84,3 @@ class LLMFullResponseAggregator(FrameProcessor): if not self._started: return self._aggregation += frame.text - - -class BaseLLMResponseAggregator(FrameProcessor): - """Base class for all LLM response aggregators. - - These aggregators process incoming frames and aggregate content until they are - ready to push the aggregation downstream. They maintain conversation state - and handle message flow between different components in the pipeline. - - The aggregators keep a store (e.g. message list or LLM context) of the current - conversation, storing messages from both users and the bot. - - .. deprecated:: 0.0.99 - `BaseLLMResponseAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - def __init__(self, **kwargs): - """Initialize the base LLM response aggregator. - - Args: - **kwargs: Additional arguments passed to parent FrameProcessor. - - .. deprecated:: 0.0.99 - `BaseLLMResponseAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - f"{self.__class__.__name__} (likely created with create_context_aggregator()) is deprecated and will be removed in a future version. " - "Use the universal LLMContext and LLMContextAggregatorPair instead. " - "See OpenAILLMContext docstring for migration guide.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(**kwargs) - - @property - @abstractmethod - def messages(self) -> List[dict]: - """Get the messages from the current conversation. - - Returns: - List of message dictionaries representing the conversation history. - """ - pass - - @property - @abstractmethod - def role(self) -> str: - """Get the role for this aggregator. - - Returns: - The role string (e.g. "user", "assistant") for this aggregator. - """ - pass - - @abstractmethod - def add_messages(self, messages): - """Add the given messages to the conversation. - - Args: - messages: Messages to append to the conversation history. - """ - pass - - @abstractmethod - def set_messages(self, messages): - """Reset the conversation with the given messages. - - Args: - messages: Messages to replace the current conversation history. - """ - pass - - @abstractmethod - def set_tools(self, tools): - """Set LLM tools to be used in the current conversation. - - Args: - tools: List of tool definitions for the LLM to use. - """ - pass - - @abstractmethod - def set_tool_choice(self, tool_choice): - """Set the tool choice for the LLM. - - Args: - tool_choice: Tool choice configuration for the LLM context. - """ - pass - - @abstractmethod - async def reset(self): - """Reset the internal state of this aggregator. - - This should clear aggregation state but not modify the conversation messages. - """ - pass - - @abstractmethod - async def handle_aggregation(self, aggregation: str): - """Add the given aggregation to the conversation store. - - Args: - aggregation: The aggregated text content to add to the conversation. - """ - pass - - @abstractmethod - async def push_aggregation(self): - """Push the current aggregation downstream. - - The specific frame type pushed depends on the aggregator implementation - (e.g. context frame, messages frame). - """ - pass - - -class LLMContextResponseAggregator(BaseLLMResponseAggregator): - """Base LLM aggregator that uses an OpenAI LLM context for conversation storage. - - This aggregator maintains conversation state using an OpenAILLMContext and - pushes OpenAILLMContextFrame objects as aggregation frames. It provides - common functionality for context-based conversation management. - - .. deprecated:: 0.0.99 - `LLMContextResponseAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - def __init__(self, *, context: OpenAILLMContext, role: str, **kwargs): - """Initialize the context response aggregator. - - Args: - context: The OpenAI LLM context to use for conversation storage. - role: The role this aggregator represents (e.g. "user", "assistant"). - **kwargs: Additional arguments passed to parent class. - - .. deprecated:: 0.0.99 - `LLMContextResponseAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMUserAggregator` and `LLMAssistantAggregator` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - # Super handles deprecation warning - super().__init__(**kwargs) - self._context = context - self._role = role - - self._aggregation: str = "" - - @property - def messages(self) -> List[dict]: - """Get messages from the LLM context. - - Returns: - List of message dictionaries from the context. - """ - return self._context.get_messages() - - @property - def role(self) -> str: - """Get the role for this aggregator. - - Returns: - The role string for this aggregator. - """ - return self._role - - @property - def context(self): - """Get the OpenAI LLM context. - - Returns: - The OpenAILLMContext instance used by this aggregator. - """ - return self._context - - def get_context_frame(self) -> OpenAILLMContextFrame: - """Create a context frame with the current context. - - .. deprecated:: 0.0.82 - This method is deprecated and will be removed in a future version. - - Returns: - LLMContextFrame containing the current context. - """ - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "get_context_frame() is deprecated and will be removed in a future version. To trigger an LLM response, use LLMRunFrame instead.", - DeprecationWarning, - stacklevel=2, - ) - return self._get_context_frame() - - def _get_context_frame(self) -> OpenAILLMContextFrame: - return OpenAILLMContextFrame(context=self._context) - - async def push_context_frame(self, direction: FrameDirection = FrameDirection.DOWNSTREAM): - """Push a context frame in the specified direction. - - Args: - direction: The direction to push the frame (upstream or downstream). - """ - frame = self._get_context_frame() - await self.push_frame(frame, direction) - - def add_messages(self, messages): - """Add messages to the context. - - Args: - messages: Messages to add to the conversation context. - """ - self._context.add_messages(messages) - - def set_messages(self, messages): - """Set the context messages. - - Args: - messages: Messages to replace the current context messages. - """ - self._context.set_messages(messages) - - def set_tools(self, tools: List): - """Set tools in the context. - - Args: - tools: List of tool definitions to set in the context. - """ - self._context.set_tools(tools) - - def set_tool_choice(self, tool_choice: Literal["none", "auto", "required"] | dict): - """Set tool choice in the context. - - Args: - tool_choice: Tool choice configuration for the context. - """ - self._context.set_tool_choice(tool_choice) - - async def reset(self): - """Reset the aggregation state.""" - self._aggregation = "" - - -class LLMUserContextAggregator(LLMContextResponseAggregator): - """User LLM aggregator that processes speech-to-text transcriptions. - - This aggregator handles the complex logic of aggregating user speech transcriptions - from STT services. It manages multiple scenarios including: - - - Transcriptions received between VAD events - - Transcriptions received outside VAD events - - Interim vs final transcriptions - - User interruptions during bot speech - - Emulated VAD for whispered or short utterances - - The aggregator uses timeouts to handle cases where transcriptions arrive - after VAD events or when no VAD is available. - - .. deprecated:: 0.0.99 - `LLMUserContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - def __init__( - self, - context: OpenAILLMContext, - *, - params: Optional[LLMUserAggregatorParams] = None, - **kwargs, - ): - """Initialize the user context aggregator. - - Args: - context: The OpenAI LLM context for conversation storage. - params: Configuration parameters for aggregation behavior. - **kwargs: Additional arguments. Supports deprecated 'aggregation_timeout'. - - .. deprecated:: 0.0.99 - `LLMUserContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - # Super handles deprecation warning - super().__init__(context=context, role="user", **kwargs) - self._params = params or LLMUserAggregatorParams() - self._vad_params: Optional[VADParams] = None - self._turn_params: Optional[SmartTurnParams] = None - - if "aggregation_timeout" in kwargs: - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Parameter 'aggregation_timeout' is deprecated, use 'params' instead.", - DeprecationWarning, - ) - - self._params.aggregation_timeout = kwargs["aggregation_timeout"] - - self._user_speaking = False - self._bot_speaking = False - self._was_bot_speaking = False - self._emulating_vad = False - self._seen_interim_results = False - self._waiting_for_aggregation = False - - self._aggregation_event = asyncio.Event() - self._aggregation_task = None - - async def reset(self): - """Reset the aggregation state and interruption strategies.""" - await super().reset() - self._was_bot_speaking = False - self._seen_interim_results = False - self._waiting_for_aggregation = False - [await s.reset() for s in self._interruption_strategies] - - async def handle_aggregation(self, aggregation: str): - """Add the aggregated user text to the context. - - Args: - aggregation: The aggregated user text to add as a user message. - """ - self._context.add_message({"role": self.role, "content": aggregation}) - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process frames for user speech aggregation and context management. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - await super().process_frame(frame, direction) - - if isinstance(frame, StartFrame): - # Push StartFrame before start(), because we want StartFrame to be - # processed by every processor before any other frame is processed. - await self.push_frame(frame, direction) - await self._start(frame) - elif isinstance(frame, EndFrame): - # Push EndFrame before stop(), because stop() waits on the task to - # finish and the task finishes when EndFrame is processed. - await self.push_frame(frame, direction) - await self._stop(frame) - elif isinstance(frame, CancelFrame): - await self._cancel(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, InputAudioRawFrame): - await self._handle_input_audio(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, UserStartedSpeakingFrame): - await self._handle_user_started_speaking(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, UserStoppedSpeakingFrame): - await self._handle_user_stopped_speaking(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, BotStartedSpeakingFrame): - await self._handle_bot_started_speaking(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, BotStoppedSpeakingFrame): - await self._handle_bot_stopped_speaking(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, TranscriptionFrame): - await self._handle_transcription(frame) - elif isinstance(frame, InterimTranscriptionFrame): - await self._handle_interim_transcription(frame) - elif isinstance(frame, LLMRunFrame): - await self._handle_llm_run(frame) - elif isinstance(frame, LLMMessagesAppendFrame): - await self._handle_llm_messages_append(frame) - elif isinstance(frame, LLMMessagesUpdateFrame): - await self._handle_llm_messages_update(frame) - elif isinstance(frame, LLMSetToolsFrame): - self.set_tools(frame.tools) - elif isinstance(frame, LLMSetToolChoiceFrame): - self.set_tool_choice(frame.tool_choice) - elif isinstance(frame, SpeechControlParamsFrame): - self._vad_params = frame.vad_params - self._turn_params = frame.turn_params - await self.push_frame(frame, direction) - else: - await self.push_frame(frame, direction) - - async def _process_aggregation(self): - """Process the current aggregation and push it downstream.""" - aggregation = self._aggregation - await self.reset() - await self.handle_aggregation(aggregation) - frame = OpenAILLMContextFrame(self._context) - await self.push_frame(frame) - - async def push_aggregation(self): - """Push the current aggregation based on interruption strategies and conditions.""" - if len(self._aggregation) > 0: - if self.interruption_strategies and self._bot_speaking: - should_interrupt = await self._should_interrupt_based_on_strategies() - - if should_interrupt: - logger.debug( - "Interruption conditions met - pushing interruption and aggregation" - ) - await self.broadcast_interruption() - await self._process_aggregation() - else: - logger.debug("Interruption conditions not met - not pushing aggregation") - # Don't process aggregation, just reset it - await self.reset() - else: - # No interruption config - normal behavior (always push aggregation) - await self._process_aggregation() - # Handles the case where both the user and the bot are not speaking, - # and the bot was previously speaking before the user interruption. - # Normally, when the user stops speaking, new text is expected, - # which triggers the bot to respond. However, if no new text - # is received, this safeguard ensures - # the bot doesn't hang indefinitely while waiting to speak again. - elif not self._seen_interim_results and self._was_bot_speaking and not self._bot_speaking: - logger.warning("User stopped speaking but no new aggregation received.") - # Resetting it so we don't trigger this twice - self._was_bot_speaking = False - # TODO: we are not enabling this for now, due to some STT services which can take as long as 2 seconds two return a transcription - # So we need more tests and probably make this feature configurable, disabled it by default. - # We are just pushing the same previous context to be processed again in this case - # await self.push_frame(OpenAILLMContextFrame(self._context)) - - async def _should_interrupt_based_on_strategies(self) -> bool: - """Check if interruption should occur based on configured strategies. - - Returns: - True if any interruption strategy indicates interruption should occur. - """ - - async def should_interrupt(strategy: BaseInterruptionStrategy): - await strategy.append_text(self._aggregation) - return await strategy.should_interrupt() - - return any([await should_interrupt(s) for s in self._interruption_strategies]) - - async def _start(self, frame: StartFrame): - self._create_aggregation_task() - - async def _stop(self, frame: EndFrame): - await self._cancel_aggregation_task() - - async def _cancel(self, frame: CancelFrame): - await self._cancel_aggregation_task() - - async def _handle_llm_run(self, frame: LLMRunFrame): - await self.push_context_frame() - - async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame): - self.add_messages(frame.messages) - if frame.run_llm: - await self.push_context_frame() - - async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame): - self.set_messages(frame.messages) - if frame.run_llm: - await self.push_context_frame() - - async def _handle_input_audio(self, frame: InputAudioRawFrame): - for s in self.interruption_strategies: - await s.append_audio(frame.audio, frame.sample_rate) - - async def _handle_user_started_speaking(self, frame: UserStartedSpeakingFrame): - self._user_speaking = True - self._waiting_for_aggregation = True - self._was_bot_speaking = self._bot_speaking - - # If we get a non-emulated UserStartedSpeakingFrame but we are in the - # middle of emulating VAD, let's stop emulating VAD (i.e. don't send the - # EmulateUserStoppedSpeakingFrame). - if not frame.emulated and self._emulating_vad: - self._emulating_vad = False - - async def _handle_user_stopped_speaking(self, _: UserStoppedSpeakingFrame): - self._user_speaking = False - # We just stopped speaking. Let's see if there's some aggregation to - # push. If the last thing we saw is an interim transcription, let's wait - # pushing the aggregation as we will probably get a final transcription. - if len(self._aggregation) > 0: - if not self._seen_interim_results: - await self.push_aggregation() - # Handles the case where both the user and the bot are not speaking, - # and the bot was previously speaking before the user interruption. - # So in this case we are resetting the aggregation timer - elif not self._seen_interim_results and self._was_bot_speaking and not self._bot_speaking: - # Reset aggregation timer. - self._aggregation_event.set() - - async def _handle_bot_started_speaking(self, _: BotStartedSpeakingFrame): - self._bot_speaking = True - - async def _handle_bot_stopped_speaking(self, _: BotStoppedSpeakingFrame): - self._bot_speaking = False - - async def _handle_transcription(self, frame: TranscriptionFrame): - text = frame.text - - # Make sure we really have some text. - if not text.strip(): - return - - self._aggregation += f" {text}" if self._aggregation else text - # We just got a final result, so let's reset interim results. - self._seen_interim_results = False - # Reset aggregation timer. - self._aggregation_event.set() - - async def _handle_interim_transcription(self, _: InterimTranscriptionFrame): - self._seen_interim_results = True - - def _create_aggregation_task(self): - if not self._aggregation_task: - self._aggregation_task = self.create_task(self._aggregation_task_handler()) - - async def _cancel_aggregation_task(self): - if self._aggregation_task: - await self.cancel_task(self._aggregation_task) - self._aggregation_task = None - - async def _aggregation_task_handler(self): - while True: - try: - # The _aggregation_task_handler handles two distinct timeout scenarios: - # - # 1. When emulating_vad=True: Wait for emulated VAD timeout before - # pushing aggregation (simulating VAD behavior when no actual VAD - # detection occurred). - # - # 2. When emulating_vad=False: Use aggregation_timeout as a buffer - # to wait for potential late-arriving transcription frames after - # a real VAD event. - # - # For emulated VAD scenarios, the timeout strategy depends on whether - # a turn analyzer is configured: - # - # - WITH turn analyzer: Use turn_emulated_vad_timeout parameter because - # the VAD's stop_secs is set very low (e.g. 0.2s) for rapid speech - # chunking to feed the turn analyzer. This low value is too fast - # for emulated VAD scenarios where we need to allow users time to - # finish speaking (e.g. 0.8s). - # - # - WITHOUT turn analyzer: Use VAD's stop_secs directly to maintain - # consistent user experience between real VAD detection and - # emulated VAD scenarios. - if not self._emulating_vad: - timeout = self._params.aggregation_timeout - elif self._turn_params: - timeout = self._params.turn_emulated_vad_timeout - else: - # Use VAD stop_secs when no turn analyzer is present, fallback if no VAD params - timeout = ( - self._vad_params.stop_secs - if self._vad_params - else self._params.turn_emulated_vad_timeout - ) - await asyncio.wait_for(self._aggregation_event.wait(), timeout=timeout) - await self._maybe_emulate_user_speaking() - except asyncio.TimeoutError: - if not self._user_speaking: - await self.push_aggregation() - - # If we are emulating VAD we still need to send the user stopped - # speaking frame. - if self._emulating_vad: - await self.push_frame( - EmulateUserStoppedSpeakingFrame(), FrameDirection.UPSTREAM - ) - self._emulating_vad = False - finally: - self._aggregation_event.clear() - - async def _maybe_emulate_user_speaking(self): - """Maybe emulate user speaking based on transcription. - - Emulate user speaking if we got a transcription but it was not - detected by VAD. Behavior when bot is speaking depends on the - enable_emulated_vad_interruptions parameter. - """ - # Check if we received a transcription but VAD was not able to detect - # voice (e.g. when you whisper a short utterance). In that case, we need - # to emulate VAD (i.e. user start/stopped speaking). - if ( - not self._user_speaking - and not self._waiting_for_aggregation - and len(self._aggregation) > 0 - ): - if self._bot_speaking and not self._params.enable_emulated_vad_interruptions: - # If emulated VAD interruptions are disabled and bot is speaking, ignore - logger.debug("Ignoring user speaking emulation, bot is speaking.") - await self.reset() - else: - # Either bot is not speaking, or emulated VAD interruptions are enabled - # - trigger user speaking emulation. - await self.push_frame(EmulateUserStartedSpeakingFrame(), FrameDirection.UPSTREAM) - self._emulating_vad = True - - -class LLMAssistantContextAggregator(LLMContextResponseAggregator): - """Assistant LLM aggregator that processes bot responses and function calls. - - This aggregator handles the complex logic of processing assistant responses including: - - - Text frame aggregation between response start/end markers - - Function call lifecycle management - - Context updates with timestamps - - Tool execution and result handling - - Interruption handling during responses - - The aggregator manages function calls in progress and coordinates between - text generation and tool execution phases of LLM responses. - - .. deprecated:: 0.0.99 - `LLMAssistantContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - def __init__( - self, - context: OpenAILLMContext, - *, - params: Optional[LLMAssistantAggregatorParams] = None, - **kwargs, - ): - """Initialize the assistant context aggregator. - - Args: - context: The OpenAI LLM context for conversation storage. - params: Configuration parameters for aggregation behavior. - **kwargs: Additional arguments. Supports deprecated 'expect_stripped_words'. - - .. deprecated:: 0.0.99 - `LLMAssistantContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - # Super handles deprecation warning - super().__init__(context=context, role="assistant", **kwargs) - self._params = params or LLMAssistantAggregatorParams() - - if "expect_stripped_words" in kwargs: - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Parameter 'expect_stripped_words' is deprecated, use 'params' instead.", - DeprecationWarning, - ) - - self._params.expect_stripped_words = kwargs["expect_stripped_words"] - - self._started = 0 - self._function_calls_in_progress: Dict[str, Optional[FunctionCallInProgressFrame]] = {} - self._context_updated_tasks: Set[asyncio.Task] = set() - - @property - def has_function_calls_in_progress(self) -> bool: - """Check if there are any function calls currently in progress. - - Returns: - True if function calls are in progress, False otherwise. - """ - return bool(self._function_calls_in_progress) - - async def handle_aggregation(self, aggregation: str): - """Add the aggregated assistant text to the context. - - Args: - aggregation: The aggregated assistant text to add as an assistant message. - """ - self._context.add_message({"role": "assistant", "content": aggregation}) - - async def handle_function_call_in_progress(self, frame: FunctionCallInProgressFrame): - """Handle a function call that is in progress. - - Args: - frame: The function call in progress frame to handle. - """ - pass - - async def handle_function_call_result(self, frame: FunctionCallResultFrame): - """Handle the result of a completed function call. - - Args: - frame: The function call result frame to handle. - """ - pass - - async def handle_function_call_cancel(self, frame: FunctionCallCancelFrame): - """Handle cancellation of a function call. - - Args: - frame: The function call cancel frame to handle. - """ - pass - - async def handle_user_image_frame(self, frame: UserImageRawFrame): - """Handle a user image frame associated with a function call. - - Args: - frame: The user image frame to handle. - """ - pass - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process frames for assistant response aggregation and function call management. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - await super().process_frame(frame, direction) - - if isinstance(frame, InterruptionFrame): - await self._handle_interruptions(frame) - await self.push_frame(frame, direction) - elif isinstance(frame, LLMFullResponseStartFrame): - await self._handle_llm_start(frame) - elif isinstance(frame, LLMFullResponseEndFrame): - await self._handle_llm_end(frame) - elif isinstance(frame, TextFrame): - await self._handle_text(frame) - elif isinstance(frame, LLMRunFrame): - await self._handle_llm_run(frame) - elif isinstance(frame, LLMMessagesAppendFrame): - await self._handle_llm_messages_append(frame) - elif isinstance(frame, LLMMessagesUpdateFrame): - await self._handle_llm_messages_update(frame) - elif isinstance(frame, LLMSetToolsFrame): - self.set_tools(frame.tools) - elif isinstance(frame, LLMSetToolChoiceFrame): - self.set_tool_choice(frame.tool_choice) - elif isinstance(frame, FunctionCallsStartedFrame): - await self._handle_function_calls_started(frame) - elif isinstance(frame, FunctionCallInProgressFrame): - await self._handle_function_call_in_progress(frame) - elif isinstance(frame, FunctionCallResultFrame): - await self._handle_function_call_result(frame) - elif isinstance(frame, FunctionCallCancelFrame): - await self._handle_function_call_cancel(frame) - elif isinstance(frame, UserImageRawFrame) and frame.request and frame.request.tool_call_id: - await self._handle_user_image_frame(frame) - elif isinstance(frame, BotStoppedSpeakingFrame): - await self.push_aggregation() - await self.push_frame(frame, direction) - else: - await self.push_frame(frame, direction) - - async def push_aggregation(self): - """Push the current assistant aggregation with timestamp.""" - if not self._aggregation: - return - - aggregation = self._aggregation.strip() - await self.reset() - - if aggregation: - await self.handle_aggregation(aggregation) - - # Push context frame - await self.push_context_frame() - - # Push timestamp frame with current time - timestamp_frame = OpenAILLMContextAssistantTimestampFrame(timestamp=time_now_iso8601()) - await self.push_frame(timestamp_frame) - - async def _handle_llm_run(self, frame: LLMRunFrame): - await self.push_context_frame(FrameDirection.UPSTREAM) - - async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame): - self.add_messages(frame.messages) - if frame.run_llm: - await self.push_context_frame(FrameDirection.UPSTREAM) - - async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame): - self.set_messages(frame.messages) - if frame.run_llm: - await self.push_context_frame(FrameDirection.UPSTREAM) - - async def _handle_interruptions(self, frame: InterruptionFrame): - await self.push_aggregation() - self._started = 0 - await self.reset() - - async def _handle_function_calls_started(self, frame: FunctionCallsStartedFrame): - function_names = [f"{f.function_name}:{f.tool_call_id}" for f in frame.function_calls] - logger.debug(f"{self} FunctionCallsStartedFrame: {function_names}") - for function_call in frame.function_calls: - self._function_calls_in_progress[function_call.tool_call_id] = None - - async def _handle_function_call_in_progress(self, frame: FunctionCallInProgressFrame): - logger.debug( - f"{self} FunctionCallInProgressFrame: [{frame.function_name}:{frame.tool_call_id}]" - ) - await self.handle_function_call_in_progress(frame) - self._function_calls_in_progress[frame.tool_call_id] = frame - - async def _handle_function_call_result(self, frame: FunctionCallResultFrame): - logger.debug( - f"{self} FunctionCallResultFrame: [{frame.function_name}:{frame.tool_call_id}]" - ) - if frame.tool_call_id not in self._function_calls_in_progress: - logger.warning( - f"FunctionCallResultFrame tool_call_id [{frame.tool_call_id}] is not running" - ) - return - - del self._function_calls_in_progress[frame.tool_call_id] - - properties = frame.properties - - await self.handle_function_call_result(frame) - - run_llm = False - - # Run inference if the function call result requires it. - if frame.result: - if properties and properties.run_llm is not None: - # If the tool call result has a run_llm property, use it. - run_llm = properties.run_llm - elif frame.run_llm is not None: - # If the frame is indicating we should run the LLM, do it. - run_llm = frame.run_llm - else: - # If this is the last function call in progress, run the LLM. - run_llm = not bool(self._function_calls_in_progress) - - if run_llm: - await self.push_context_frame(FrameDirection.UPSTREAM) - - # Call the `on_context_updated` callback once the function call result - # is added to the context. Also, run this in a separate task to make - # sure we don't block the pipeline. - if properties and properties.on_context_updated: - task_name = f"{frame.function_name}:{frame.tool_call_id}:on_context_updated" - task = self.create_task(properties.on_context_updated(), task_name) - self._context_updated_tasks.add(task) - task.add_done_callback(self._context_updated_task_finished) - - async def _handle_function_call_cancel(self, frame: FunctionCallCancelFrame): - logger.debug( - f"{self} FunctionCallCancelFrame: [{frame.function_name}:{frame.tool_call_id}]" - ) - function_call = self._function_calls_in_progress.get(frame.tool_call_id) - if function_call and function_call.cancel_on_interruption: - await self.handle_function_call_cancel(frame) - del self._function_calls_in_progress[frame.tool_call_id] - - async def _handle_user_image_frame(self, frame: UserImageRawFrame): - logger.debug( - f"{self} UserImageRawFrame: [{frame.request.function_name}:{frame.request.tool_call_id}]" - ) - - if frame.request.tool_call_id not in self._function_calls_in_progress: - logger.warning( - f"UserImageRawFrame tool_call_id [{frame.request.tool_call_id}] is not running" - ) - return - - del self._function_calls_in_progress[frame.request.tool_call_id] - - # Call the result_callback if provided. This signals that the image - # has been retrieved and the function call can now complete. - if frame.request and frame.request.result_callback: - await frame.request.result_callback(None) - - await self.handle_user_image_frame(frame) - await self.push_aggregation() - await self.push_context_frame(FrameDirection.UPSTREAM) - - async def _handle_llm_start(self, _: LLMFullResponseStartFrame): - self._started += 1 - - async def _handle_llm_end(self, _: LLMFullResponseEndFrame): - self._started -= 1 - await self.push_aggregation() - - async def _handle_text(self, frame: TextFrame): - if not frame.append_to_context: - return - - if self._params.expect_stripped_words: - self._aggregation += f" {frame.text}" if self._aggregation else frame.text - else: - self._aggregation += frame.text - - def _context_updated_task_finished(self, task: asyncio.Task): - self._context_updated_tasks.discard(task) - - -class LLMUserResponseAggregator(LLMUserContextAggregator): - """User response aggregator that outputs LLMMessagesFrame instead of context frames. - - .. deprecated:: 0.0.79 - This class is deprecated and will be removed in a future version. - Use `LLMUserContextAggregator` or another LLM-specific subclass instead. - - This aggregator extends LLMUserContextAggregator but pushes LLMMessagesFrame - objects downstream instead of OpenAILLMContextFrame objects. This is useful - when you need message-based output rather than context-based output. - """ - - def __init__( - self, - messages: Optional[List[dict]] = None, - *, - params: Optional[LLMUserAggregatorParams] = None, - **kwargs, - ): - """Initialize the user response aggregator. - - Args: - messages: Initial messages for the conversation context. - params: Configuration parameters for aggregation behavior. - **kwargs: Additional arguments passed to parent class. - """ - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "LLMUserResponseAggregator is deprecated and will be removed in a future version. " - "Use LLMUserContextAggregator or another LLM-specific subclass instead.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs) - - async def _process_aggregation(self): - """Process the current aggregation and push it downstream.""" - aggregation = self._aggregation - await self.reset() - await self.handle_aggregation(aggregation) - frame = LLMMessagesFrame(self._context.messages) - await self.push_frame(frame) - - -class LLMAssistantResponseAggregator(LLMAssistantContextAggregator): - """Assistant response aggregator that outputs LLMMessagesFrame instead of context frames. - - .. deprecated:: 0.0.79 - This class is deprecated and will be removed in a future version. - Use `LLMAssistantContextAggregator` or another LLM-specific subclass instead. - - This aggregator extends LLMAssistantContextAggregator but pushes LLMMessagesFrame - objects downstream instead of OpenAILLMContextFrame objects. This is useful - when you need message-based output rather than context-based output. - """ - - def __init__( - self, - messages: Optional[List[dict]] = None, - *, - params: Optional[LLMAssistantAggregatorParams] = None, - **kwargs, - ): - """Initialize the assistant response aggregator. - - Args: - messages: Initial messages for the conversation context. - params: Configuration parameters for aggregation behavior. - **kwargs: Additional arguments passed to parent class. - """ - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "LLMAssistantResponseAggregator is deprecated and will be removed in a future version. " - "Use LLMAssistantContextAggregator or another LLM-specific subclass instead.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs) - - async def push_aggregation(self): - """Push the aggregated assistant response as an LLMMessagesFrame.""" - if len(self._aggregation) > 0: - await self.handle_aggregation(self._aggregation) - - # Reset the aggregation. Reset it before pushing it down, otherwise - # if the tasks gets cancelled we won't be able to clear things up. - await self.reset() - - frame = LLMMessagesFrame(self._context.messages) - await self.push_frame(frame) diff --git a/src/pipecat/processors/aggregators/openai_llm_context.py b/src/pipecat/processors/aggregators/openai_llm_context.py deleted file mode 100644 index f75625156..000000000 --- a/src/pipecat/processors/aggregators/openai_llm_context.py +++ /dev/null @@ -1,413 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""OpenAI LLM context management for Pipecat. - -This module provides classes for managing OpenAI-specific conversation contexts, -including message handling, tool management, and image/audio processing capabilities. - -.. deprecated:: 0.0.99 - This module is deprecated. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. -""" - -import base64 -import copy -import io -import json -import warnings -from dataclasses import dataclass -from typing import Any, Dict, List, Optional - -from openai._types import NOT_GIVEN, NotGiven -from openai.types.chat import ( - ChatCompletionMessageParam, - ChatCompletionToolChoiceOptionParam, - ChatCompletionToolParam, -) -from PIL import Image - -from pipecat.adapters.base_llm_adapter import BaseLLMAdapter -from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.frames.frames import AudioRawFrame, Frame - -# JSON custom encoder to handle bytes arrays so that we can log contexts -# with images to the console. - - -class CustomEncoder(json.JSONEncoder): - """Custom JSON encoder for handling special data types in logging. - - Provides specialized encoding for io.BytesIO objects to display - readable representations in log output instead of raw binary data. - """ - - def default(self, obj): - """Encode special objects for JSON serialization. - - Args: - obj: The object to encode. - - Returns: - Encoded representation of the object. - """ - if isinstance(obj, io.BytesIO): - # Convert the first 8 bytes to an ASCII hex string - return f"{obj.getbuffer()[0:8].hex()}..." - return super().default(obj) - - -class OpenAILLMContext: - """Manages conversation context for OpenAI LLM interactions. - - Handles message history, tool definitions, tool choices, and multimedia content - for OpenAI API conversations. Provides methods for message manipulation, - content formatting, and integration with various LLM adapters. - - .. deprecated:: 0.0.99 - `OpenAILLMContext` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - - **Before:** - - context = OpenAILLMContext(messages, tools) - context_aggregator = llm.create_context_aggregator(context) - - **After:** - - context = LLMContext(messages, tools) - context_aggregator = LLMContextAggregatorPair(context) - """ - - def __init__( - self, - messages: Optional[List[ChatCompletionMessageParam]] = None, - tools: List[ChatCompletionToolParam] | NotGiven | ToolsSchema = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - ): - """Initialize the OpenAI LLM context. - - Args: - messages: Initial list of conversation messages. - tools: Available tools for the LLM to use. - tool_choice: Tool selection strategy for the LLM. - - .. deprecated:: 0.0.99 - `OpenAILLMContext` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "OpenAILLMContext is deprecated and will be removed in a future version. " - "Use the universal LLMContext and LLMContextAggregatorPair instead. " - "See OpenAILLMContext docstring for migration guide.", - DeprecationWarning, - stacklevel=2, - ) - self._messages: List[ChatCompletionMessageParam] = messages if messages else [] - self._tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = tool_choice - self._tools: List[ChatCompletionToolParam] | NotGiven | ToolsSchema = tools - self._llm_adapter: Optional[BaseLLMAdapter] = None - - def get_llm_adapter(self) -> Optional[BaseLLMAdapter]: - """Get the current LLM adapter. - - Returns: - The currently set LLM adapter, or None if not set. - """ - return self._llm_adapter - - def set_llm_adapter(self, llm_adapter: BaseLLMAdapter): - """Set the LLM adapter for context processing. - - Args: - llm_adapter: The LLM adapter to use for tool conversion. - """ - self._llm_adapter = llm_adapter - - @staticmethod - def from_messages(messages: List[dict]) -> "OpenAILLMContext": - """Create a context from a list of message dictionaries. - - Args: - messages: List of message dictionaries to convert to context. - - Returns: - New OpenAILLMContext instance with the provided messages. - """ - context = OpenAILLMContext() - - for message in messages: - context.add_message(message) - return context - - @property - def messages(self) -> List[ChatCompletionMessageParam]: - """Get the current messages list. - - Returns: - List of conversation messages. - """ - return self._messages - - @property - def tools(self) -> List[ChatCompletionToolParam] | NotGiven | List[Any]: - """Get the tools list, converting through adapter if available. - - Returns: - Tools list, potentially converted by the LLM adapter. - """ - if self._llm_adapter: - return self._llm_adapter.from_standard_tools(self._tools) - return self._tools - - @property - def tool_choice(self) -> ChatCompletionToolChoiceOptionParam | NotGiven: - """Get the current tool choice setting. - - Returns: - The tool choice configuration. - """ - return self._tool_choice - - def add_message(self, message: ChatCompletionMessageParam): - """Add a single message to the context. - - Args: - message: The message to add to the conversation history. - """ - self._messages.append(message) - - def add_messages(self, messages: List[ChatCompletionMessageParam]): - """Add multiple messages to the context. - - Args: - messages: List of messages to add to the conversation history. - """ - self._messages.extend(messages) - - def set_messages(self, messages: List[ChatCompletionMessageParam]): - """Replace all messages in the context. - - Args: - messages: New list of messages to replace the current history. - """ - self._messages[:] = messages - - def get_messages(self) -> List[ChatCompletionMessageParam]: - """Get a copy of the current messages list. - - Returns: - List of all messages in the conversation history. - """ - return self._messages - - def get_messages_json(self) -> str: - """Get messages as a formatted JSON string. - - Returns: - JSON string representation of all messages with custom encoding. - """ - return json.dumps(self._messages, cls=CustomEncoder, ensure_ascii=False, indent=2) - - def get_messages_for_logging(self) -> List[Dict[str, Any]]: - """Get sanitized messages suitable for logging. - - Removes or truncates sensitive data like image content for safe logging. - - Returns: - List of messages in a format ready for logging. - """ - msgs = [] - for message in self.messages: - msg = copy.deepcopy(message) - if "content" in msg: - if isinstance(msg["content"], list): - for item in msg["content"]: - if item["type"] == "image_url": - if item["image_url"]["url"].startswith("data:image/"): - item["image_url"]["url"] = "data:image/..." - if "mime_type" in msg and msg["mime_type"].startswith("image/"): - msg["data"] = "..." - msgs.append(msg) - return msgs - - def from_standard_message(self, message): - """Convert from OpenAI message format to OpenAI message format (passthrough). - - OpenAI's format allows both simple string content and structured content:: - - Simple: {"role": "user", "content": "Hello"} - Structured: {"role": "user", "content": [{"type": "text", "text": "Hello"}]} - - Since OpenAI is our standard format, this is a passthrough function. - - Args: - message: Message in OpenAI format. - - Returns: - Same message, unchanged. - """ - return message - - def to_standard_messages(self, obj) -> list: - """Convert from OpenAI message format to OpenAI message format (passthrough). - - OpenAI's format is our standard format throughout Pipecat. This function - returns a list containing the original message to maintain consistency with - other LLM services that may need to return multiple messages. - - Args: - obj: Message in OpenAI format with either simple string content - or structured list content. - - Returns: - List containing the original messages, preserving the content format. - """ - return [obj] - - def get_messages_for_initializing_history(self): - """Get messages for initializing conversation history. - - Returns: - List of messages suitable for history initialization. - """ - return self._messages - - def get_messages_for_persistent_storage(self): - """Get messages formatted for persistent storage. - - Returns: - List of messages converted to standard format for storage. - """ - messages = [] - for m in self._messages: - standard_messages = self.to_standard_messages(m) - messages.extend(standard_messages) - return messages - - def set_tool_choice(self, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven): - """Set the tool choice configuration. - - Args: - tool_choice: Tool selection strategy for the LLM. - """ - self._tool_choice = tool_choice - - def set_tools(self, tools: List[ChatCompletionToolParam] | NotGiven | ToolsSchema = NOT_GIVEN): - """Set the available tools for the LLM. - - Args: - tools: List of tools available to the LLM, or NOT_GIVEN to disable tools. - """ - if tools != NOT_GIVEN and isinstance(tools, list) and len(tools) == 0: - tools = NOT_GIVEN - self._tools = tools - - def add_image_frame_message( - self, *, format: str, size: tuple[int, int], image: bytes, text: str = None - ): - """Add a message containing an image frame. - - Args: - format: Image format (e.g., 'RGB', 'RGBA'). - size: Image dimensions as (width, height) tuple. - image: Raw image bytes. - text: Optional text to include with the image. - """ - buffer = io.BytesIO() - Image.frombytes(format, size, image).save(buffer, format="JPEG") - encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8") - - content = [] - if text: - content.append({"type": "text", "text": text}) - content.append( - {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}, - ) - self.add_message({"role": "user", "content": content}) - - def add_audio_frames_message(self, *, audio_frames: list[AudioRawFrame], text: str = None): - """Add a message containing audio frames. - - Args: - audio_frames: List of audio frame objects to include. - text: Optional text to include with the audio. - - Note: - This method is currently a placeholder for future implementation. - """ - # todo: implement for OpenAI models and others - pass - - def create_wav_header(self, sample_rate, num_channels, bits_per_sample, data_size): - """Create a WAV file header for audio data. - - Args: - sample_rate: Audio sample rate in Hz. - num_channels: Number of audio channels. - bits_per_sample: Bits per audio sample. - data_size: Size of audio data in bytes. - - Returns: - WAV header as a bytearray. - """ - # RIFF chunk descriptor - header = bytearray() - header.extend(b"RIFF") # ChunkID - header.extend((data_size + 36).to_bytes(4, "little")) # ChunkSize: total size - 8 - header.extend(b"WAVE") # Format - # "fmt " sub-chunk - header.extend(b"fmt ") # Subchunk1ID - header.extend((16).to_bytes(4, "little")) # Subchunk1Size (16 for PCM) - header.extend((1).to_bytes(2, "little")) # AudioFormat (1 for PCM) - header.extend(num_channels.to_bytes(2, "little")) # NumChannels - header.extend(sample_rate.to_bytes(4, "little")) # SampleRate - # Calculate byte rate and block align - byte_rate = sample_rate * num_channels * (bits_per_sample // 8) - block_align = num_channels * (bits_per_sample // 8) - header.extend(byte_rate.to_bytes(4, "little")) # ByteRate - header.extend(block_align.to_bytes(2, "little")) # BlockAlign - header.extend(bits_per_sample.to_bytes(2, "little")) # BitsPerSample - # "data" sub-chunk - header.extend(b"data") # Subchunk2ID - header.extend(data_size.to_bytes(4, "little")) # Subchunk2Size - return header - - -@dataclass -class OpenAILLMContextFrame(Frame): - """Frame containing OpenAI-specific LLM context. - - Like an LLMMessagesFrame, but with extra context specific to the OpenAI - API. The context in this message is also mutable, and will be changed by the - OpenAIContextAggregator frame processor. - - .. deprecated:: 0.0.99 - `OpenAILLMContextFrame` is deprecated and will be removed in a future version. - Use `LLMContextFrame` with the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Parameters: - context: The OpenAI LLM context containing messages, tools, and configuration. - """ - - context: OpenAILLMContext - - def __post_init__(self): - super().__post_init__() - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "OpenAILLMContextFrame is deprecated and will be removed in a future version. " - "Use LLMContextFrame with the universal `LLMContext` and `LLMContextAggregatorPair` instead. " - "See OpenAILLMContext docstring for migration guide.", - DeprecationWarning, - stacklevel=2, - ) diff --git a/src/pipecat/processors/aggregators/vision_image_frame.py b/src/pipecat/processors/aggregators/vision_image_frame.py deleted file mode 100644 index e7360c07e..000000000 --- a/src/pipecat/processors/aggregators/vision_image_frame.py +++ /dev/null @@ -1,81 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Vision image frame aggregation for Pipecat. - -This module provides frame aggregation functionality to combine text and image -frames into vision frames for multimodal processing. -""" - -from pipecat.frames.frames import Frame, InputImageRawFrame, TextFrame -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) -from pipecat.processors.frame_processor import FrameDirection, FrameProcessor - - -class VisionImageFrameAggregator(FrameProcessor): - """Aggregates consecutive text and image frames into vision frames. - - .. deprecated:: 0.0.85 - VisionImageRawFrame has been removed in favor of context frames - (LLMContextFrame or OpenAILLMContextFrame), so this aggregator is not - needed anymore. See the 12* examples for the new recommended pattern. - - This aggregator waits for a consecutive TextFrame and an InputImageRawFrame. - After the InputImageRawFrame arrives it will output a VisionImageRawFrame - combining both the text and image data for multimodal processing. - """ - - def __init__(self): - """Initialize the vision image frame aggregator. - - The aggregator starts with no cached text, waiting for the first - TextFrame to arrive before it can create vision frames. - """ - import warnings - - warnings.warn( - "VisionImageFrameAggregator is deprecated. " - "VisionImageRawFrame has been removed in favor of context frames " - "(LLMContextFrame or OpenAILLMContextFrame), so this aggregator is " - "not needed anymore. See the 12* examples for the new recommended " - "pattern.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__() - self._describe_text = None - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process incoming frames and aggregate text with images. - - Caches TextFrames and combines them with subsequent InputImageRawFrames - to create VisionImageRawFrames. Other frames are passed through unchanged. - - Args: - frame: The incoming frame to process. - direction: The direction of frame flow in the pipeline. - """ - await super().process_frame(frame, direction) - - if isinstance(frame, TextFrame): - self._describe_text = frame.text - elif isinstance(frame, InputImageRawFrame): - if self._describe_text: - context = OpenAILLMContext() - context.add_image_frame_message( - text=self._describe_text, - image=frame.image, - size=frame.size, - format=frame.format, - ) - frame = OpenAILLMContextFrame(context) - await self.push_frame(frame) - self._describe_text = None - else: - await self.push_frame(frame, direction) diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index 8efb1353d..29b613527 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -196,7 +196,6 @@ class FrameProcessor(BaseObject): # Other properties (deprecated) self._allow_interruptions = False self._interruption_strategies: List[BaseInterruptionStrategy] = [] - self._deprecated_openaillmcontext = False # Indicates whether we have received the StartFrame. self.__started = False @@ -826,9 +825,6 @@ class FrameProcessor(BaseObject): self._interruption_strategies = frame.interruption_strategies self._report_only_initial_ttfb = frame.report_only_initial_ttfb - # NOTE(aleix): Remove when OpenAILLMContext/LLMUserContextAggregator is removed. - self._deprecated_openaillmcontext = "deprecated_openaillmcontext" in frame.metadata - self.__create_process_task() async def __cancel(self, frame: CancelFrame): diff --git a/src/pipecat/processors/frameworks/langchain.py b/src/pipecat/processors/frameworks/langchain.py index e569758ce..165f749ea 100644 --- a/src/pipecat/processors/frameworks/langchain.py +++ b/src/pipecat/processors/frameworks/langchain.py @@ -17,7 +17,6 @@ from pipecat.frames.frames import ( LLMFullResponseStartFrame, TextFrame, ) -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor try: @@ -65,15 +64,11 @@ class LangchainProcessor(FrameProcessor): """ await super().process_frame(frame, direction) - if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): + if isinstance(frame, LLMContextFrame): # Messages are accumulated on the context as a list of messages. # The last one by the human is the one we want to send to the LLM. logger.debug(f"Got transcription frame {frame}") - messages = ( - frame.context.messages - if isinstance(frame, OpenAILLMContextFrame) - else frame.context.get_messages() - ) + messages = frame.context.get_messages() text: str = messages[-1]["content"] await self._ainvoke(text.strip()) diff --git a/src/pipecat/processors/frameworks/rtvi/observer.py b/src/pipecat/processors/frameworks/rtvi/observer.py index 7944dd3eb..958ba8841 100644 --- a/src/pipecat/processors/frameworks/rtvi/observer.py +++ b/src/pipecat/processors/frameworks/rtvi/observer.py @@ -59,7 +59,6 @@ from pipecat.metrics.metrics import ( TTSUsageMetricsData, ) from pipecat.observers.base_observer import BaseObserver, FramePushed -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor from pipecat.processors.frameworks.rtvi.frames import ( RTVIServerMessageFrame, @@ -358,10 +357,7 @@ class RTVIObserver(BaseObserver): and self._params.user_transcription_enabled ): await self._handle_user_transcriptions(frame) - elif ( - isinstance(frame, (OpenAILLMContextFrame, LLMContextFrame)) - and self._params.user_llm_enabled - ): + elif isinstance(frame, LLMContextFrame) and self._params.user_llm_enabled: await self._handle_context(frame) elif isinstance(frame, LLMFullResponseStartFrame) and self._params.bot_llm_enabled: await self.send_rtvi_message(RTVI.BotLLMStartedMessage()) @@ -575,13 +571,10 @@ class RTVIObserver(BaseObserver): if message: await self.send_rtvi_message(message) - async def _handle_context(self, frame: OpenAILLMContextFrame | LLMContextFrame): + async def _handle_context(self, frame: LLMContextFrame): """Process LLM context frames to extract user messages for the RTVI client.""" try: - if isinstance(frame, OpenAILLMContextFrame): - messages = frame.context.messages - else: - messages = frame.context.get_messages() + messages = frame.context.get_messages() if not messages: return diff --git a/src/pipecat/processors/frameworks/strands_agents.py b/src/pipecat/processors/frameworks/strands_agents.py index 8022f5387..eb1edbfdc 100644 --- a/src/pipecat/processors/frameworks/strands_agents.py +++ b/src/pipecat/processors/frameworks/strands_agents.py @@ -16,7 +16,6 @@ from pipecat.frames.frames import ( LLMTextFrame, ) from pipecat.metrics.metrics import LLMTokenUsage -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor try: @@ -72,7 +71,7 @@ class StrandsAgentsProcessor(FrameProcessor): direction: The direction of frame flow in the pipeline. """ await super().process_frame(frame, direction) - if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): + if isinstance(frame, LLMContextFrame): messages = frame.context.get_messages() if messages: last_message = messages[-1] diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index f35d6226c..58dc1da07 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -37,7 +37,6 @@ from pipecat.frames.frames import ( LLMEnablePromptCachingFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, - LLMMessagesFrame, LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, @@ -45,16 +44,6 @@ from pipecat.frames.frames import ( ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMAssistantContextAggregator, - LLMUserAggregatorParams, - LLMUserContextAggregator, -) -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN @@ -115,44 +104,6 @@ class AnthropicLLMSettings(LLMSettings): return instance -@dataclass -class AnthropicContextAggregatorPair: - """Pair of context aggregators for Anthropic conversations. - - Encapsulates both user and assistant context aggregators - to manage conversation flow and message formatting. - - .. deprecated:: 0.0.99 - `AnthropicContextAggregatorPair` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Parameters: - _user: The user context aggregator. - _assistant: The assistant context aggregator. - """ - - # Aggregators handle deprecation warnings - _user: "AnthropicUserContextAggregator" - _assistant: "AnthropicAssistantContextAggregator" - - def user(self) -> "AnthropicUserContextAggregator": - """Get the user context aggregator. - - Returns: - The user context aggregator instance. - """ - return self._user - - def assistant(self) -> "AnthropicAssistantContextAggregator": - """Get the assistant context aggregator. - - Returns: - The assistant context aggregator instance. - """ - return self._assistant - - class AnthropicLLMService(LLMService): """LLM service for Anthropic's Claude models. @@ -351,7 +302,7 @@ class AnthropicLLMService(LLMService): async def run_inference( self, - context: LLMContext | OpenAILLMContext, + context: LLMContext, max_tokens: Optional[int] = None, system_instruction: Optional[str] = None, ) -> Optional[str]: @@ -371,21 +322,15 @@ class AnthropicLLMService(LLMService): system = NOT_GIVEN tools = [] effective_instruction = system_instruction or self._settings.system_instruction - if isinstance(context, LLMContext): - adapter: AnthropicLLMAdapter = self.get_llm_adapter() - invocation_params = adapter.get_llm_invocation_params( - context, - enable_prompt_caching=self._settings.enable_prompt_caching, - system_instruction=effective_instruction, - ) - messages = invocation_params["messages"] - system = invocation_params["system"] - tools = invocation_params["tools"] - else: - context = AnthropicLLMContext.upgrade_to_anthropic(context) - messages = context.messages - system = getattr(context, "system", NOT_GIVEN) - tools = context.tools or [] + adapter: AnthropicLLMAdapter = self.get_llm_adapter() + invocation_params = adapter.get_llm_invocation_params( + context, + enable_prompt_caching=self._settings.enable_prompt_caching, + system_instruction=effective_instruction, + ) + messages = invocation_params["messages"] + system = invocation_params["system"] + tools = invocation_params["tools"] # Build params using the same method as streaming completions params = { @@ -410,70 +355,17 @@ class AnthropicLLMService(LLMService): return next((block.text for block in response.content if hasattr(block, "text")), None) - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> AnthropicContextAggregatorPair: - """Create Anthropic-specific context aggregators. - - Creates a pair of context aggregators optimized for Anthropic's message format, - including support for function calls, tool usage, and image handling. - - Args: - context: The LLM context. - user_params: User aggregator parameters. - assistant_params: Assistant aggregator parameters. - - Returns: - A pair of context aggregators, one for the user and one for the assistant, - encapsulated in an AnthropicContextAggregatorPair. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - context.set_llm_adapter(self.get_llm_adapter()) - - if isinstance(context, OpenAILLMContext): - context = AnthropicLLMContext.from_openai_context(context) - - # Aggregators handle deprecation warnings - user = AnthropicUserContextAggregator(context, params=user_params) - assistant = AnthropicAssistantContextAggregator(context, params=assistant_params) - - return AnthropicContextAggregatorPair(_user=user, _assistant=assistant) - - def _get_llm_invocation_params( - self, context: OpenAILLMContext | LLMContext - ) -> AnthropicLLMInvocationParams: - # Universal LLMContext - if isinstance(context, LLMContext): - adapter: AnthropicLLMAdapter = self.get_llm_adapter() - params: AnthropicLLMInvocationParams = adapter.get_llm_invocation_params( - context, - enable_prompt_caching=self._settings.enable_prompt_caching, - system_instruction=self._settings.system_instruction, - ) - return params - - # Anthropic-specific context - messages = ( - context.get_messages_with_cache_control_markers() - if self._settings.enable_prompt_caching - else context.messages - ) - return AnthropicLLMInvocationParams( - system=context.system, - messages=messages, - tools=context.tools or [], + def _get_llm_invocation_params(self, context: LLMContext) -> AnthropicLLMInvocationParams: + adapter: AnthropicLLMAdapter = self.get_llm_adapter() + params: AnthropicLLMInvocationParams = adapter.get_llm_invocation_params( + context, + enable_prompt_caching=self._settings.enable_prompt_caching, + system_instruction=self._settings.system_instruction, ) + return params @traced_llm - async def _process_context(self, context: OpenAILLMContext | LLMContext): + async def _process_context(self, context: LLMContext): # Usage tracking. We track the usage reported by Anthropic in prompt_tokens and # completion_tokens. We also estimate the completion tokens from output text # and use that estimate if we are interrupted, because we almost certainly won't @@ -491,15 +383,10 @@ class AnthropicLLMService(LLMService): params_from_context = self._get_llm_invocation_params(context) - if isinstance(context, LLMContext): - adapter = self.get_llm_adapter() - context_type_for_logging = "universal" - messages_for_logging = adapter.get_messages_for_logging(context) - else: - context_type_for_logging = "LLM-specific" - messages_for_logging = context.get_messages_for_logging() + adapter = self.get_llm_adapter() + messages_for_logging = adapter.get_messages_for_logging(context) logger.debug( - f"{self}: Generating chat from {context_type_for_logging} context [{params_from_context['system']}] | {messages_for_logging}" + f"{self}: Generating chat from context [{params_from_context['system']}] | {messages_for_logging}" ) await self.start_ttfb_metrics() @@ -666,14 +553,8 @@ class AnthropicLLMService(LLMService): await super().process_frame(frame, direction) context = None - if isinstance(frame, OpenAILLMContextFrame): - context: "AnthropicLLMContext" = AnthropicLLMContext.upgrade_to_anthropic(frame.context) - elif isinstance(frame, LLMContextFrame): + if isinstance(frame, LLMContextFrame): context = frame.context - elif isinstance(frame, LLMMessagesFrame): - # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal - # LLMContext with it - context = AnthropicLLMContext.from_messages(frame.messages) elif isinstance(frame, LLMEnablePromptCachingFrame): logger.debug(f"Setting enable prompt caching to: [{frame.enable}]") self._settings.enable_prompt_caching = frame.enable @@ -707,581 +588,3 @@ class AnthropicLLMService(LLMService): total_tokens=prompt_tokens + completion_tokens, ) await self.start_llm_usage_metrics(tokens) - - -class AnthropicLLMContext(OpenAILLMContext): - """LLM context specialized for Anthropic's message format and features. - - Extends OpenAILLMContext to handle Anthropic-specific features like - system messages, prompt caching, and message format conversions. - Manages conversation state and message history formatting. - - .. deprecated:: 0.0.99 - `AnthropicLLMContext` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - def __init__( - self, - messages: Optional[List[dict]] = None, - tools: Optional[List[dict]] = None, - tool_choice: Optional[dict] = None, - *, - system: Union[str, NotGiven] = NOT_GIVEN, - ): - """Initialize the Anthropic LLM context. - - Args: - messages: Initial list of conversation messages. - tools: Available function calling tools. - tool_choice: Tool selection preference. - system: System message content. - """ - # Super handles deprecation warning - super().__init__(messages=messages, tools=tools, tool_choice=tool_choice) - self.__setup_local() - self.system = system - - def __setup_local(self): - # For beta prompt caching. This is a counter that tracks the number of turns - # we've seen above the cache threshold. We reset this when we reset the - # messages list. We only care about this number being 0, 1, or 2. But - # it's easiest just to treat it as a counter. - self.turns_above_cache_threshold = 0 - return - - @staticmethod - def upgrade_to_anthropic(obj: OpenAILLMContext) -> "AnthropicLLMContext": - """Upgrade an OpenAI context to Anthropic format. - - Converts message format and restructures content for Anthropic compatibility. - - Args: - obj: The OpenAI context to upgrade. - - Returns: - The upgraded Anthropic context. - """ - logger.debug(f"Upgrading to Anthropic: {obj}") - if isinstance(obj, OpenAILLMContext) and not isinstance(obj, AnthropicLLMContext): - obj.__class__ = AnthropicLLMContext - obj.__setup_local() - obj._restructure_from_openai_messages() - return obj - - @classmethod - def from_openai_context(cls, openai_context: OpenAILLMContext): - """Create Anthropic context from OpenAI context. - - Args: - openai_context: The OpenAI context to convert. - - Returns: - New Anthropic context with converted messages. - """ - self = cls( - messages=openai_context.messages, - tools=openai_context.tools, - tool_choice=openai_context.tool_choice, - ) - self.set_llm_adapter(openai_context.get_llm_adapter()) - self._restructure_from_openai_messages() - return self - - @classmethod - def from_messages(cls, messages: List[dict]) -> "AnthropicLLMContext": - """Create context from a list of messages. - - Args: - messages: List of conversation messages. - - Returns: - New Anthropic context with the provided messages. - """ - self = cls(messages=messages) - self._restructure_from_openai_messages() - return self - - def set_messages(self, messages: List): - """Set the messages list and reset cache tracking. - - Args: - messages: New list of messages to set. - """ - self.turns_above_cache_threshold = 0 - self._messages[:] = messages - self._restructure_from_openai_messages() - - def to_standard_messages(self, obj): - """Convert Anthropic message format to standard structured format. - - Handles text content and function calls for both user and assistant messages. - - Args: - obj: Message in Anthropic format. - - Returns: - List of messages in standard format. - - Examples: - Input Anthropic format:: - - { - "role": "assistant", - "content": [ - {"type": "text", "text": "Hello"}, - {"type": "tool_use", "id": "123", "name": "search", "input": {"q": "test"}} - ] - } - - Output standard format:: - - [ - {"role": "assistant", "content": [{"type": "text", "text": "Hello"}]}, - { - "role": "assistant", - "tool_calls": [ - { - "type": "function", - "id": "123", - "function": {"name": "search", "arguments": '{"q": "test"}'} - } - ] - } - ] - """ - # todo: image format (?) - # tool_use - role = obj.get("role") - content = obj.get("content") - if role == "assistant": - if isinstance(content, str): - return [{"role": role, "content": [{"type": "text", "text": content}]}] - elif isinstance(content, list): - text_items = [] - tool_items = [] - for item in content: - if item["type"] == "text": - text_items.append({"type": "text", "text": item["text"]}) - elif item["type"] == "tool_use": - tool_items.append( - { - "type": "function", - "id": item["id"], - "function": { - "name": item["name"], - "arguments": json.dumps(item["input"]), - }, - } - ) - messages = [] - if text_items: - messages.append({"role": role, "content": text_items}) - if tool_items: - messages.append({"role": role, "tool_calls": tool_items}) - return messages - elif role == "user": - if isinstance(content, str): - return [{"role": role, "content": [{"type": "text", "text": content}]}] - elif isinstance(content, list): - text_items = [] - tool_items = [] - for item in content: - if item["type"] == "text": - text_items.append({"type": "text", "text": item["text"]}) - elif item["type"] == "tool_result": - tool_items.append( - { - "role": "tool", - "tool_call_id": item["tool_use_id"], - "content": item["content"], - } - ) - messages = [] - if text_items: - messages.append({"role": role, "content": text_items}) - messages.extend(tool_items) - return messages - - def from_standard_message(self, message): - """Convert standard format message to Anthropic format. - - Handles conversion of text content, tool calls, and tool results. - Empty text content is converted to "(empty)". - - Args: - message: Message in standard format. - - Returns: - Message in Anthropic format. - - Examples: - Input standard format:: - - { - "role": "assistant", - "tool_calls": [ - { - "id": "123", - "function": {"name": "search", "arguments": '{"q": "test"}'} - } - ] - } - - Output Anthropic format:: - - { - "role": "assistant", - "content": [ - { - "type": "tool_use", - "id": "123", - "name": "search", - "input": {"q": "test"} - } - ] - } - """ - # todo: image messages (?) - if message["role"] == "tool": - return { - "role": "user", - "content": [ - { - "type": "tool_result", - "tool_use_id": message["tool_call_id"], - "content": message["content"], - }, - ], - } - if message.get("tool_calls"): - tc = message["tool_calls"] - ret = {"role": "assistant", "content": []} - for tool_call in tc: - function = tool_call["function"] - arguments = json.loads(function["arguments"]) - new_tool_use = { - "type": "tool_use", - "id": tool_call["id"], - "name": function["name"], - "input": arguments, - } - ret["content"].append(new_tool_use) - return ret - # check for empty text strings - content = message.get("content") - if isinstance(content, str): - if content == "": - content = "(empty)" - elif isinstance(content, list): - for item in content: - if item["type"] == "text" and item["text"] == "": - item["text"] = "(empty)" - - return message - - def add_image_frame_message( - self, *, format: str, size: tuple[int, int], image: bytes, text: str = None - ): - """Add an image message to the context. - - Converts the image to base64 JPEG format and adds it as a user message - with optional accompanying text. - - Args: - format: The image format (e.g., 'RGB', 'RGBA'). - size: Image dimensions as (width, height). - image: Raw image bytes. - text: Optional text to accompany the image. - """ - buffer = io.BytesIO() - Image.frombytes(format, size, image).save(buffer, format="JPEG") - encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8") - - # Anthropic docs say that the image should be the first content block in the message. - content = [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": encoded_image, - }, - } - ] - if text: - content.append({"type": "text", "text": text}) - self.add_message({"role": "user", "content": content}) - - def add_message(self, message): - """Add a message to the context, merging with previous message if same role. - - Anthropic requires alternating roles, so consecutive messages from the same - role are merged together. - - Args: - message: The message to add to the context. - """ - try: - if self.messages: - # Anthropic requires that roles alternate. If this message's role is the same as the - # last message, we should add this message's content to the last message. - if self.messages[-1]["role"] == message["role"]: - # if the last message has just a content string, convert it to a list - # in the proper format - if isinstance(self.messages[-1]["content"], str): - self.messages[-1]["content"] = [ - {"type": "text", "text": self.messages[-1]["content"]} - ] - # if this message has just a content string, convert it to a list - # in the proper format - if isinstance(message["content"], str): - message["content"] = [{"type": "text", "text": message["content"]}] - # append the content of this message to the last message - self.messages[-1]["content"].extend(message["content"]) - else: - self.messages.append(message) - else: - self.messages.append(message) - except Exception as e: - logger.error(f"Error adding message: {e}") - - def get_messages_with_cache_control_markers(self) -> List[dict]: - """Get messages with prompt caching markers applied. - - Adds cache control markers to appropriate messages based on the - number of turns above the cache threshold. - - Returns: - List of messages with cache control markers added. - """ - try: - messages = copy.deepcopy(self.messages) - if self.turns_above_cache_threshold >= 1 and messages[-1]["role"] == "user": - if isinstance(messages[-1]["content"], str): - messages[-1]["content"] = [{"type": "text", "text": messages[-1]["content"]}] - messages[-1]["content"][-1]["cache_control"] = {"type": "ephemeral"} - if ( - self.turns_above_cache_threshold >= 2 - and len(messages) > 2 - and messages[-3]["role"] == "user" - ): - if isinstance(messages[-3]["content"], str): - messages[-3]["content"] = [{"type": "text", "text": messages[-3]["content"]}] - messages[-3]["content"][-1]["cache_control"] = {"type": "ephemeral"} - return messages - except Exception as e: - logger.error(f"Error adding cache control marker: {e}") - return self.messages - - def _restructure_from_openai_messages(self): - # first, map across self._messages calling self.from_standard_message(m) to modify messages in place - try: - self._messages[:] = [self.from_standard_message(m) for m in self._messages] - except Exception as e: - logger.error(f"Error mapping messages: {e}") - - # See if we should pull the system message out of our context.messages list. (For - # compatibility with Open AI messages format.) - if self.messages and self.messages[0]["role"] == "system": - if len(self.messages) == 1: - # If we have only have a system message in the list, all we can really do - # without introducing too much magic is change the role to "user". - self.messages[0]["role"] = "user" - else: - # If we have more than one message, we'll pull the system message out of the - # list. - self.system = self.messages[0]["content"] - self.messages.pop(0) - - # Merge consecutive messages with the same role. - i = 0 - while i < len(self.messages) - 1: - current_message = self.messages[i] - next_message = self.messages[i + 1] - if current_message["role"] == next_message["role"]: - # Convert content to list of dictionaries if it's a string - if isinstance(current_message["content"], str): - current_message["content"] = [ - {"type": "text", "text": current_message["content"]} - ] - if isinstance(next_message["content"], str): - next_message["content"] = [{"type": "text", "text": next_message["content"]}] - # Concatenate the content - current_message["content"].extend(next_message["content"]) - # Remove the next message from the list - self.messages.pop(i + 1) - else: - i += 1 - - # Avoid empty content in messages - for message in self.messages: - if isinstance(message["content"], str) and message["content"] == "": - message["content"] = "(empty)" - elif isinstance(message["content"], list) and len(message["content"]) == 0: - message["content"] = [{"type": "text", "text": "(empty)"}] - - def get_messages_for_persistent_storage(self): - """Get messages formatted for persistent storage. - - Includes system message at the beginning if present. - - Returns: - List of messages suitable for storage. - """ - messages = super().get_messages_for_persistent_storage() - if self.system: - messages.insert(0, {"role": "system", "content": self.system}) - return messages - - def get_messages_for_logging(self) -> List[Dict[str, Any]]: - """Get messages formatted for logging with sensitive data redacted. - - Replaces image data with placeholder text for cleaner logs. - - Returns: - List of messages in a format ready for logging. - """ - msgs = [] - for message in self.messages: - msg = copy.deepcopy(message) - if "content" in msg: - if isinstance(msg["content"], list): - for item in msg["content"]: - if item["type"] == "image": - item["source"]["data"] = "..." - msgs.append(msg) - return msgs - - -class AnthropicUserContextAggregator(LLMUserContextAggregator): - """Anthropic-specific user context aggregator. - - Handles aggregation of user messages for Anthropic LLM services. - Inherits all functionality from the base LLMUserContextAggregator. - - .. deprecated:: 0.0.99 - `AnthropicUserContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - # Super handles deprecation warning - pass - - -# -# Claude returns a text content block along with a tool use content block. This works quite nicely -# with streaming. We get the text first, so we can start streaming it right away. Then we get the -# tool_use block. While the text is streaming to TTS and the transport, we can run the tool call. -# -# But Claude is verbose. It would be nice to come up with prompt language that suppresses Claude's -# chattiness about it's tool thinking. -# - - -class AnthropicAssistantContextAggregator(LLMAssistantContextAggregator): - """Context aggregator for assistant messages in Anthropic conversations. - - Handles function call lifecycle management including in-progress tracking, - result handling, and cancellation for Anthropic's tool use format. - - .. deprecated:: 0.0.99 - `AnthropicAssistantContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - # Super handles deprecation warning - - async def handle_function_call_in_progress(self, frame: FunctionCallInProgressFrame): - """Handle a function call that is starting. - - Creates tool use message and placeholder tool result for tracking. - - Args: - frame: Frame containing function call details. - """ - assistant_message = {"role": "assistant", "content": []} - assistant_message["content"].append( - { - "type": "tool_use", - "id": frame.tool_call_id, - "name": frame.function_name, - "input": frame.arguments, - } - ) - self._context.add_message(assistant_message) - self._context.add_message( - { - "role": "user", - "content": [ - { - "type": "tool_result", - "tool_use_id": frame.tool_call_id, - "content": "IN_PROGRESS", - } - ], - } - ) - - async def handle_function_call_result(self, frame: FunctionCallResultFrame): - """Handle the result of a completed function call. - - Updates the tool result with actual return value or completion status. - - Args: - frame: Frame containing function call result. - """ - if frame.result: - result = json.dumps(frame.result, ensure_ascii=False) - await self._update_function_call_result(frame.function_name, frame.tool_call_id, result) - else: - await self._update_function_call_result( - frame.function_name, frame.tool_call_id, "COMPLETED" - ) - - async def handle_function_call_cancel(self, frame: FunctionCallCancelFrame): - """Handle cancellation of a function call. - - Updates the tool result to indicate cancellation. - - Args: - frame: Frame containing function call cancellation details. - """ - await self._update_function_call_result( - frame.function_name, frame.tool_call_id, "CANCELLED" - ) - - async def _update_function_call_result( - self, function_name: str, tool_call_id: str, result: Any - ): - for message in self._context.messages: - if message["role"] == "user": - for content in message["content"]: - if ( - isinstance(content, dict) - and content["type"] == "tool_result" - and content["tool_use_id"] == tool_call_id - ): - content["content"] = result - - async def handle_user_image_frame(self, frame: UserImageRawFrame): - """Handle a user image frame with function call context. - - Marks the associated function call as completed and adds the image - to the conversation context. - - Args: - frame: User image frame with request context. - """ - await self._update_function_call_result( - frame.request.function_name, frame.request.tool_call_id, "COMPLETED" - ) - self._context.add_image_frame_message( - format=frame.format, - size=frame.size, - image=frame.image, - text=frame.request.context, - ) diff --git a/src/pipecat/services/aws/agent_core.py b/src/pipecat/services/aws/agent_core.py index f21654584..d66af9c5b 100644 --- a/src/pipecat/services/aws/agent_core.py +++ b/src/pipecat/services/aws/agent_core.py @@ -26,15 +26,11 @@ from pipecat.frames.frames import ( LLMTextFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor def default_context_to_payload_transformer( - context: LLMContext | OpenAILLMContext, + context: LLMContext, ) -> Optional[str]: """Default transformer to create AgentCore payload from LLM context. @@ -118,9 +114,7 @@ class AWSAgentCoreProcessor(FrameProcessor): aws_secret_key: Optional[str] = None, aws_session_token: Optional[str] = None, aws_region: Optional[str] = None, - context_to_payload_transformer: Optional[ - Callable[[LLMContext | OpenAILLMContext], Optional[str]] - ] = None, + context_to_payload_transformer: Optional[Callable[[LLMContext], Optional[str]]] = None, response_to_output_transformer: Optional[Callable[[str], Optional[str]]] = None, **kwargs, ): @@ -200,7 +194,7 @@ class AWSAgentCoreProcessor(FrameProcessor): direction: The direction of frame flow in the pipeline. """ await super().process_frame(frame, direction) - if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): + if isinstance(frame, LLMContextFrame): # Create payload to invoke AgentCore agent payload = self._context_to_payload_transformer(frame.context) diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index cda186167..2e2bd3c59 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -38,21 +38,10 @@ from pipecat.frames.frames import ( LLMContextFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, - LLMMessagesFrame, UserImageRawFrame, ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMAssistantContextAggregator, - LLMUserAggregatorParams, - LLMUserContextAggregator, -) -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import LLMService from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven @@ -87,657 +76,6 @@ class AWSBedrockLLMSettings(LLMSettings): ) -@dataclass -class AWSBedrockContextAggregatorPair: - """Container for AWS Bedrock context aggregators. - - Provides convenient access to both user and assistant context aggregators - for AWS Bedrock LLM operations. - - .. deprecated:: 0.0.99 - `AWSBedrockContextAggregatorPair` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Parameters: - _user: The user context aggregator instance. - _assistant: The assistant context aggregator instance. - """ - - # Aggregators handle deprecation warnings - _user: "AWSBedrockUserContextAggregator" - _assistant: "AWSBedrockAssistantContextAggregator" - - def user(self) -> "AWSBedrockUserContextAggregator": - """Get the user context aggregator. - - Returns: - The user context aggregator instance. - """ - return self._user - - def assistant(self) -> "AWSBedrockAssistantContextAggregator": - """Get the assistant context aggregator. - - Returns: - The assistant context aggregator instance. - """ - return self._assistant - - -class AWSBedrockLLMContext(OpenAILLMContext): - """AWS Bedrock-specific LLM context implementation. - - Extends OpenAI LLM context to handle AWS Bedrock's specific message format - and system message handling. Manages conversion between OpenAI and Bedrock - message formats. - - .. deprecated:: 0.0.99 - `AWSBedrockLLMContext` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - def __init__( - self, - messages: Optional[List[dict]] = None, - tools: Optional[List[dict]] = None, - tool_choice: Optional[dict] = None, - *, - system: Optional[str] = None, - ): - """Initialize AWS Bedrock LLM context. - - Args: - messages: List of conversation messages in OpenAI format. - tools: List of available function calling tools. - tool_choice: Tool selection strategy or specific tool choice. - system: System message content for AWS Bedrock. - """ - # Super handles deprecation warning - super().__init__(messages=messages, tools=tools, tool_choice=tool_choice) - self.system = system - - @staticmethod - def upgrade_to_bedrock(obj: OpenAILLMContext) -> "AWSBedrockLLMContext": - """Upgrade an OpenAI LLM context to AWS Bedrock format. - - Args: - obj: The OpenAI LLM context to upgrade. - - Returns: - The upgraded AWS Bedrock LLM context. - """ - logger.debug(f"Upgrading to AWS Bedrock: {obj}") - if isinstance(obj, OpenAILLMContext) and not isinstance(obj, AWSBedrockLLMContext): - obj.__class__ = AWSBedrockLLMContext - obj._restructure_from_openai_messages() - else: - obj._restructure_from_bedrock_messages() - return obj - - @classmethod - def from_openai_context(cls, openai_context: OpenAILLMContext): - """Create AWS Bedrock context from OpenAI context. - - Args: - openai_context: The OpenAI LLM context to convert. - - Returns: - New AWS Bedrock LLM context instance. - """ - self = cls( - messages=openai_context.messages, - tools=openai_context.tools, - tool_choice=openai_context.tool_choice, - ) - self.set_llm_adapter(openai_context.get_llm_adapter()) - self._restructure_from_openai_messages() - return self - - @classmethod - def from_messages(cls, messages: List[dict]) -> "AWSBedrockLLMContext": - """Create AWS Bedrock context from message list. - - Args: - messages: List of messages in OpenAI format. - - Returns: - New AWS Bedrock LLM context instance. - """ - self = cls(messages=messages) - self._restructure_from_openai_messages() - return self - - def set_messages(self, messages: List): - """Set the messages list and restructure for Bedrock format. - - Args: - messages: List of messages to set. - """ - self._messages[:] = messages - self._restructure_from_openai_messages() - - def to_standard_messages(self, obj): - """Convert AWS Bedrock message format to standard structured format. - - Handles text content and function calls for both user and assistant messages. - - Args: - obj: Message in AWS Bedrock format. - - Returns: - List of messages in standard format. - - Examples: - AWS Bedrock format input:: - - { - "role": "assistant", - "content": [ - {"text": "Hello"}, - {"toolUse": {"toolUseId": "123", "name": "search", "input": {"q": "test"}}} - ] - } - - Standard format output:: - - [ - {"role": "assistant", "content": [{"type": "text", "text": "Hello"}]}, - { - "role": "assistant", - "tool_calls": [ - { - "type": "function", - "id": "123", - "function": {"name": "search", "arguments": '{"q": "test"}'} - } - ] - } - ] - """ - role = obj.get("role") - content = obj.get("content") - - if role == "assistant": - if isinstance(content, str): - return [{"role": role, "content": [{"type": "text", "text": content}]}] - elif isinstance(content, list): - text_items = [] - tool_items = [] - for item in content: - if "text" in item: - text_items.append({"type": "text", "text": item["text"]}) - elif "toolUse" in item: - tool_use = item["toolUse"] - tool_items.append( - { - "type": "function", - "id": tool_use["toolUseId"], - "function": { - "name": tool_use["name"], - "arguments": json.dumps(tool_use["input"]), - }, - } - ) - messages = [] - if text_items: - messages.append({"role": role, "content": text_items}) - if tool_items: - messages.append({"role": role, "tool_calls": tool_items}) - return messages - elif role == "user": - if isinstance(content, str): - return [{"role": role, "content": [{"type": "text", "text": content}]}] - elif isinstance(content, list): - text_items = [] - tool_items = [] - for item in content: - if "text" in item: - text_items.append({"type": "text", "text": item["text"]}) - elif "toolResult" in item: - tool_result = item["toolResult"] - # Extract content from toolResult - result_content = "" - if isinstance(tool_result["content"], list): - for content_item in tool_result["content"]: - if "text" in content_item: - result_content = content_item["text"] - elif "json" in content_item: - result_content = json.dumps(content_item["json"]) - else: - result_content = tool_result["content"] - - tool_items.append( - { - "role": "tool", - "tool_call_id": tool_result["toolUseId"], - "content": result_content, - } - ) - messages = [] - if text_items: - messages.append({"role": role, "content": text_items}) - messages.extend(tool_items) - return messages - - def from_standard_message(self, message): - """Convert standard format message to AWS Bedrock format. - - Handles conversion of text content, tool calls, and tool results. - Empty text content is converted to "(empty)". - - Args: - message: Message in standard format. - - Returns: - Message in AWS Bedrock format. - - Examples: - Standard format input:: - - { - "role": "assistant", - "tool_calls": [ - { - "id": "123", - "function": {"name": "search", "arguments": '{"q": "test"}'} - } - ] - } - - AWS Bedrock format output:: - - { - "role": "assistant", - "content": [ - { - "toolUse": { - "toolUseId": "123", - "name": "search", - "input": {"q": "test"} - } - } - ] - } - """ - if message["role"] == "tool": - # Try to parse the content as JSON if it looks like JSON - try: - if message["content"].strip().startswith("{") and message[ - "content" - ].strip().endswith("}"): - content_json = json.loads(message["content"]) - tool_result_content = [{"json": content_json}] - else: - tool_result_content = [{"text": message["content"]}] - except (json.JSONDecodeError, ValueError, AttributeError): - tool_result_content = [{"text": message["content"]}] - - return { - "role": "user", - "content": [ - { - "toolResult": { - "toolUseId": message["tool_call_id"], - "content": tool_result_content, - }, - }, - ], - } - - if message.get("tool_calls"): - tc = message["tool_calls"] - ret = {"role": "assistant", "content": []} - for tool_call in tc: - function = tool_call["function"] - arguments = json.loads(function["arguments"]) - new_tool_use = { - "toolUse": { - "toolUseId": tool_call["id"], - "name": function["name"], - "input": arguments, - } - } - ret["content"].append(new_tool_use) - return ret - - # Handle text content - content = message.get("content") - if isinstance(content, str): - if content == "": - return {"role": message["role"], "content": [{"text": "(empty)"}]} - else: - return {"role": message["role"], "content": [{"text": content}]} - elif isinstance(content, list): - new_content = [] - for item in content: - # fix empty text - if item.get("type", "") == "text": - text_content = item["text"] if item["text"] != "" else "(empty)" - new_content.append({"text": text_content}) - # handle image_url -> image conversion - if item["type"] == "image_url": - new_item = { - "image": { - "format": "jpeg", - "source": { - "bytes": base64.b64decode(item["image_url"]["url"].split(",")[1]) - }, - } - } - new_content.append(new_item) - # In the case where there's a single image in the list (like what - # would result from a UserImageRawFrame), ensure that the image - # comes before text - image_indices = [i for i, item in enumerate(new_content) if "image" in item] - text_indices = [i for i, item in enumerate(new_content) if "text" in item] - if len(image_indices) == 1 and text_indices: - img_idx = image_indices[0] - first_txt_idx = text_indices[0] - if img_idx > first_txt_idx: - # Move image before the first text - image_item = new_content.pop(img_idx) - new_content.insert(first_txt_idx, image_item) - return {"role": message["role"], "content": new_content} - - return message - - def add_image_frame_message( - self, *, format: str, size: tuple[int, int], image: bytes, text: str = None - ): - """Add an image message to the context. - - Args: - format: The image format (e.g., 'RGB', 'RGBA'). - size: The image dimensions as (width, height). - image: The raw image data as bytes. - text: Optional text to accompany the image. - """ - buffer = io.BytesIO() - Image.frombytes(format, size, image).save(buffer, format="JPEG") - encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8") - - # Image should be the first content block in the message - content = [{"type": "image", "format": "jpeg", "source": {"bytes": encoded_image}}] - if text: - content.append({"text": text}) - self.add_message({"role": "user", "content": content}) - - def add_message(self, message): - """Add a message to the context, merging with previous message if same role. - - AWS Bedrock requires alternating roles, so consecutive messages from the - same role are merged together. - - Args: - message: The message to add to the context. - """ - try: - if self.messages: - # AWS Bedrock requires that roles alternate. If this message's - # role is the same as the last message, we should add this - # message's content to the last message. - if self.messages[-1]["role"] == message["role"]: - # if the last message has just a content string, convert it to a list - # in the proper format - if isinstance(self.messages[-1]["content"], str): - self.messages[-1]["content"] = [{"text": self.messages[-1]["content"]}] - # if this message has just a content string, convert it to a list - # in the proper format - if isinstance(message["content"], str): - message["content"] = [{"text": message["content"]}] - # append the content of this message to the last message - self.messages[-1]["content"].extend(message["content"]) - else: - self.messages.append(message) - else: - self.messages.append(message) - except Exception as e: - logger.error(f"Error adding message: {e}") - - def _restructure_from_bedrock_messages(self): - """Restructure messages in AWS Bedrock format. - - Handles system messages, merging consecutive messages with the same role, - and ensuring proper content formatting. - """ - # Handle system message if present at the beginning - if self.messages and self.messages[0]["role"] == "system": - if len(self.messages) == 1: - self.messages[0]["role"] = "user" - else: - system_content = self.messages.pop(0)["content"] - if isinstance(system_content, str): - system_content = [{"text": system_content}] - - if self.system: - if isinstance(self.system, str): - self.system = [{"text": self.system}] - self.system.extend(system_content) - else: - self.system = system_content - - # Ensure content is properly formatted - for msg in self.messages: - if isinstance(msg["content"], str): - msg["content"] = [{"text": msg["content"]}] - elif not msg["content"]: - msg["content"] = [{"text": "(empty)"}] - elif isinstance(msg["content"], list): - for idx, item in enumerate(msg["content"]): - if isinstance(item, dict) and "text" in item and item["text"] == "": - item["text"] = "(empty)" - elif isinstance(item, str) and item == "": - msg["content"][idx] = {"text": "(empty)"} - - # Merge consecutive messages with the same role - merged_messages = [] - for msg in self.messages: - if merged_messages and merged_messages[-1]["role"] == msg["role"]: - merged_messages[-1]["content"].extend(msg["content"]) - else: - merged_messages.append(msg) - - self.messages.clear() - self.messages.extend(merged_messages) - - def _restructure_from_openai_messages(self): - # first, map across self._messages calling self.from_standard_message(m) to modify messages in place - try: - self._messages[:] = [self.from_standard_message(m) for m in self._messages] - except Exception as e: - logger.error(f"Error mapping messages: {e}") - - # See if we should pull the system message out of our context.messages list. (For - # compatibility with Open AI messages format.) - if self.messages and self.messages[0]["role"] == "system": - self.system = self.messages[0]["content"] - self.messages.pop(0) - - # Merge consecutive messages with the same role. - i = 0 - while i < len(self.messages) - 1: - current_message = self.messages[i] - next_message = self.messages[i + 1] - if current_message["role"] == next_message["role"]: - # Convert content to list of dictionaries if it's a string - if isinstance(current_message["content"], str): - current_message["content"] = [ - {"type": "text", "text": current_message["content"]} - ] - if isinstance(next_message["content"], str): - next_message["content"] = [{"type": "text", "text": next_message["content"]}] - # Concatenate the content - current_message["content"].extend(next_message["content"]) - # Remove the next message from the list - self.messages.pop(i + 1) - else: - i += 1 - - # Avoid empty content in messages - for message in self.messages: - if isinstance(message["content"], str) and message["content"] == "": - message["content"] = "(empty)" - elif isinstance(message["content"], list) and len(message["content"]) == 0: - message["content"] = [{"type": "text", "text": "(empty)"}] - - def get_messages_for_persistent_storage(self): - """Get messages formatted for persistent storage. - - Returns: - List of messages including system message if present. - """ - messages = super().get_messages_for_persistent_storage() - if self.system: - messages.insert(0, {"role": "system", "content": self.system}) - return messages - - def get_messages_for_logging(self) -> List[Dict[str, Any]]: - """Get messages formatted for logging with sensitive data redacted. - - Returns: - List of messages in a format ready for logging. - """ - msgs = [] - for message in self.messages: - msg = copy.deepcopy(message) - if "content" in msg: - if isinstance(msg["content"], list): - for item in msg["content"]: - if item.get("image"): - item["image"]["source"]["bytes"] = "..." - msgs.append(msg) - return msgs - - -class AWSBedrockUserContextAggregator(LLMUserContextAggregator): - """User context aggregator for AWS Bedrock LLM service. - - Handles aggregation of user messages and frames for AWS Bedrock format. - Inherits all functionality from the base LLM user context aggregator. - - .. deprecated:: 0.0.99 - `AWSBedrockUserContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Args: - context: The LLM context to aggregate messages into. - params: Configuration parameters for the aggregator. - """ - - # Super handles deprecation warning - pass - - -class AWSBedrockAssistantContextAggregator(LLMAssistantContextAggregator): - """Assistant context aggregator for AWS Bedrock LLM service. - - Handles aggregation of assistant responses and function calls for AWS Bedrock - format, including tool use and tool result handling. - - .. deprecated:: 0.0.99 - `AWSBedrockAssistantContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Args: - context: The LLM context to aggregate messages into. - params: Configuration parameters for the aggregator. - """ - - # Super handles deprecation warning - - async def handle_function_call_in_progress(self, frame: FunctionCallInProgressFrame): - """Handle function call in progress frame. - - Args: - frame: The function call in progress frame to handle. - """ - # Format tool use according to AWS Bedrock API - self._context.add_message( - { - "role": "assistant", - "content": [ - { - "toolUse": { - "toolUseId": frame.tool_call_id, - "name": frame.function_name, - "input": frame.arguments if frame.arguments else {}, - } - } - ], - } - ) - self._context.add_message( - { - "role": "user", - "content": [ - { - "toolResult": { - "toolUseId": frame.tool_call_id, - "content": [{"text": "IN_PROGRESS"}], - } - } - ], - } - ) - - async def handle_function_call_result(self, frame: FunctionCallResultFrame): - """Handle function call result frame. - - Args: - frame: The function call result frame to handle. - """ - if frame.result: - result = json.dumps(frame.result, ensure_ascii=False) - await self._update_function_call_result(frame.function_name, frame.tool_call_id, result) - else: - await self._update_function_call_result( - frame.function_name, frame.tool_call_id, "COMPLETED" - ) - - async def handle_function_call_cancel(self, frame: FunctionCallCancelFrame): - """Handle function call cancel frame. - - Args: - frame: The function call cancel frame to handle. - """ - await self._update_function_call_result( - frame.function_name, frame.tool_call_id, "CANCELLED" - ) - - async def _update_function_call_result( - self, function_name: str, tool_call_id: str, result: Any - ): - for message in self._context.messages: - if message["role"] == "user": - for content in message["content"]: - if ( - isinstance(content, dict) - and content.get("toolResult") - and content["toolResult"]["toolUseId"] == tool_call_id - ): - content["toolResult"]["content"] = [{"text": result}] - - async def handle_user_image_frame(self, frame: UserImageRawFrame): - """Handle user image frame. - - Args: - frame: The user image frame to handle. - """ - await self._update_function_call_result( - frame.request.function_name, frame.request.tool_call_id, "COMPLETED" - ) - self._context.add_image_frame_message( - format=frame.format, - size=frame.size, - image=frame.image, - text=frame.request.context, - ) - - class AWSBedrockLLMService(LLMService): """AWS Bedrock Large Language Model service implementation. @@ -924,7 +262,7 @@ class AWSBedrockLLMService(LLMService): async def run_inference( self, - context: LLMContext | OpenAILLMContext, + context: LLMContext, max_tokens: Optional[int] = None, system_instruction: Optional[str] = None, ) -> Optional[str]: @@ -943,17 +281,12 @@ class AWSBedrockLLMService(LLMService): messages = [] system = [] effective_instruction = system_instruction or self._settings.system_instruction - if isinstance(context, LLMContext): - adapter: AWSBedrockLLMAdapter = self.get_llm_adapter() - params: AWSBedrockLLMInvocationParams = adapter.get_llm_invocation_params( - context, system_instruction=effective_instruction - ) - messages = params["messages"] - system = params["system"] # [{"text": "system message"}] or None - else: - context = AWSBedrockLLMContext.upgrade_to_bedrock(context) - messages = context.messages - system = getattr(context, "system", None) # [{"text": "system message"}] + adapter: AWSBedrockLLMAdapter = self.get_llm_adapter() + params: AWSBedrockLLMInvocationParams = adapter.get_llm_invocation_params( + context, system_instruction=effective_instruction + ) + messages = params["messages"] + system = params["system"] # [{"text": "system message"}] or None # Prepare request parameters using the same method as streaming inference_config = self._build_inference_config() @@ -1021,44 +354,6 @@ class AWSBedrockLLMService(LLMService): response = await client.converse_stream(**request_params) return response - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> AWSBedrockContextAggregatorPair: - """Create AWS Bedrock-specific context aggregators. - - Creates a pair of context aggregators optimized for AWS Bedrocks's message - format, including support for function calls, tool usage, and image handling. - - Args: - context: The LLM context to create aggregators for. - user_params: Parameters for user message aggregation. - assistant_params: Parameters for assistant message aggregation. - - Returns: - AWSBedrockContextAggregatorPair: A pair of context aggregators, one for - the user and one for the assistant, encapsulated in an - AWSBedrockContextAggregatorPair. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - context.set_llm_adapter(self.get_llm_adapter()) - - if isinstance(context, OpenAILLMContext): - context = AWSBedrockLLMContext.from_openai_context(context) - - # Aggregators handle deprecation warnings - user = AWSBedrockUserContextAggregator(context, params=user_params) - assistant = AWSBedrockAssistantContextAggregator(context, params=assistant_params) - - return AWSBedrockContextAggregatorPair(_user=user, _assistant=assistant) - def _create_no_op_tool(self): """Create a no-operation tool for AWS Bedrock when tool content exists but no tools are defined. @@ -1074,27 +369,15 @@ class AWSBedrockLLMService(LLMService): } } - def _get_llm_invocation_params( - self, context: OpenAILLMContext | LLMContext - ) -> AWSBedrockLLMInvocationParams: - # Universal LLMContext - if isinstance(context, LLMContext): - adapter: AWSBedrockLLMAdapter = self.get_llm_adapter() - params: AWSBedrockLLMInvocationParams = adapter.get_llm_invocation_params( - context, system_instruction=self._settings.system_instruction - ) - return params - - # AWS Bedrock-specific context - return AWSBedrockLLMInvocationParams( - system=getattr(context, "system", None), - messages=context.messages, - tools=context.tools or [], - tool_choice=context.tool_choice, + def _get_llm_invocation_params(self, context: LLMContext) -> AWSBedrockLLMInvocationParams: + adapter: AWSBedrockLLMAdapter = self.get_llm_adapter() + params: AWSBedrockLLMInvocationParams = adapter.get_llm_invocation_params( + context, system_instruction=self._settings.system_instruction ) + return params @traced_llm - async def _process_context(self, context: AWSBedrockLLMContext | LLMContext): + async def _process_context(self, context: LLMContext): # Usage tracking prompt_tokens = 0 completion_tokens = 0 @@ -1173,15 +456,10 @@ class AWSBedrockLLMService(LLMService): request_params["performanceConfig"] = {"latency": self._settings.latency} # Log request params with messages redacted for logging - if isinstance(context, LLMContext): - adapter = self.get_llm_adapter() - context_type_for_logging = "universal" - messages_for_logging = adapter.get_messages_for_logging(context) - else: - context_type_for_logging = "LLM-specific" - messages_for_logging = context.get_messages_for_logging() + adapter = self.get_llm_adapter() + messages_for_logging = adapter.get_messages_for_logging(context) logger.debug( - f"{self}: Generating chat from {context_type_for_logging} context [{system}] | {messages_for_logging}" + f"{self}: Generating chat from context [{system}] | {messages_for_logging}" ) async with self._aws_session.client( @@ -1287,14 +565,8 @@ class AWSBedrockLLMService(LLMService): await super().process_frame(frame, direction) context = None - if isinstance(frame, OpenAILLMContextFrame): - context = AWSBedrockLLMContext.upgrade_to_bedrock(frame.context) if isinstance(frame, LLMContextFrame): context = frame.context - elif isinstance(frame, LLMMessagesFrame): - # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal - # LLMContext with it - context = AWSBedrockLLMContext.from_messages(frame.messages) else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/aws/nova_sonic/context.py b/src/pipecat/services/aws/nova_sonic/context.py deleted file mode 100644 index a87564c5b..000000000 --- a/src/pipecat/services/aws/nova_sonic/context.py +++ /dev/null @@ -1,460 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Context management for AWS Nova Sonic LLM service. - -This module provides specialized context aggregators and message handling for AWS Nova Sonic, -including conversation history management and role-specific message processing. - -.. deprecated:: 0.0.91 - AWS Nova Sonic no longer uses types from this module under the hood. - It now uses ``LLMContext`` and ``LLMContextAggregatorPair``. - Using the new patterns should allow you to not need types from this module. - - BEFORE:: - - # Setup - context = OpenAILLMContext(messages, tools) - context_aggregator = llm.create_context_aggregator(context) - - # Context frame type - frame: OpenAILLMContextFrame - - # Context type - context: AWSNovaSonicLLMContext - # or - context: OpenAILLMContext - - AFTER:: - - # Setup - context = LLMContext(messages, tools) - context_aggregator = LLMContextAggregatorPair(context) - - # Context frame type - frame: LLMContextFrame - - # Context type - context: LLMContext -""" - -import warnings - -with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Types in pipecat.services.aws.nova_sonic.context (or " - "pipecat.services.aws_nova_sonic.context) are deprecated. \n" - "AWS Nova Sonic no longer uses types from this module under the hood. \n" - "It now uses `LLMContext` and `LLMContextAggregatorPair`. \n" - "Using the new patterns should allow you to not need types from this module.\n\n" - "BEFORE:\n" - "```\n" - "# Setup\n" - "context = OpenAILLMContext(messages, tools)\n" - "context_aggregator = llm.create_context_aggregator(context)\n\n" - "# Context frame type\n" - "frame: OpenAILLMContextFrame\n\n" - "# Context type\n" - "context: AWSNovaSonicLLMContext\n" - "# or\n" - "context: OpenAILLMContext\n\n" - "```\n\n" - "AFTER:\n" - "```\n" - "# Setup\n" - "context = LLMContext(messages, tools)\n" - "context_aggregator = LLMContextAggregatorPair(context)\n\n" - "# Context frame type\n" - "frame: LLMContextFrame\n\n" - "# Context type\n" - "context: LLMContext\n\n" - "```", - DeprecationWarning, - stacklevel=2, - ) - -import copy -from dataclasses import dataclass, field -from enum import Enum - -from loguru import logger - -from pipecat.frames.frames import ( - BotStoppedSpeakingFrame, - DataFrame, - Frame, - FunctionCallResultFrame, - InterruptionFrame, - LLMFullResponseEndFrame, - LLMFullResponseStartFrame, - LLMMessagesAppendFrame, - LLMMessagesUpdateFrame, - LLMSetToolChoiceFrame, - LLMSetToolsFrame, - TextFrame, - UserImageRawFrame, -) -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.aws.nova_sonic.frames import AWSNovaSonicFunctionCallResultFrame -from pipecat.services.openai.llm import ( - OpenAIAssistantContextAggregator, - OpenAIUserContextAggregator, -) - - -class Role(Enum): - """Roles supported in AWS Nova Sonic conversations. - - Parameters: - SYSTEM: System-level messages (not used in conversation history). - USER: Messages sent by the user. - ASSISTANT: Messages sent by the assistant. - TOOL: Messages sent by tools (not used in conversation history). - """ - - SYSTEM = "SYSTEM" - USER = "USER" - ASSISTANT = "ASSISTANT" - TOOL = "TOOL" - - -@dataclass -class AWSNovaSonicConversationHistoryMessage: - """A single message in AWS Nova Sonic conversation history. - - Parameters: - role: The role of the message sender (USER or ASSISTANT only). - text: The text content of the message. - """ - - role: Role # only USER and ASSISTANT - text: str - - -@dataclass -class AWSNovaSonicConversationHistory: - """Complete conversation history for AWS Nova Sonic initialization. - - Parameters: - system_instruction: System-level instruction for the conversation. - messages: List of conversation messages between user and assistant. - """ - - system_instruction: str = None - messages: list[AWSNovaSonicConversationHistoryMessage] = field(default_factory=list) - - -class AWSNovaSonicLLMContext(OpenAILLMContext): - """Specialized LLM context for AWS Nova Sonic service. - - Extends OpenAI context with Nova Sonic-specific message handling, - conversation history management, and text buffering capabilities. - - .. deprecated:: 0.0.99 - `AWSNovaSonicLLMContext` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - def __init__(self, messages=None, tools=None, **kwargs): - """Initialize AWS Nova Sonic LLM context. - - Args: - messages: Initial messages for the context. - tools: Available tools for the context. - **kwargs: Additional arguments passed to parent class. - """ - # Super handles deprecation warning - super().__init__(messages=messages, tools=tools, **kwargs) - self.__setup_local() - - def __setup_local(self, system_instruction: str = ""): - self._assistant_text = "" - self._user_text = "" - self._system_instruction = system_instruction - - @staticmethod - def upgrade_to_nova_sonic( - obj: OpenAILLMContext, system_instruction: str - ) -> "AWSNovaSonicLLMContext": - """Upgrade an OpenAI context to AWS Nova Sonic context. - - Args: - obj: The OpenAI context to upgrade. - system_instruction: System instruction for the context. - - Returns: - The upgraded AWS Nova Sonic context. - """ - if isinstance(obj, OpenAILLMContext) and not isinstance(obj, AWSNovaSonicLLMContext): - obj.__class__ = AWSNovaSonicLLMContext - obj.__setup_local(system_instruction) - return obj - - # NOTE: this method has the side-effect of updating _system_instruction from messages - def get_messages_for_initializing_history(self) -> AWSNovaSonicConversationHistory: - """Get conversation history for initializing AWS Nova Sonic session. - - Processes stored messages and extracts system instruction and conversation - history in the format expected by AWS Nova Sonic. - - Returns: - Formatted conversation history with system instruction and messages. - """ - history = AWSNovaSonicConversationHistory(system_instruction=self._system_instruction) - - # Bail if there are no messages - if not self.messages: - return history - - messages = copy.deepcopy(self.messages) - - # If we have a "system" message as our first message, let's pull that out into "instruction" - if messages[0].get("role") == "system": - system = messages.pop(0) - content = system.get("content") - if isinstance(content, str): - history.system_instruction = content - elif isinstance(content, list): - history.system_instruction = content[0].get("text") - if history.system_instruction: - self._system_instruction = history.system_instruction - - # Process remaining messages to fill out conversation history. - # Nova Sonic supports "user" and "assistant" messages in history. - for message in messages: - history_message = self.from_standard_message(message) - if history_message: - history.messages.append(history_message) - - return history - - def get_messages_for_persistent_storage(self): - """Get messages formatted for persistent storage. - - Returns: - List of messages including system instruction if present. - """ - messages = super().get_messages_for_persistent_storage() - # If we have a system instruction and messages doesn't already contain it, add it - if self._system_instruction and not (messages and messages[0].get("role") == "system"): - messages.insert(0, {"role": "system", "content": self._system_instruction}) - return messages - - def from_standard_message(self, message) -> AWSNovaSonicConversationHistoryMessage: - """Convert standard message format to Nova Sonic format. - - Args: - message: Standard message dictionary to convert. - - Returns: - Nova Sonic conversation history message, or None if not convertible. - """ - role = message.get("role") - if message.get("role") == "user" or message.get("role") == "assistant": - content = message.get("content") - if isinstance(message.get("content"), list): - content = "" - for c in message.get("content"): - if c.get("type") == "text": - content += " " + c.get("text") - else: - logger.error( - f"Unhandled content type in context message: {c.get('type')} - {message}" - ) - # There won't be content if this is an assistant tool call entry. - # We're ignoring those since they can't be loaded into AWS Nova Sonic conversation - # history - if content: - return AWSNovaSonicConversationHistoryMessage(role=Role[role.upper()], text=content) - # NOTE: we're ignoring messages with role "tool" since they can't be loaded into AWS Nova - # Sonic conversation history - - def buffer_user_text(self, text): - """Buffer user text for later flushing to context. - - Args: - text: User text to buffer. - """ - self._user_text += f" {text}" if self._user_text else text - # logger.debug(f"User text buffered: {self._user_text}") - - def flush_aggregated_user_text(self) -> str: - """Flush buffered user text to context as a complete message. - - Returns: - The flushed user text, or empty string if no text was buffered. - """ - if not self._user_text: - return "" - user_text = self._user_text - message = { - "role": "user", - "content": [{"type": "text", "text": user_text}], - } - self._user_text = "" - self.add_message(message) - # logger.debug(f"Context updated (user): {self.get_messages_for_logging()}") - return user_text - - def buffer_assistant_text(self, text): - """Buffer assistant text for later flushing to context. - - Args: - text: Assistant text to buffer. - """ - self._assistant_text += text - # logger.debug(f"Assistant text buffered: {self._assistant_text}") - - def flush_aggregated_assistant_text(self): - """Flush buffered assistant text to context as a complete message.""" - if not self._assistant_text: - return - message = { - "role": "assistant", - "content": [{"type": "text", "text": self._assistant_text}], - } - self._assistant_text = "" - self.add_message(message) - # logger.debug(f"Context updated (assistant): {self.get_messages_for_logging()}") - - -@dataclass -class AWSNovaSonicMessagesUpdateFrame(DataFrame): - """Frame containing updated AWS Nova Sonic context. - - Parameters: - context: The updated AWS Nova Sonic LLM context. - """ - - context: AWSNovaSonicLLMContext - - -class AWSNovaSonicUserContextAggregator(OpenAIUserContextAggregator): - """Context aggregator for user messages in AWS Nova Sonic conversations. - - Extends the OpenAI user context aggregator to emit Nova Sonic-specific - context update frames. - - .. deprecated:: 0.0.99 - `AWSNovaSonicUserContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - # Super handles deprecation warning - - async def process_frame( - self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM - ): - """Process frames and emit Nova Sonic-specific context updates. - - Args: - frame: The frame to process. - direction: The direction the frame is traveling. - """ - await super().process_frame(frame, direction) - - # Parent does not push LLMMessagesUpdateFrame - if isinstance(frame, LLMMessagesUpdateFrame): - await self.push_frame(AWSNovaSonicMessagesUpdateFrame(context=self._context)) - - -class AWSNovaSonicAssistantContextAggregator(OpenAIAssistantContextAggregator): - """Context aggregator for assistant messages in AWS Nova Sonic conversations. - - Provides specialized handling for assistant responses and function calls - in AWS Nova Sonic context, with custom frame processing logic. - - .. deprecated:: 0.0.99 - `AWSNovaSonicAssistantContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - # Super handles deprecation warning - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process frames with Nova Sonic-specific logic. - - Args: - frame: The frame to process. - direction: The direction the frame is traveling. - """ - # HACK: For now, disable the context aggregator by making it just pass through all frames - # that the parent handles (except the function call stuff, which we still need). - # For an explanation of this hack, see - # AWSNovaSonicLLMService._report_assistant_response_text_added. - if isinstance( - frame, - ( - InterruptionFrame, - LLMFullResponseStartFrame, - LLMFullResponseEndFrame, - TextFrame, - LLMMessagesAppendFrame, - LLMMessagesUpdateFrame, - LLMSetToolsFrame, - LLMSetToolChoiceFrame, - UserImageRawFrame, - BotStoppedSpeakingFrame, - ), - ): - await self.push_frame(frame, direction) - else: - await super().process_frame(frame, direction) - - async def handle_function_call_result(self, frame: FunctionCallResultFrame): - """Handle function call results for AWS Nova Sonic. - - Args: - frame: The function call result frame to handle. - """ - await super().handle_function_call_result(frame) - - # The standard function callback code path pushes the FunctionCallResultFrame from the LLM - # itself, so we didn't have a chance to add the result to the AWS Nova Sonic server-side - # context. Let's push a special frame to do that. - await self.push_frame( - AWSNovaSonicFunctionCallResultFrame(result_frame=frame), FrameDirection.UPSTREAM - ) - - -@dataclass -class AWSNovaSonicContextAggregatorPair: - """Pair of user and assistant context aggregators for AWS Nova Sonic. - - .. deprecated:: 0.0.99 - `AWSNovaSonicContextAggregatorPair` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Parameters: - _user: The user context aggregator. - _assistant: The assistant context aggregator. - """ - - # Aggregators handle deprecation warnings - _user: AWSNovaSonicUserContextAggregator - _assistant: AWSNovaSonicAssistantContextAggregator - - def user(self) -> AWSNovaSonicUserContextAggregator: - """Get the user context aggregator. - - Returns: - The user context aggregator instance. - """ - return self._user - - def assistant(self) -> AWSNovaSonicAssistantContextAggregator: - """Get the assistant context aggregator. - - Returns: - The assistant context aggregator instance. - """ - return self._assistant diff --git a/src/pipecat/services/aws/nova_sonic/frames.py b/src/pipecat/services/aws/nova_sonic/frames.py deleted file mode 100644 index f392eba7f..000000000 --- a/src/pipecat/services/aws/nova_sonic/frames.py +++ /dev/null @@ -1,25 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Custom frames for AWS Nova Sonic LLM service.""" - -from dataclasses import dataclass - -from pipecat.frames.frames import DataFrame, FunctionCallResultFrame - - -@dataclass -class AWSNovaSonicFunctionCallResultFrame(DataFrame): - """Frame containing function call result for AWS Nova Sonic processing. - - This frame wraps a standard function call result frame to enable - AWS Nova Sonic-specific handling and context updates. - - Parameters: - result_frame: The underlying function call result frame. - """ - - result_frame: FunctionCallResultFrame diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 3541946c7..545c42b1c 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -49,15 +49,7 @@ from pipecat.frames.frames import ( UserStoppedSpeakingFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, -) from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import LLMService from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven @@ -531,12 +523,8 @@ class AWSNovaSonicLLMService(LLMService): """ await super().process_frame(frame, direction) - if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): - context = ( - frame.context - if isinstance(frame, LLMContextFrame) - else LLMContext.from_openai_context(frame.context) - ) + if isinstance(frame, LLMContextFrame): + context = frame.context await self._handle_context(context) elif isinstance(frame, InputAudioRawFrame): await self._handle_input_audio_frame(frame) @@ -1353,44 +1341,6 @@ class AWSNovaSonicLLMService(LLMService): # We're no longer waiting for a trigger transcription self._waiting_for_trigger_transcription = False - # - # context - # - - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> LLMContextAggregatorPair: - """Create context aggregator pair for managing conversation context. - - NOTE: this method exists only for backward compatibility. New code - should instead do:: - - context = LLMContext(...) - context_aggregator = LLMContextAggregatorPair(context) - - Args: - context: The OpenAI LLM context. - user_params: Parameters for the user context aggregator. - assistant_params: Parameters for the assistant context aggregator. - - Returns: - A pair of user and assistant context aggregators. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - # from_openai_context handles deprecation warning - context = LLMContext.from_openai_context(context) - return LLMContextAggregatorPair( - context, user_params=user_params, assistant_params=assistant_params - ) - # # assistant response trigger # HACK: only needed for the older Nova Sonic (as opposed to Nova 2 Sonic) model diff --git a/src/pipecat/services/aws_nova_sonic/context.py b/src/pipecat/services/aws_nova_sonic/context.py deleted file mode 100644 index 01f152b16..000000000 --- a/src/pipecat/services/aws_nova_sonic/context.py +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Context management for AWS Nova Sonic LLM service. - -This module provides specialized context aggregators and message handling for AWS Nova Sonic, -including conversation history management and role-specific message processing. - -.. deprecated:: 0.0.91 - AWS Nova Sonic no longer uses types from this module under the hood. - It now uses `LLMContext` and `LLMContextAggregatorPair`. - Using the new patterns should allow you to not need types from this module. - - See deprecation warning in pipecat.services.aws.nova_sonic.context for more - details. -""" - -from pipecat.services.aws.nova_sonic.context import * diff --git a/src/pipecat/services/aws_nova_sonic/frames.py b/src/pipecat/services/aws_nova_sonic/frames.py deleted file mode 100644 index 7ac81215c..000000000 --- a/src/pipecat/services/aws_nova_sonic/frames.py +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Custom frames for AWS Nova Sonic LLM service.""" - -import warnings - -from pipecat.services.aws.nova_sonic.frames import * - -with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Types in pipecat.services.aws_nova_sonic.frames are deprecated. " - "Please use the equivalent types from " - "pipecat.services.aws.nova_sonic.frames instead.", - DeprecationWarning, - stacklevel=2, - ) diff --git a/src/pipecat/services/gemini_multimodal_live/__init__.py b/src/pipecat/services/gemini_multimodal_live/__init__.py deleted file mode 100644 index ac4524606..000000000 --- a/src/pipecat/services/gemini_multimodal_live/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .file_api import GeminiFileAPI -from .gemini import GeminiMultimodalLiveLLMService - -__all__ = [ - "GeminiFileAPI", - "GeminiMultimodalLiveLLMService", -] diff --git a/src/pipecat/services/gemini_multimodal_live/events.py b/src/pipecat/services/gemini_multimodal_live/events.py deleted file mode 100644 index a560a0b02..000000000 --- a/src/pipecat/services/gemini_multimodal_live/events.py +++ /dev/null @@ -1,44 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Event models and utilities for Google Gemini Multimodal Live API. - -.. deprecated:: 0.0.90 - Importing StartSensitivity and EndSensitivity from this module is deprecated. - Import them directly from google.genai.types instead. -""" - -import warnings - -from loguru import logger - -try: - from google.genai.types import ( - EndSensitivity as _EndSensitivity, - ) - from google.genai.types import ( - StartSensitivity as _StartSensitivity, - ) -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.") - raise Exception(f"Missing module: {e}") - -# These aliases are just here for backward compatibility, since we used to -# define public-facing StartSensitivity and EndSensitivity enums in this -# module. -with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Importing StartSensitivity and EndSensitivity from " - "pipecat.services.gemini_multimodal_live.events is deprecated. " - "Please import them directly from google.genai.types instead.", - DeprecationWarning, - stacklevel=2, - ) - -StartSensitivity = _StartSensitivity -EndSensitivity = _EndSensitivity diff --git a/src/pipecat/services/gemini_multimodal_live/file_api.py b/src/pipecat/services/gemini_multimodal_live/file_api.py deleted file mode 100644 index 02df16ded..000000000 --- a/src/pipecat/services/gemini_multimodal_live/file_api.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Gemini File API client for uploading and managing files. - -This module provides a client for Google's Gemini File API, enabling file -uploads, metadata retrieval, listing, and deletion. Files uploaded through -this API can be referenced in Gemini generative model calls. - -.. deprecated:: 0.0.90 - Importing GeminiFileAPI from this module is deprecated. - Import it from pipecat.services.google.gemini_live.file_api instead. -""" - -import warnings - -from loguru import logger - -try: - from pipecat.services.google.gemini_live.file_api import GeminiFileAPI as _GeminiFileAPI -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error("In order to use Google AI, you need to `pip install pipecat-ai[google]`.") - raise Exception(f"Missing module: {e}") - -# These aliases are just here for backward compatibility, since we used to -# define public-facing StartSensitivity and EndSensitivity enums in this -# module. -warnings.warn( - "Importing GeminiFileAPI from " - "pipecat.services.gemini_multimodal_live.file_api is deprecated. " - "Please import it from pipecat.services.google.gemini_live.file_api instead.", - DeprecationWarning, - stacklevel=2, -) -GeminiFileAPI = _GeminiFileAPI diff --git a/src/pipecat/services/gemini_multimodal_live/gemini.py b/src/pipecat/services/gemini_multimodal_live/gemini.py deleted file mode 100644 index 6f01e5a23..000000000 --- a/src/pipecat/services/gemini_multimodal_live/gemini.py +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Google Gemini Live API service implementation. - -This module provides real-time conversational AI capabilities using Google's -Gemini Live API, supporting both text and audio modalities with -voice transcription, streaming responses, and tool usage. - -.. deprecated:: 0.0.90 - This module is deprecated. Please use the equivalent types from - pipecat.services.google.gemini_live.llm instead. Note that the new type names - do not include 'Multimodal'. -""" - -import warnings - -from pipecat.services.google.gemini_live.llm import ( - ContextWindowCompressionParams as _ContextWindowCompressionParams, -) -from pipecat.services.google.gemini_live.llm import ( - GeminiLiveAssistantContextAggregator, - GeminiLiveContext, - GeminiLiveContextAggregatorPair, - GeminiLiveLLMService, - GeminiLiveUserContextAggregator, - GeminiModalities, -) -from pipecat.services.google.gemini_live.llm import GeminiMediaResolution as _GeminiMediaResolution -from pipecat.services.google.gemini_live.llm import GeminiVADParams as _GeminiVADParams -from pipecat.services.google.gemini_live.llm import InputParams as _InputParams - -with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Types in pipecat.services.gemini_multimodal_live.gemini are deprecated. " - "Please use the equivalent types from " - "pipecat.services.google.gemini_live.llm instead. Note that the new type " - "names do not include 'Multimodal' " - "(e.g. `GeminiMultimodalLiveLLMService` is now `GeminiLiveLLMService`).", - DeprecationWarning, - stacklevel=2, - ) - -GeminiMultimodalLiveContext = GeminiLiveContext -GeminiMultimodalLiveUserContextAggregator = GeminiLiveUserContextAggregator -GeminiMultimodalLiveAssistantContextAggregator = GeminiLiveAssistantContextAggregator -GeminiMultimodalLiveContextAggregatorPair = GeminiLiveContextAggregatorPair -GeminiMultimodalModalities = GeminiModalities -GeminiMediaResolution = _GeminiMediaResolution -GeminiVADParams = _GeminiVADParams -ContextWindowCompressionParams = _ContextWindowCompressionParams -InputParams = _InputParams -GeminiMultimodalLiveLLMService = GeminiLiveLLMService diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 846a614a2..1bd7174b2 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -59,23 +59,11 @@ from pipecat.frames.frames import ( ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, -) from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.google.frames import LLMSearchOrigin, LLMSearchResponseFrame, LLMSearchResult from pipecat.services.google.utils import update_google_client_http_options from pipecat.services.llm_service import FunctionCallFromLLM, LLMService -from pipecat.services.openai.llm import ( - OpenAIAssistantContextAggregator, - OpenAIUserContextAggregator, -) from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.string import match_endofsentence @@ -224,274 +212,6 @@ def language_to_gemini_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) -class GeminiLiveContext(OpenAILLMContext): - """Extended OpenAI context for Gemini Live API. - - Provides Gemini-specific context management including system instruction - extraction and message format conversion for the Live API. - - .. deprecated:: 0.0.92 - Gemini Live no longer uses `GeminiLiveContext` under the hood. - It now uses `LLMContext`. - """ - - @staticmethod - def upgrade(obj: OpenAILLMContext) -> "GeminiLiveContext": - """Upgrade an OpenAI context to Gemini context. - - Args: - obj: The OpenAI context to upgrade. - - Returns: - The upgraded Gemini context instance. - """ - # This warning is here rather than `__init__` since `upgrade()` was the - # "main" way that GeminiLiveContext instances were created. - # Almost no users should be seeing this message anyway, as - # GeminiLiveContext instances were typically created under the hood: - # the user would pass an OpenAILLMContext instance, which would be - # upgraded without them necessarily knowing. - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "GeminiLiveContext is deprecated. " - "Gemini Live no longer uses GeminiLiveContext under the hood. " - "It now uses LLMContext.", - DeprecationWarning, - stacklevel=2, - ) - - if isinstance(obj, OpenAILLMContext) and not isinstance(obj, GeminiLiveContext): - logger.debug(f"Upgrading to Gemini Live Context: {obj}") - obj.__class__ = GeminiLiveContext - obj._restructure_from_openai_messages() - return obj - - def _restructure_from_openai_messages(self): - pass - - def extract_system_instructions(self): - """Extract system instructions from context messages. - - Returns: - Combined system instruction text from all system messages. - """ - system_instruction = "" - for item in self.messages: - if item.get("role") == "system": - content = item.get("content", "") - if content: - if system_instruction and not system_instruction.endswith("\n"): - system_instruction += "\n" - system_instruction += str(content) - return system_instruction - - def add_file_reference(self, file_uri: str, mime_type: str, text: Optional[str] = None): - """Add a file reference to the context. - - This adds a user message with a file reference that will be sent during context initialization. - - Args: - file_uri: URI of the uploaded file - mime_type: MIME type of the file - text: Optional text prompt to accompany the file - """ - # Create parts list with file reference - parts = [] - if text: - parts.append({"type": "text", "text": text}) - - # Add file reference part - parts.append( - {"type": "file_data", "file_data": {"mime_type": mime_type, "file_uri": file_uri}} - ) - - # Add to messages - message = {"role": "user", "content": parts} - self.messages.append(message) - logger.info(f"Added file reference to context: {file_uri}") - - def get_messages_for_initializing_history(self) -> List[Content]: - """Get messages formatted for Gemini history initialization. - - Returns: - List of messages in Gemini format for conversation history. - """ - messages: List[Content] = [] - for item in self.messages: - role = item.get("role") - - if role == "system": - continue - - elif role == "assistant": - role = "model" - - content = item.get("content") - parts: List[Part] = [] - if isinstance(content, str): - parts = [Part(text=content)] - elif isinstance(content, list): - for part in content: - if part.get("type") == "text": - parts.append(Part(text=part.get("text"))) - elif part.get("type") == "file_data": - file_data = part.get("file_data", {}) - parts.append( - Part( - file_data=FileData( - mime_type=file_data.get("mime_type"), - file_uri=file_data.get("file_uri"), - ) - ) - ) - else: - logger.warning(f"Unsupported content type: {str(part)[:80]}") - else: - logger.warning(f"Unsupported content type: {str(content)[:80]}") - messages.append(Content(role=role, parts=parts)) - return messages - - -class GeminiLiveUserContextAggregator(OpenAIUserContextAggregator): - """User context aggregator for Gemini Live. - - Extends OpenAI user aggregator to handle Gemini-specific message passing - while maintaining compatibility with the standard aggregation pipeline. - - .. deprecated:: 0.0.92 - Gemini Live no longer expects a `GeminiLiveUserContextAggregator`. - It now expects a `LLMUserAggregator`. - """ - - def __init__(self, *args, **kwargs): - """Initialize Gemini Live user context aggregator.""" - # Almost no users should be seeing this message, as - # `GeminiLiveUserContextAggregator`` instances were typically created - # under the hood, as part of `llm.create_context_aggregator()`. - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "GeminiLiveUserContextAggregator is deprecated. " - "Gemini Live no longer expects a GeminiLiveUserContextAggregator. " - "It now expects a LLMUserAggregator.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) - - async def process_frame(self, frame, direction): - """Process incoming frames for user context aggregation. - - Args: - frame: The frame to process. - direction: The frame processing direction. - """ - await super().process_frame(frame, direction) - # kind of a hack just to pass the LLMMessagesAppendFrame through, but it's fine for now - if isinstance(frame, LLMMessagesAppendFrame): - await self.push_frame(frame, direction) - - -class GeminiLiveAssistantContextAggregator(OpenAIAssistantContextAggregator): - """Assistant context aggregator for Gemini Live. - - Handles assistant response aggregation while filtering out LLMTextFrames - to prevent duplicate context entries, as Gemini Live pushes both - LLMTextFrames and TTSTextFrames. - - .. deprecated:: 0.0.92 - Gemini Live no longer uses `GeminiLiveAssistantContextAggregator` under the hood. - It now uses `LLMAssistantAggregator`. - """ - - def __init__(self, *args, **kwargs): - """Initialize Gemini Live assistant context aggregator.""" - # Almost no users should be seeing this message, as - # `GeminiLiveAssistantContextAggregator` instances were typically - # created under the hood, as part of `llm.create_context_aggregator()`. - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "GeminiLiveAssistantContextAggregator is deprecated. " - "Gemini Live no longer uses GeminiLiveAssistantContextAggregator under the hood. " - "It now uses LLMAssistantAggregator.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process incoming frames for assistant context aggregation. - - Args: - frame: The frame to process. - direction: The frame processing direction. - """ - # The LLMAssistantContextAggregator uses TextFrames to aggregate the LLM output, - # but the GeminiLiveAssistantContextAggregator pushes LLMTextFrames and TTSTextFrames. We - # need to override this proces_frame for LLMTextFrame, so that only the TTSTextFrames - # are process. This ensures that the context gets only one set of messages. - if not isinstance(frame, LLMTextFrame): - await super().process_frame(frame, direction) - - async def handle_user_image_frame(self, frame: UserImageRawFrame): - """Handle user image frames. - - Args: - frame: The user image frame to handle. - """ - # We don't want to store any images in the context. Revisit this later - # when the API evolves. - pass - - -@dataclass -class GeminiLiveContextAggregatorPair: - """Pair of user and assistant context aggregators for Gemini Live. - - .. deprecated:: 0.0.92 - `GeminiLiveContextAggregatorPair` is deprecated. - Use `LLMContextAggregatorPair` instead. - - Parameters: - _user: The user context aggregator instance. - _assistant: The assistant context aggregator instance. - """ - - _user: GeminiLiveUserContextAggregator - _assistant: GeminiLiveAssistantContextAggregator - - def __post_init__(self): - # Almost no users should be seeing this message, as - # `GeminiLiveContextAggregatorPair` instances were typically created - # under the hood, with `llm.create_context_aggregator()`. - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "GeminiLiveContextAggregatorPair is deprecated. " - "Use LLMContextAggregatorPair instead.", - DeprecationWarning, - stacklevel=2, - ) - - def user(self) -> GeminiLiveUserContextAggregator: - """Get the user context aggregator. - - Returns: - The user context aggregator instance. - """ - return self._user - - def assistant(self) -> GeminiLiveAssistantContextAggregator: - """Get the assistant context aggregator. - - Returns: - The assistant context aggregator instance. - """ - return self._assistant - - class GeminiModalities(Enum): """Supported modalities for Gemini Live. @@ -945,23 +665,6 @@ class GeminiLiveLLMService(LLMService): self._settings.language = self._language_code logger.info(f"Set Gemini language to: {self._language_code}") - async def set_context(self, context: OpenAILLMContext): - """Set the context explicitly from outside the pipeline. - - This is useful when initializing a conversation because in server-side VAD mode we might not have a - way to trigger the pipeline. This sends the history to the server. The `inference_on_context_initialization` - flag controls whether to set the turnComplete flag when we do this. Without that flag, the model will - not respond. This is often what we want when setting the context at the beginning of a conversation. - - Args: - context: The OpenAI LLM context to set. - """ - if self._context: - logger.error("Context already set. Can only set up Gemini Live context once.") - return - self._context = GeminiLiveContext.upgrade(context) - await self._create_initial_response() - # # standard AIService frame handling # @@ -1053,13 +756,8 @@ class GeminiLiveLLMService(LLMService): if isinstance(frame, TranscriptionFrame): await self.push_frame(frame, direction) - elif isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): - context = ( - frame.context - if isinstance(frame, LLMContextFrame) - else LLMContext.from_openai_context(frame.context) - ) - await self._handle_context(context) + elif isinstance(frame, LLMContextFrame): + await self._handle_context(frame.context) elif isinstance(frame, InputTextRawFrame): await self._send_user_text(frame.text) await self.push_frame(frame, direction) @@ -2078,40 +1776,3 @@ class GeminiLiveLLMService(LLMService): # cost/stability implications for a service cluster, let's just treat a # send-side error as fatal. await self.push_error(error_msg=f"Send error: {error}") - - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> LLMContextAggregatorPair: - """Create an instance of GeminiLiveContextAggregatorPair from an OpenAILLMContext. - - Constructor keyword arguments for both the user and assistant aggregators can be provided. - - NOTE: this method exists only for backward compatibility. New code - should instead do:: - - context = LLMContext(...) - context_aggregator = LLMContextAggregatorPair(context) - - Args: - context: The LLM context to use. - user_params: User aggregator parameters. Defaults to LLMUserAggregatorParams(). - assistant_params: Assistant aggregator parameters. Defaults to LLMAssistantAggregatorParams(). - - Returns: - A pair of user and assistant context aggregators. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - # from_openai_context handles deprecation warning - context = LLMContext.from_openai_context(context) - assistant_params.expect_stripped_words = False - return LLMContextAggregatorPair( - context, user_params=user_params, assistant_params=assistant_params - ) diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index c8d54830e..7af1754a6 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -34,29 +34,16 @@ from pipecat.frames.frames import ( LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMMessagesAppendFrame, - LLMMessagesFrame, LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, -) -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.google.frames import LLMSearchResponseFrame from pipecat.services.google.utils import update_google_client_http_options from pipecat.services.llm_service import FunctionCallFromLLM, LLMService -from pipecat.services.openai.llm import ( - OpenAIAssistantContextAggregator, - OpenAIUserContextAggregator, -) from pipecat.services.settings import ( NOT_GIVEN, LLMSettings, @@ -90,595 +77,6 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -class GoogleUserContextAggregator(OpenAIUserContextAggregator): - """Google-specific user context aggregator. - - Extends OpenAI user context aggregator to handle Google AI's specific - Content and Part message format for user messages. - - .. deprecated:: 0.0.99 - `OpenAIUserContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - # Super handles deprecation warning - - async def handle_aggregation(self, aggregation: str): - """Add the aggregated user text to the context as a Google Content message. - - Args: - aggregation: The aggregated user text to add as a user message. - """ - self._context.add_message(Content(role="user", parts=[Part(text=aggregation)])) - - -class GoogleAssistantContextAggregator(OpenAIAssistantContextAggregator): - """Google-specific assistant context aggregator. - - Extends OpenAI assistant context aggregator to handle Google AI's specific - Content and Part message format for assistant responses and function calls. - - .. deprecated:: 0.0.99 - `GoogleAssistantContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - # Super handles deprecation warning - - async def handle_aggregation(self, aggregation: str): - """Handle aggregated assistant text response. - - Args: - aggregation: The aggregated text response from the assistant. - """ - self._context.add_message(Content(role="model", parts=[Part(text=aggregation)])) - - async def handle_function_call_in_progress(self, frame: FunctionCallInProgressFrame): - """Handle function call in progress frame. - - Args: - frame: Frame containing function call details. - """ - self._context.add_message( - Content( - role="model", - parts=[ - Part( - function_call=FunctionCall( - id=frame.tool_call_id, name=frame.function_name, args=frame.arguments - ) - ) - ], - ) - ) - self._context.add_message( - Content( - role="user", - parts=[ - Part( - function_response=FunctionResponse( - id=frame.tool_call_id, - name=frame.function_name, - response={"response": "IN_PROGRESS"}, - ) - ) - ], - ) - ) - - async def handle_function_call_result(self, frame: FunctionCallResultFrame): - """Handle function call result frame. - - Args: - frame: Frame containing function call result. - """ - if frame.result: - await self._update_function_call_result( - frame.function_name, frame.tool_call_id, frame.result - ) - else: - await self._update_function_call_result( - frame.function_name, frame.tool_call_id, "COMPLETED" - ) - - async def handle_function_call_cancel(self, frame: FunctionCallCancelFrame): - """Handle function call cancellation frame. - - Args: - frame: Frame containing function call cancellation details. - """ - await self._update_function_call_result( - frame.function_name, frame.tool_call_id, "CANCELLED" - ) - - async def _update_function_call_result( - self, function_name: str, tool_call_id: str, result: Any - ): - for message in self._context.messages: - if message.role == "user": - for part in message.parts: - if part.function_response and part.function_response.id == tool_call_id: - part.function_response.response = { - "value": json.dumps(result, ensure_ascii=False) - } - - -@dataclass -class GoogleContextAggregatorPair: - """Pair of Google context aggregators for user and assistant messages. - - .. deprecated:: 0.0.99 - `GoogleContextAggregatorPair` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Parameters: - _user: User context aggregator for handling user messages. - _assistant: Assistant context aggregator for handling assistant responses. - """ - - # Aggregators handle deprecation warnings - _user: GoogleUserContextAggregator - _assistant: GoogleAssistantContextAggregator - - def user(self) -> GoogleUserContextAggregator: - """Get the user context aggregator. - - Returns: - The user context aggregator instance. - """ - return self._user - - def assistant(self) -> GoogleAssistantContextAggregator: - """Get the assistant context aggregator. - - Returns: - The assistant context aggregator instance. - """ - return self._assistant - - -class GoogleLLMContext(OpenAILLMContext): - """Google AI LLM context that extends OpenAI context for Google-specific formatting. - - This class handles conversion between OpenAI-style messages and Google AI's - Content/Part format, including system messages, function calls, and media. - - .. deprecated:: 0.0.99 - `GoogleLLMContext` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - def __init__( - self, - messages: Optional[List[dict]] = None, - tools: Optional[List[dict]] = None, - tool_choice: Optional[dict] = None, - ): - """Initialize GoogleLLMContext. - - Args: - messages: Initial messages in OpenAI format. - tools: Available tools/functions for the model. - tool_choice: Tool choice configuration. - """ - # Super handles deprecation warning - super().__init__(messages=messages, tools=tools, tool_choice=tool_choice) - self.system_message = None - - @staticmethod - def upgrade_to_google(obj: OpenAILLMContext) -> "GoogleLLMContext": - """Upgrade an OpenAI context to a Google context. - - Args: - obj: OpenAI LLM context to upgrade. - - Returns: - GoogleLLMContext instance with converted messages. - """ - if isinstance(obj, OpenAILLMContext) and not isinstance(obj, GoogleLLMContext): - logger.debug(f"Upgrading to Google: {obj}") - obj.__class__ = GoogleLLMContext - obj._restructure_from_openai_messages() - return obj - - def set_messages(self, messages: List): - """Set messages and restructure them for Google format. - - Args: - messages: List of messages to set. - """ - self._messages[:] = messages - self._restructure_from_openai_messages() - - def add_messages(self, messages: List): - """Add messages to the context, converting to Google format as needed. - - Args: - messages: List of messages to add (can be mixed formats). - """ - # Convert each message individually - converted_messages = [] - for msg in messages: - if isinstance(msg, Content): - # Already in Gemini format - converted_messages.append(msg) - else: - # Convert from standard format to Gemini format - converted = self.from_standard_message(msg) - if converted is not None: - converted_messages.append(converted) - - # Add the converted messages to our existing messages - self._messages.extend(converted_messages) - - def get_messages_for_logging(self) -> List[Dict[str, Any]]: - """Get messages formatted for logging with sensitive data redacted. - - Returns: - List of messages in a format ready for logging. - """ - msgs = [] - for message in self.messages: - obj = message.to_json_dict() - try: - if "parts" in obj: - for part in obj["parts"]: - if "inline_data" in part: - part["inline_data"]["data"] = "..." - except Exception as e: - logger.debug(f"Error: {e}") - msgs.append(obj) - return msgs - - def add_image_frame_message( - self, *, format: str, size: tuple[int, int], image: bytes, text: str = None - ): - """Add an image message to the context. - - Args: - format: Image format (e.g., 'RGB', 'RGBA'). - size: Image dimensions as (width, height). - image: Raw image bytes. - text: Optional text to accompany the image. - """ - buffer = io.BytesIO() - Image.frombytes(format, size, image).save(buffer, format="JPEG") - - parts = [] - if text: - parts.append(Part(text=text)) - parts.append(Part(inline_data=Blob(mime_type="image/jpeg", data=buffer.getvalue()))) - - self.add_message(Content(role="user", parts=parts)) - - def add_audio_frames_message( - self, *, audio_frames: list[AudioRawFrame], text: str = "Audio follows" - ): - """Add audio frames as a message to the context. - - Args: - audio_frames: List of audio frames to add. - text: Text description of the audio content. - """ - if not audio_frames: - return - - sample_rate = audio_frames[0].sample_rate - num_channels = audio_frames[0].num_channels - - parts = [] - data = b"".join(frame.audio for frame in audio_frames) - # NOTE(aleix): According to the docs only text or inline_data should be needed. - # (see https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference) - parts.append(Part(text=text)) - parts.append( - Part( - inline_data=Blob( - mime_type="audio/wav", - data=( - bytes( - self.create_wav_header(sample_rate, num_channels, 16, len(data)) + data - ) - ), - ) - ), - ) - self.add_message(Content(role="user", parts=parts)) - # message = {"mime_type": "audio/mp3", "data": bytes(data + create_wav_header(sample_rate, num_channels, 16, len(data)))} - # self.add_message(message) - - def from_standard_message(self, message): - """Convert standard format message to Google Content object. - - Handles conversion of text, images, and function calls to Google's format. - System messages are stored separately and return None. - - Args: - message: Message in standard format. - - Returns: - Content object with role and parts, or None for system messages. - - Examples: - Standard text message:: - - { - "role": "user", - "content": "Hello there" - } - - Converts to Google Content with:: - - Content( - role="user", - parts=[Part(text="Hello there")] - ) - - Standard function call message:: - - { - "role": "assistant", - "tool_calls": [ - { - "function": { - "name": "search", - "arguments": '{"query": "test"}' - } - } - ] - } - - Converts to Google Content with:: - - Content( - role="model", - parts=[Part(function_call=FunctionCall(name="search", args={"query": "test"}))] - ) - - System message returns None and stores content in self.system_message. - """ - role = message["role"] - content = message.get("content", []) - if role == "system": - # System instructions are returned as plain text - if isinstance(content, str): - self.system_message = content - elif isinstance(content, list): - # If content is a list, we assume it's a list of text parts, per the standard - self.system_message = " ".join( - part["text"] for part in content if part.get("type") == "text" - ) - return None - elif role == "assistant": - role = "model" - - parts = [] - if message.get("tool_calls"): - for tc in message["tool_calls"]: - parts.append( - Part( - function_call=FunctionCall( - name=tc["function"]["name"], - args=json.loads(tc["function"]["arguments"]), - ) - ) - ) - elif role == "tool": - role = "model" - try: - response = json.loads(message["content"]) - if isinstance(response, dict): - response_dict = response - else: - response_dict = {"value": response} - except Exception as e: - # Response might not be JSON-deserializable (e.g. plain text). - response_dict = {"value": message["content"]} - parts.append( - Part( - function_response=FunctionResponse( - name="tool_call_result", # seems to work to hard-code the same name every time - response=response_dict, - ) - ) - ) - elif isinstance(content, str): - parts.append(Part(text=content)) - elif isinstance(content, list): - for c in content: - if c["type"] == "text": - parts.append(Part(text=c["text"])) - elif c["type"] == "image_url": - # Extract MIME type from data URL (format: "data:image/jpeg;base64,...") - url = c["image_url"]["url"] - mime_type = ( - url.split(":")[1].split(";")[0] if url.startswith("data:") else "image/jpeg" - ) - parts.append( - Part( - inline_data=Blob( - mime_type=mime_type, - data=base64.b64decode(url.split(",")[1]), - ) - ) - ) - - message = Content(role=role, parts=parts) - return message - - def to_standard_messages(self, obj) -> list: - """Convert Google Content object to standard structured format. - - Handles text, images, and function calls from Google's Content/Part objects. - - Args: - obj: Google Content object with role and parts. - - Returns: - List containing a single message in standard format. - - Examples: - Google Content with text:: - - Content( - role="user", - parts=[Part(text="Hello")] - ) - - Converts to:: - - [ - { - "role": "user", - "content": [{"type": "text", "text": "Hello"}] - } - ] - - Google Content with function call:: - - Content( - role="model", - parts=[Part(function_call=FunctionCall(name="search", args={"q": "test"}))] - ) - - Converts to:: - - [ - { - "role": "assistant", - "tool_calls": [ - { - "id": "search", - "type": "function", - "function": { - "name": "search", - "arguments": '{"q": "test"}' - } - } - ] - } - ] - - Google Content with image:: - - Content( - role="user", - parts=[Part(inline_data=Blob(mime_type="image/jpeg", data=bytes_data))] - ) - - Converts to:: - - [ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": {"url": "data:image/jpeg;base64,"} - } - ] - } - ] - """ - msg = {"role": obj.role, "content": []} - if msg["role"] == "model": - msg["role"] = "assistant" - - for part in obj.parts: - if part.text: - msg["content"].append({"type": "text", "text": part.text}) - elif part.inline_data: - encoded = base64.b64encode(part.inline_data.data).decode("utf-8") - msg["content"].append( - { - "type": "image_url", - "image_url": {"url": f"data:{part.inline_data.mime_type};base64,{encoded}"}, - } - ) - elif part.function_call: - args = part.function_call.args if hasattr(part.function_call, "args") else {} - msg["tool_calls"] = [ - { - "id": part.function_call.name, - "type": "function", - "function": { - "name": part.function_call.name, - "arguments": json.dumps(args), - }, - } - ] - - elif part.function_response: - msg["role"] = "tool" - resp = ( - part.function_response.response - if hasattr(part.function_response, "response") - else {} - ) - msg["tool_call_id"] = part.function_response.name - msg["content"] = json.dumps(resp) - - # there might be no content parts for tool_calls messages - if not msg["content"]: - del msg["content"] - return [msg] - - def _restructure_from_openai_messages(self): - """Restructures messages to ensure proper Google format and message ordering. - - This method handles conversion of OpenAI-formatted messages to Google format, - with special handling for function calls, function responses, and system messages. - System messages are added back to the context as user messages when needed. - - The final message order is preserved as: - 1. Function calls (from model) - 2. Function responses (from user) - 3. Text messages (converted from system messages) - - Note: - System messages are only added back when there are no regular text - messages in the context, ensuring proper conversation continuity - after function calls. - """ - self.system_message = None - converted_messages = [] - - # Process each message, preserving Google-formatted messages and converting others - for message in self._messages: - if isinstance(message, Content): - # Keep existing Google-formatted messages (e.g., function calls/responses) - converted_messages.append(message) - continue - - # Convert OpenAI format to Google format, system messages return None - converted = self.from_standard_message(message) - if converted is not None: - converted_messages.append(converted) - - # Update message list - self._messages[:] = converted_messages - - # Check if we only have function-related messages (no regular text) - has_regular_messages = any( - len(msg.parts) == 1 - and getattr(msg.parts[0], "text", None) - and not getattr(msg.parts[0], "function_call", None) - and not getattr(msg.parts[0], "function_response", None) - for msg in self._messages - ) - - # Add system message back as a user message if we only have function messages - if self.system_message and not has_regular_messages: - self._messages.append(Content(role="user", parts=[Part(text=self.system_message)])) - - # Remove any empty messages - self._messages = [m for m in self._messages if m.parts] - - class GoogleThinkingConfig(BaseModel): """Configuration for controlling the model's internal "thinking" process used before generating a response. @@ -741,8 +139,7 @@ class GoogleLLMService(LLMService): """Google AI (Gemini) LLM service implementation. This class implements inference with Google's AI models, translating internally - from an OpenAILLMContext or a universal LLMContext to the messages format - expected by the Google AI model. + from an LLMContext to the messages format expected by the Google AI model. """ Settings = GoogleLLMSettings @@ -885,7 +282,7 @@ class GoogleLLMService(LLMService): async def run_inference( self, - context: LLMContext | OpenAILLMContext, + context: LLMContext, max_tokens: Optional[int] = None, system_instruction: Optional[str] = None, ) -> Optional[str]: @@ -905,19 +302,13 @@ class GoogleLLMService(LLMService): system = [] tools = [] effective_instruction = system_instruction or self._settings.system_instruction - if isinstance(context, LLMContext): - adapter = self.get_llm_adapter() - params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params( - context, system_instruction=effective_instruction - ) - messages = params["messages"] - system = params["system_instruction"] - tools = params["tools"] - else: - context = GoogleLLMContext.upgrade_to_google(context) - messages = context.messages - system = getattr(context, "system_message", None) - tools = context.tools or [] + adapter = self.get_llm_adapter() + params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params( + context, system_instruction=effective_instruction + ) + messages = params["messages"] + system = params["system_instruction"] + tools = params["tools"] # Build generation config using the same method as streaming generation_params = self._build_generation_params( @@ -1004,17 +395,24 @@ class GoogleLLMService(LLMService): except Exception as e: logger.error(f"Failed to unset thinking budget: {e}") - async def _stream_content( - self, params_from_context: GeminiLLMInvocationParams - ) -> AsyncIterator[GenerateContentResponse]: - messages = params_from_context["messages"] + async def _stream_content(self, context: LLMContext) -> AsyncIterator[GenerateContentResponse]: + adapter = self.get_llm_adapter() + params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params( + context, system_instruction=self._settings.system_instruction + ) + + logger.debug( + f"{self}: Generating chat from context [{params['system_instruction']}] | {adapter.get_messages_for_logging(context)}" + ) + + messages = params["messages"] # The adapter already resolved system_instruction vs context system message. - system_instruction = params_from_context["system_instruction"] + system_instruction = params["system_instruction"] tools = [] - if params_from_context["tools"]: - tools = params_from_context["tools"] + if params["tools"]: + tools = params["tools"] elif self._tools: tools = self._tools tool_config = None @@ -1040,37 +438,8 @@ class GoogleLLMService(LLMService): config=generation_config, ) - async def _stream_content_specific_context( - self, context: OpenAILLMContext - ) -> AsyncIterator[GenerateContentResponse]: - logger.debug( - f"{self}: Generating chat from LLM-specific context [{context.system_message}] | {context.get_messages_for_logging()}" - ) - - params = GeminiLLMInvocationParams( - messages=context.messages, - system_instruction=context.system_message, - tools=context.tools, - ) - - return await self._stream_content(params) - - async def _stream_content_universal_context( - self, context: LLMContext - ) -> AsyncIterator[GenerateContentResponse]: - adapter = self.get_llm_adapter() - params: GeminiLLMInvocationParams = adapter.get_llm_invocation_params( - context, system_instruction=self._settings.system_instruction - ) - - logger.debug( - f"{self}: Generating chat from universal context [{params['system_instruction']}] | {adapter.get_messages_for_logging(context)}" - ) - - return await self._stream_content(params) - @traced_llm - async def _process_context(self, context: OpenAILLMContext | LLMContext): + async def _process_context(self, context: LLMContext): await self.push_frame(LLMFullResponseStartFrame()) prompt_tokens = 0 @@ -1083,12 +452,8 @@ class GoogleLLMService(LLMService): accumulated_text = "" try: - # Generate content using either OpenAILLMContext or universal LLMContext - response = await ( - self._stream_content_specific_context(context) - if isinstance(context, OpenAILLMContext) - else self._stream_content_universal_context(context) - ) + # Generate content from LLMContext + response = await self._stream_content(context) function_calls = [] async for chunk in response: @@ -1274,15 +639,8 @@ class GoogleLLMService(LLMService): context = None - if isinstance(frame, OpenAILLMContextFrame): - context = GoogleLLMContext.upgrade_to_google(frame.context) - elif isinstance(frame, LLMContextFrame): - # Handle universal (LLM-agnostic) LLM context frames + if isinstance(frame, LLMContextFrame): context = frame.context - elif isinstance(frame, LLMMessagesFrame): - # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal - # LLMContext with it - context = GoogleLLMContext(frame.messages) else: await self.push_frame(frame, direction) @@ -1305,41 +663,3 @@ class GoogleLLMService(LLMService): except Exception: # Do nothing - we're shutting down anyway pass - - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> GoogleContextAggregatorPair: - """Create Google-specific context aggregators. - - Creates a pair of context aggregators optimized for Google's message format, - including support for function calls, tool usage, and image handling. - - Args: - context: The LLM context to create aggregators for. - user_params: Parameters for user message aggregation. - assistant_params: Parameters for assistant message aggregation. - - Returns: - GoogleContextAggregatorPair: A pair of context aggregators, one for - the user and one for the assistant, encapsulated in an - GoogleContextAggregatorPair. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - context.set_llm_adapter(self.get_llm_adapter()) - - if isinstance(context, OpenAILLMContext): - context = GoogleLLMContext.upgrade_to_google(context) - - # Aggregators handle deprecation warnings - user = GoogleUserContextAggregator(context, params=user_params) - assistant = GoogleAssistantContextAggregator(context, params=assistant_params) - - return GoogleContextAggregatorPair(_user=user, _assistant=assistant) diff --git a/src/pipecat/services/google/llm_openai.py b/src/pipecat/services/google/llm_openai.py deleted file mode 100644 index f9d182e78..000000000 --- a/src/pipecat/services/google/llm_openai.py +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Deprecated: use ``pipecat.services.google.openai.llm`` instead.""" - -import warnings - -warnings.warn( - "Module `pipecat.services.google.llm_openai` is deprecated, " - "use `pipecat.services.google.openai.llm` instead.", - DeprecationWarning, - stacklevel=2, -) - -from pipecat.services.google.openai.llm import * # noqa: E402, F401, F403 diff --git a/src/pipecat/services/google/openai/__init__.py b/src/pipecat/services/google/openai/__init__.py deleted file mode 100644 index c4d243b97..000000000 --- a/src/pipecat/services/google/openai/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# diff --git a/src/pipecat/services/google/openai/llm.py b/src/pipecat/services/google/openai/llm.py deleted file mode 100644 index da5d1be7a..000000000 --- a/src/pipecat/services/google/openai/llm.py +++ /dev/null @@ -1,213 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Google LLM service using OpenAI-compatible API format. - -This module provides integration with Google's AI LLM models using the OpenAI -API format through Google's Gemini API OpenAI compatibility layer. -""" - -import json -import os -from dataclasses import dataclass -from typing import Optional - -from openai import AsyncStream -from openai.types.chat import ChatCompletionChunk - -from pipecat.services.llm_service import FunctionCallFromLLM - -# Suppress gRPC fork warnings -os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" - -from loguru import logger - -from pipecat.frames.frames import LLMTextFrame -from pipecat.metrics.metrics import LLMTokenUsage -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.services.openai.base_llm import BaseOpenAILLMService -from pipecat.services.openai.llm import OpenAILLMService - - -@dataclass -class GoogleOpenAILLMSettings(BaseOpenAILLMService.Settings): - """Settings for GoogleLLMOpenAIBetaService.""" - - pass - - -class GoogleLLMOpenAIBetaService(OpenAILLMService): - """Google LLM service using OpenAI-compatible API format. - - This service provides access to Google's AI LLM models (like Gemini) through - the OpenAI API format. It handles streaming responses, function calls, and - tool usage while maintaining compatibility with OpenAI's interface. - - Note: This service includes a workaround for a Google API bug where function - call indices may be incorrectly set to None, resulting in empty function names. - - .. deprecated:: 0.0.82 - GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version. - Use GoogleLLMService instead for better integration with Google's native API. - - Reference: - https://ai.google.dev/gemini-api/docs/openai - """ - - Settings = GoogleOpenAILLMSettings - _settings: Settings - - def __init__( - self, - *, - api_key: str, - base_url: str = "https://generativelanguage.googleapis.com/v1beta/openai/", - model: Optional[str] = None, - settings: Optional[Settings] = None, - **kwargs, - ): - """Initialize the Google LLM service. - - Args: - api_key: Google API key for authentication. - base_url: Base URL for Google's OpenAI-compatible API. - model: Google model name to use (e.g., "gemini-2.0-flash"). - - .. deprecated:: 0.0.105 - Use ``settings=GoogleLLMOpenAIBetaService.Settings(model=...)`` instead. - - settings: Runtime-updatable settings. When provided alongside deprecated - parameters, ``settings`` values take precedence. - **kwargs: Additional arguments passed to the parent OpenAILLMService. - """ - import warnings - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "GoogleLLMOpenAIBetaService is deprecated and will be removed in a future version. " - "Use GoogleLLMService instead for better integration with Google's native API.", - DeprecationWarning, - stacklevel=2, - ) - - # 1. Initialize default_settings with hardcoded defaults - default_settings = self.Settings(model="gemini-2.0-flash") - - # 2. Apply direct init arg overrides (deprecated) - if model is not None: - self._warn_init_param_moved_to_settings("model", "model") - default_settings.model = model - - # 3. (No step 3, as there's no params object to apply) - - # 4. Apply settings delta (canonical API, always wins) - if settings is not None: - default_settings.apply_update(settings) - - super().__init__(api_key=api_key, base_url=base_url, settings=default_settings, **kwargs) - - async def _process_context(self, context: OpenAILLMContext): - functions_list = [] - arguments_list = [] - tool_id_list = [] - func_idx = 0 - function_name = "" - arguments = "" - tool_call_id = "" - - await self.start_ttfb_metrics() - - chunk_stream: AsyncStream[ - ChatCompletionChunk - ] = await self._stream_chat_completions_specific_context(context) - - # Use context manager to ensure stream is closed on cancellation/exception. - # Without this, CancelledError during iteration leaves the underlying socket open. - async with chunk_stream: - async for chunk in chunk_stream: - if chunk.usage: - tokens = LLMTokenUsage( - prompt_tokens=chunk.usage.prompt_tokens or 0, - completion_tokens=chunk.usage.completion_tokens or 0, - total_tokens=chunk.usage.total_tokens or 0, - ) - await self.start_llm_usage_metrics(tokens) - - if chunk.choices is None or len(chunk.choices) == 0: - continue - - await self.stop_ttfb_metrics() - - if not chunk.choices[0].delta: - continue - - if chunk.choices[0].delta.tool_calls: - # We're streaming the LLM response to enable the fastest response times. - # For text, we just yield each chunk as we receive it and count on consumers - # to do whatever coalescing they need (eg. to pass full sentences to TTS) - # - # If the LLM is a function call, we'll do some coalescing here. - # If the response contains a function name, we'll yield a frame to tell consumers - # that they can start preparing to call the function with that name. - # We accumulate all the arguments for the rest of the streamed response, then when - # the response is done, we package up all the arguments and the function name and - # yield a frame containing the function name and the arguments. - logger.debug(f"Tool call: {chunk.choices[0].delta.tool_calls}") - tool_call = chunk.choices[0].delta.tool_calls[0] - if tool_call.index != func_idx: - functions_list.append(function_name) - arguments_list.append(arguments) - tool_id_list.append(tool_call_id) - function_name = "" - arguments = "" - tool_call_id = "" - func_idx += 1 - if tool_call.function and tool_call.function.name: - function_name += tool_call.function.name - tool_call_id = tool_call.id - if tool_call.function and tool_call.function.arguments: - # Keep iterating through the response to collect all the argument fragments - arguments += tool_call.function.arguments - elif chunk.choices[0].delta.content: - await self.push_frame(LLMTextFrame(chunk.choices[0].delta.content)) - - # if we got a function name and arguments, check to see if it's a function with - # a registered handler. If so, run the registered callback, save the result to - # the context, and re-prompt to get a chat answer. If we don't have a registered - # handler, raise an exception. - if function_name and arguments: - # added to the list as last function name and arguments not added to the list - functions_list.append(function_name) - arguments_list.append(arguments) - tool_id_list.append(tool_call_id) - - logger.debug( - f"Function list: {functions_list}, Arguments list: {arguments_list}, Tool ID list: {tool_id_list}" - ) - - function_calls = [] - for function_name, arguments, tool_id in zip( - functions_list, arguments_list, tool_id_list - ): - if function_name == "": - # TODO: Remove the _process_context method once Google resolves the bug - # where the index is incorrectly set to None instead of returning the actual index, - # which currently results in an empty function name(''). - continue - - arguments = json.loads(arguments) - - function_calls.append( - FunctionCallFromLLM( - context=context, - tool_call_id=tool_id, - function_name=function_name, - arguments=arguments, - ) - ) - - await self.run_function_calls(function_calls) diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index f2e247de9..e4f96c388 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -55,11 +55,6 @@ from pipecat.processors.aggregators.llm_context import ( LLMContext, LLMSpecificMessage, ) -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, -) -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService from pipecat.services.settings import LLMSettings @@ -110,7 +105,7 @@ class FunctionCallParams: tool_call_id: str arguments: Mapping[str, Any] llm: "LLMService" - context: OpenAILLMContext | LLMContext + context: LLMContext result_callback: FunctionCallResultCallback @@ -153,7 +148,7 @@ class FunctionCallRunnerItem: function_name: str tool_call_id: str arguments: Mapping[str, Any] - context: OpenAILLMContext | LLMContext + context: LLMContext run_llm: Optional[bool] = None @@ -247,7 +242,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): async def run_inference( self, - context: LLMContext | OpenAILLMContext, + context: LLMContext, max_tokens: Optional[int] = None, system_instruction: Optional[str] = None, ) -> Optional[str]: @@ -267,41 +262,6 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): """ raise NotImplementedError(f"run_inference() not supported by {self.__class__.__name__}") - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> Any: - """Create a context aggregator for managing LLM conversation context. - - Must be implemented by subclasses. - - Args: - context: The LLM context to create an aggregator for. - user_params: Parameters for user message aggregation. - assistant_params: Parameters for assistant message aggregation. - - Returns: - A context aggregator instance. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "create_context_aggregator() is deprecated and will be removed in a future version. " - "Use the universal LLMContext and LLMContextAggregatorPair directly instead. " - "See OpenAILLMContext docstring for migration guide.", - DeprecationWarning, - stacklevel=2, - ) - pass - async def start(self, frame: StartFrame): """Start the LLM service. diff --git a/src/pipecat/services/mem0/memory.py b/src/pipecat/services/mem0/memory.py index 754e2be0a..86f37be41 100644 --- a/src/pipecat/services/mem0/memory.py +++ b/src/pipecat/services/mem0/memory.py @@ -17,12 +17,8 @@ from typing import Any, Dict, List, Optional from loguru import logger from pydantic import BaseModel, Field -from pipecat.frames.frames import Frame, LLMContextFrame, LLMMessagesFrame +from pipecat.frames.frames import Frame, LLMContextFrame from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection, FrameProcessor try: @@ -227,9 +223,7 @@ class Mem0MemoryService(FrameProcessor): logger.error(f"Error retrieving memories from Mem0: {e}") return [] - async def _enhance_context_with_memories( - self, context: LLMContext | OpenAILLMContext, query: str - ): + async def _enhance_context_with_memories(self, context: LLMContext, query: str): """Enhance the LLM context with relevant memories. Args: @@ -272,13 +266,9 @@ class Mem0MemoryService(FrameProcessor): await super().process_frame(frame, direction) context = None - messages = None - if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): + if isinstance(frame, LLMContextFrame): context = frame.context - elif isinstance(frame, LLMMessagesFrame): - messages = frame.messages - context = LLMContext(messages) if context: try: @@ -302,12 +292,8 @@ class Mem0MemoryService(FrameProcessor): # Store the conversation in Mem0 as a background task self.create_task(self._store_messages(messages_to_store), name="mem0_store") - # If we received an LLMMessagesFrame, create a new one with the enhanced messages - if messages is not None: - await self.push_frame(LLMMessagesFrame(context.get_messages())) - else: - # Otherwise, pass the enhanced context frame downstream - await self.push_frame(frame) + # Pass the enhanced context frame downstream + await self.push_frame(frame) except Exception as e: await self.push_error( error_msg=f"Error processing with Mem0: {str(e)}", exception=e diff --git a/src/pipecat/services/nvidia/llm.py b/src/pipecat/services/nvidia/llm.py index 66bbd4402..a06dfd4da 100644 --- a/src/pipecat/services/nvidia/llm.py +++ b/src/pipecat/services/nvidia/llm.py @@ -15,7 +15,6 @@ from typing import Optional from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.openai.base_llm import BaseOpenAILLMService from pipecat.services.openai.llm import OpenAILLMService @@ -84,7 +83,7 @@ class NvidiaLLMService(OpenAILLMService): self._has_reported_prompt_tokens = False self._is_processing = False - async def _process_context(self, context: OpenAILLMContext | LLMContext): + async def _process_context(self, context: LLMContext): """Process a context through the LLM and accumulate token usage metrics. This method overrides the parent class implementation to handle NVIDIA's diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index c06ed3afd..d007615ba 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -31,15 +31,10 @@ from pipecat.frames.frames import ( LLMContextFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, - LLMMessagesFrame, LLMTextFrame, ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN @@ -61,11 +56,10 @@ class OpenAILLMSettings(LLMSettings): class BaseOpenAILLMService(LLMService): """Base class for all services that use the AsyncOpenAI client. - This service consumes OpenAILLMContextFrame or LLMContextFrame frames, - which contain a reference to an OpenAILLMContext or LLMContext object. The - context defines what is sent to the LLM for completion, including user, - assistant, and system messages, as well as tool choices and function call - configurations. + This service consumes LLMContextFrame frames, which contain a reference to + an LLMContext object. The context defines what is sent to the LLM for + completion, including user, assistant, and system messages, as well as tool + choices and function call configurations. """ Settings = OpenAILLMSettings @@ -274,19 +268,27 @@ class BaseOpenAILLMService(LLMService): """ return self._full_model_name - async def get_chat_completions( - self, params_from_context: OpenAILLMInvocationParams - ) -> AsyncStream[ChatCompletionChunk]: + async def get_chat_completions(self, context: LLMContext) -> AsyncStream[ChatCompletionChunk]: """Get streaming chat completions from OpenAI API with optional timeout and retry. Args: - params_from_context: Parameters, derived from the LLM context, to - use for the chat completion. Contains messages, tools, and tool - choice. + context: Context to use for the chat completion. + Contains messages, tools, and tool choice. Returns: Async stream of chat completion chunks. """ + adapter = self.get_llm_adapter() + logger.debug( + f"{self}: Generating chat from context {adapter.get_messages_for_logging(context)}" + ) + + params_from_context: OpenAILLMInvocationParams = adapter.get_llm_invocation_params( + context, + system_instruction=self._settings.system_instruction, + convert_developer_to_user=not self.supports_developer_role, + ) + params = self.build_chat_completion_params(params_from_context) if self._retry_on_timeout: @@ -340,7 +342,7 @@ class BaseOpenAILLMService(LLMService): async def run_inference( self, - context: LLMContext | OpenAILLMContext, + context: LLMContext, max_tokens: Optional[int] = None, system_instruction: Optional[str] = None, ) -> Optional[str]: @@ -357,17 +359,12 @@ class BaseOpenAILLMService(LLMService): The LLM's response as a string, or None if no response is generated. """ effective_instruction = system_instruction or self._settings.system_instruction - if isinstance(context, LLMContext): - adapter = self.get_llm_adapter() - invocation_params: OpenAILLMInvocationParams = adapter.get_llm_invocation_params( - context, - system_instruction=effective_instruction, - convert_developer_to_user=not self.supports_developer_role, - ) - else: - invocation_params = OpenAILLMInvocationParams( - messages=context.messages, tools=context.tools, tool_choice=context.tool_choice - ) + adapter = self.get_llm_adapter() + invocation_params: OpenAILLMInvocationParams = adapter.get_llm_invocation_params( + context, + system_instruction=effective_instruction, + convert_developer_to_user=not self.supports_developer_role, + ) # Build params using the same method as streaming completions params = self.build_chat_completion_params(invocation_params) @@ -389,59 +386,8 @@ class BaseOpenAILLMService(LLMService): return response.choices[0].message.content - async def _stream_chat_completions_specific_context( - self, context: OpenAILLMContext - ) -> AsyncStream[ChatCompletionChunk]: - logger.debug( - f"{self}: Generating chat from LLM-specific context {context.get_messages_for_logging()}" - ) - - messages: List[ChatCompletionMessageParam] = context.get_messages() - - # base64 encode any images - for message in messages: - if message.get("mime_type") == "image/jpeg": - # Avoid .getvalue() which makes a full copy of BytesIO - raw_bytes = message["data"].read() - encoded_image = base64.b64encode(raw_bytes).decode("utf-8") - text = message.get("content", "") - message["content"] = [ - {"type": "text", "text": text}, - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}, - }, - ] - # Explicit cleanup - del message["data"] - del message["mime_type"] - - params = OpenAILLMInvocationParams( - messages=messages, tools=context.tools, tool_choice=context.tool_choice - ) - chunks = await self.get_chat_completions(params) - - return chunks - - async def _stream_chat_completions_universal_context( - self, context: LLMContext - ) -> AsyncStream[ChatCompletionChunk]: - adapter = self.get_llm_adapter() - logger.debug( - f"{self}: Generating chat from universal context {adapter.get_messages_for_logging(context)}" - ) - - params: OpenAILLMInvocationParams = adapter.get_llm_invocation_params( - context, - system_instruction=self._settings.system_instruction, - convert_developer_to_user=not self.supports_developer_role, - ) - chunks = await self.get_chat_completions(params) - - return chunks - @traced_llm - async def _process_context(self, context: OpenAILLMContext | LLMContext): + async def _process_context(self, context: LLMContext): functions_list = [] arguments_list = [] tool_id_list = [] @@ -452,12 +398,8 @@ class BaseOpenAILLMService(LLMService): await self.start_ttfb_metrics() - # Generate chat completions using either OpenAILLMContext or universal LLMContext - chunk_stream = await ( - self._stream_chat_completions_specific_context(context) - if isinstance(context, OpenAILLMContext) - else self._stream_chat_completions_universal_context(context) - ) + # Generate chat completions from LLMContext + chunk_stream = await self.get_chat_completions(context) # Ensure stream and its async iterator are closed on cancellation/exception # to prevent socket leaks and uvloop crashes. Closing the iterator first @@ -582,9 +524,7 @@ class BaseOpenAILLMService(LLMService): async def process_frame(self, frame: Frame, direction: FrameDirection): """Process frames for LLM completion requests. - Handles OpenAILLMContextFrame, LLMContextFrame, LLMMessagesFrame, - and LLMUpdateSettingsFrame to trigger LLM completions and manage - settings. + Handles LLMContextFrame to trigger LLM completions. Args: frame: The frame to process. @@ -593,16 +533,8 @@ class BaseOpenAILLMService(LLMService): await super().process_frame(frame, direction) context = None - if isinstance(frame, OpenAILLMContextFrame): - # Handle OpenAI-specific context frames + if isinstance(frame, LLMContextFrame): context = frame.context - elif isinstance(frame, LLMContextFrame): - # Handle universal (LLM-agnostic) LLM context frames - context = frame.context - elif isinstance(frame, LLMMessagesFrame): - # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal - # LLMContext with it - context = OpenAILLMContext.from_messages(frame.messages) else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/openai/llm.py b/src/pipecat/services/openai/llm.py index 553733922..24c2134fc 100644 --- a/src/pipecat/services/openai/llm.py +++ b/src/pipecat/services/openai/llm.py @@ -18,51 +18,9 @@ from pipecat.frames.frames import ( FunctionCallResultFrame, UserImageRawFrame, ) -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMAssistantContextAggregator, - LLMUserAggregatorParams, - LLMUserContextAggregator, -) -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.openai.base_llm import BaseOpenAILLMService -@dataclass -class OpenAIContextAggregatorPair: - """Pair of OpenAI context aggregators for user and assistant messages. - - .. deprecated:: 0.0.99 - `OpenAIContextAggregatorPair` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Parameters: - _user: User context aggregator for processing user messages. - _assistant: Assistant context aggregator for processing assistant messages. - """ - - # Aggregators handle deprecation warnings - _user: "OpenAIUserContextAggregator" - _assistant: "OpenAIAssistantContextAggregator" - - def user(self) -> "OpenAIUserContextAggregator": - """Get the user context aggregator. - - Returns: - The user context aggregator instance. - """ - return self._user - - def assistant(self) -> "OpenAIAssistantContextAggregator": - """Get the assistant context aggregator. - - Returns: - The assistant context aggregator instance. - """ - return self._assistant - - class OpenAILLMService(BaseOpenAILLMService): """OpenAI LLM service implementation. @@ -145,161 +103,3 @@ class OpenAILLMService(BaseOpenAILLMService): default_settings.apply_update(settings) super().__init__(service_tier=service_tier, settings=default_settings, **kwargs) - - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> OpenAIContextAggregatorPair: - """Create OpenAI-specific context aggregators. - - Creates a pair of context aggregators optimized for OpenAI's message format, - including support for function calls, tool usage, and image handling. - - Args: - context: The LLM context to create aggregators for. - user_params: Parameters for user message aggregation. - assistant_params: Parameters for assistant message aggregation. - - Returns: - OpenAIContextAggregatorPair: A pair of context aggregators, one for - the user and one for the assistant, encapsulated in an - OpenAIContextAggregatorPair. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - context.set_llm_adapter(self.get_llm_adapter()) - - # Aggregators handle deprecation warnings - user = OpenAIUserContextAggregator(context, params=user_params) - assistant = OpenAIAssistantContextAggregator(context, params=assistant_params) - - return OpenAIContextAggregatorPair(_user=user, _assistant=assistant) - - -class OpenAIUserContextAggregator(LLMUserContextAggregator): - """OpenAI-specific user context aggregator. - - Handles aggregation of user messages for OpenAI LLM services. - Inherits all functionality from the base LLMUserContextAggregator. - - .. deprecated:: 0.0.99 - `OpenAIUserContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - # Super handles deprecation warning - pass - - -class OpenAIAssistantContextAggregator(LLMAssistantContextAggregator): - """OpenAI-specific assistant context aggregator. - - Handles aggregation of assistant messages for OpenAI LLM services, - with specialized support for OpenAI's function calling format, - tool usage tracking, and image message handling. - - .. deprecated:: 0.0.99 - `OpenAIAssistantContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - # Super handles deprecation warning - - async def handle_function_call_in_progress(self, frame: FunctionCallInProgressFrame): - """Handle a function call in progress. - - Adds the function call to the context with an IN_PROGRESS status - to track ongoing function execution. - - Args: - frame: Frame containing function call progress information. - """ - self._context.add_message( - { - "role": "assistant", - "tool_calls": [ - { - "id": frame.tool_call_id, - "function": { - "name": frame.function_name, - "arguments": json.dumps(frame.arguments), - }, - "type": "function", - } - ], - } - ) - self._context.add_message( - { - "role": "tool", - "content": "IN_PROGRESS", - "tool_call_id": frame.tool_call_id, - } - ) - - async def handle_function_call_result(self, frame: FunctionCallResultFrame): - """Handle the result of a function call. - - Updates the context with the function call result, replacing any - previous IN_PROGRESS status. - - Args: - frame: Frame containing the function call result. - """ - if frame.result: - result = json.dumps(frame.result, ensure_ascii=False) - await self._update_function_call_result(frame.function_name, frame.tool_call_id, result) - else: - await self._update_function_call_result( - frame.function_name, frame.tool_call_id, "COMPLETED" - ) - - async def handle_function_call_cancel(self, frame: FunctionCallCancelFrame): - """Handle a cancelled function call. - - Updates the context to mark the function call as cancelled. - - Args: - frame: Frame containing the function call cancellation information. - """ - await self._update_function_call_result( - frame.function_name, frame.tool_call_id, "CANCELLED" - ) - - async def _update_function_call_result( - self, function_name: str, tool_call_id: str, result: Any - ): - for message in self._context.messages: - if ( - message["role"] == "tool" - and message["tool_call_id"] - and message["tool_call_id"] == tool_call_id - ): - message["content"] = result - - async def handle_user_image_frame(self, frame: UserImageRawFrame): - """Handle a user image frame from a function call request. - - Marks the associated function call as completed and adds the image - to the context for processing. - - Args: - frame: Frame containing the user image and request context. - """ - await self._update_function_call_result( - frame.request.function_name, frame.request.tool_call_id, "COMPLETED" - ) - self._context.add_image_frame_message( - format=frame.format, - size=frame.size, - image=frame.image, - text=frame.request.context, - ) diff --git a/src/pipecat/services/openai/realtime/context.py b/src/pipecat/services/openai/realtime/context.py deleted file mode 100644 index 7870cc519..000000000 --- a/src/pipecat/services/openai/realtime/context.py +++ /dev/null @@ -1,368 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""OpenAI Realtime LLM context and aggregator implementations. - -.. deprecated:: 0.0.92 - OpenAI Realtime no longer uses types from this module under the hood. - It now uses ``LLMContext`` and ``LLMContextAggregatorPair``. - Using the new patterns should allow you to not need types from this module. - - BEFORE:: - - # Setup - context = OpenAILLMContext(messages, tools) - context_aggregator = llm.create_context_aggregator(context) - - # Context aggregator type - context_aggregator: OpenAIContextAggregatorPair - - # Context frame type - frame: OpenAILLMContextFrame - - # Context type - context: OpenAIRealtimeLLMContext - # or - context: OpenAILLMContext - - AFTER:: - - # Setup - context = LLMContext(messages, tools) - context_aggregator = LLMContextAggregatorPair(context) - - # Context aggregator type - context_aggregator: LLMContextAggregatorPair - - # Context frame type - frame: LLMContextFrame - - # Context type - context: LLMContext -""" - -import warnings - -with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Types in pipecat.services.openai.realtime.llm (or " - "pipecat.services.openai_realtime.llm) are deprecated. \n" - "OpenAI Realtime no longer uses types from this module under the hood. \n" - "It now uses `LLMContext` and `LLMContextAggregatorPair`. \n" - "Using the new patterns should allow you to not need types from this module.\n\n" - "BEFORE:\n" - "```\n" - "# Setup\n" - "context = OpenAILLMContext(messages, tools)\n" - "context_aggregator = llm.create_context_aggregator(context)\n\n" - "# Context aggregator type\n" - "context_aggregator: OpenAIContextAggregatorPair\n\n" - "# Context frame type\n" - "frame: OpenAILLMContextFrame\n\n" - "# Context type\n" - "context: OpenAIRealtimeLLMContext\n" - "# or\n" - "context: OpenAILLMContext\n\n" - "```\n\n" - "AFTER:\n" - "```\n" - "# Setup\n" - "context = LLMContext(messages, tools)\n" - "context_aggregator = LLMContextAggregatorPair(context)\n\n" - "# Context aggregator type\n" - "context_aggregator: LLMContextAggregatorPair\n\n" - "# Context frame type\n" - "frame: LLMContextFrame\n\n" - "# Context type\n" - "context: LLMContext\n\n" - "```\n", - ) - -import copy -import json - -from loguru import logger - -from pipecat.frames.frames import ( - Frame, - FunctionCallResultFrame, - InterimTranscriptionFrame, - LLMMessagesUpdateFrame, - LLMSetToolsFrame, - LLMTextFrame, - TranscriptionFrame, -) -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.openai.llm import ( - OpenAIAssistantContextAggregator, - OpenAIUserContextAggregator, -) - -from . import events -from .frames import RealtimeFunctionCallResultFrame, RealtimeMessagesUpdateFrame - - -class OpenAIRealtimeLLMContext(OpenAILLMContext): - """OpenAI Realtime LLM context with session management and message conversion. - - Extends the standard OpenAI LLM context to support real-time session properties, - instruction management, and conversion between standard message formats and - realtime conversation items. - - .. deprecated:: 0.0.99 - `OpenAIRealtimeLLMContext` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - - def __init__(self, messages=None, tools=None, **kwargs): - """Initialize the OpenAIRealtimeLLMContext. - - Args: - messages: Initial conversation messages. Defaults to None. - tools: Available function tools. Defaults to None. - **kwargs: Additional arguments passed to parent OpenAILLMContext. - """ - # Super handles deprecation warning - super().__init__(messages=messages, tools=tools, **kwargs) - self.__setup_local() - - def __setup_local(self): - self.llm_needs_settings_update = True - self.llm_needs_initial_messages = True - self._session_instructions = "" - - return - - @staticmethod - def upgrade_to_realtime(obj: OpenAILLMContext) -> "OpenAIRealtimeLLMContext": - """Upgrade a standard OpenAI LLM context to a realtime context. - - Args: - obj: The OpenAILLMContext instance to upgrade. - - Returns: - The upgraded OpenAIRealtimeLLMContext instance. - """ - if isinstance(obj, OpenAILLMContext) and not isinstance(obj, OpenAIRealtimeLLMContext): - obj.__class__ = OpenAIRealtimeLLMContext - obj.__setup_local() - return obj - - # todo - # - finish implementing all frames - - def from_standard_message(self, message): - """Convert a standard message format to a realtime conversation item. - - Args: - message: The standard message dictionary to convert. - - Returns: - A ConversationItem instance for the realtime API. - """ - if message.get("role") == "user": - content = message.get("content") - if isinstance(message.get("content"), list): - content = "" - for c in message.get("content"): - if c.get("type") == "text": - content += " " + c.get("text") - else: - logger.error( - f"Unhandled content type in context message: {c.get('type')} - {message}" - ) - return events.ConversationItem( - role="user", - type="message", - content=[events.ItemContent(type="input_text", text=content)], - ) - if message.get("role") == "assistant" and message.get("tool_calls"): - tc = message.get("tool_calls")[0] - return events.ConversationItem( - type="function_call", - call_id=tc["id"], - name=tc["function"]["name"], - arguments=tc["function"]["arguments"], - ) - logger.error(f"Unhandled message type in from_standard_message: {message}") - - def get_messages_for_initializing_history(self): - """Get conversation items for initializing the realtime session history. - - Converts the context's messages to a format suitable for the realtime API, - handling system instructions and conversation history packaging. - - Returns: - List of conversation items for session initialization. - """ - # We can't load a long conversation history into the openai realtime api yet. (The API/model - # forgets that it can do audio, if you do a series of `conversation.item.create` calls.) So - # our general strategy until this is fixed is just to put everything into a first "user" - # message as a single input. - if not self.messages: - return [] - - messages = copy.deepcopy(self.messages) - - # If we have a "system" message as our first message, let's pull that out into session - # "instructions" - if messages[0].get("role") == "system": - self.llm_needs_settings_update = True - system = messages.pop(0) - content = system.get("content") - if isinstance(content, str): - self._session_instructions = content - elif isinstance(content, list): - self._session_instructions = content[0].get("text") - if not messages: - return [] - - # If we have just a single "user" item, we can just send it normally - if len(messages) == 1 and messages[0].get("role") == "user": - return [self.from_standard_message(messages[0])] - - # Otherwise, let's pack everything into a single "user" message with a bit of - # explanation for the LLM - intro_text = """ - This is a previously saved conversation. Please treat this conversation history as a - starting point for the current conversation.""" - - trailing_text = """ - This is the end of the previously saved conversation. Please continue the conversation - from here. If the last message is a user instruction or question, act on that instruction - or answer the question. If the last message is an assistant response, simple say that you - are ready to continue the conversation.""" - - return [ - { - "role": "user", - "type": "message", - "content": [ - { - "type": "input_text", - "text": "\n\n".join( - [intro_text, json.dumps(messages, indent=2), trailing_text] - ), - } - ], - } - ] - - def add_user_content_item_as_message(self, item): - """Add a user content item as a standard message to the context. - - Args: - item: The conversation item to add as a user message. - """ - message = { - "role": "user", - "content": [{"type": "text", "text": item.content[0].transcript}], - } - self.add_message(message) - - -class OpenAIRealtimeUserContextAggregator(OpenAIUserContextAggregator): - """User context aggregator for OpenAI Realtime API. - - Handles user input frames and generates appropriate context updates - for the realtime conversation, including message updates and tool settings. - - .. deprecated:: 0.0.99 - `OpenAIRealtimeUserContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Args: - context: The OpenAI realtime LLM context. - **kwargs: Additional arguments passed to parent aggregator. - """ - - # Super handles deprecation warning - - async def process_frame( - self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM - ): - """Process incoming frames and handle realtime-specific frame types. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - await super().process_frame(frame, direction) - # Parent does not push LLMMessagesUpdateFrame. This ensures that in a typical pipeline, - # messages are only processed by the user context aggregator, which is generally what we want. But - # we also need to send new messages over the websocket, so the openai realtime API has them - # in its context. - if isinstance(frame, LLMMessagesUpdateFrame): - await self.push_frame(RealtimeMessagesUpdateFrame(context=self._context)) - - # Parent also doesn't push the LLMSetToolsFrame. - if isinstance(frame, LLMSetToolsFrame): - await self.push_frame(frame, direction) - - async def push_aggregation(self): - """Push user input aggregation. - - Currently ignores all user input coming into the pipeline as realtime - audio input is handled directly by the service. - """ - # for the moment, ignore all user input coming into the pipeline. - # todo: think about whether/how to fix this to allow for text input from - # upstream (transport/transcription, or other sources) - pass - - -class OpenAIRealtimeAssistantContextAggregator(OpenAIAssistantContextAggregator): - """Assistant context aggregator for OpenAI Realtime API. - - Handles assistant output frames from the realtime service, filtering - out duplicate text frames and managing function call results. - - .. deprecated:: 0.0.99 - `OpenAIRealtimeAssistantContextAggregator` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Args: - context: The OpenAI realtime LLM context. - **kwargs: Additional arguments passed to parent aggregator. - """ - - # Super handles deprecation warning - - # The LLMAssistantContextAggregator uses TextFrames to aggregate the LLM output, - # but the OpenAIRealtimeLLMService pushes LLMTextFrames and TTSTextFrames. We - # need to override this proces_frame for LLMTextFrame, so that only the TTSTextFrames - # are process. This ensures that the context gets only one set of messages. - # OpenAIRealtimeLLMService also pushes TranscriptionFrames and InterimTranscriptionFrames, - # so we need to ignore pushing those as well, as they're also TextFrames. - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process assistant frames, filtering out duplicate text content. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - if not isinstance(frame, (LLMTextFrame, TranscriptionFrame, InterimTranscriptionFrame)): - await super().process_frame(frame, direction) - - async def handle_function_call_result(self, frame: FunctionCallResultFrame): - """Handle function call result and notify the realtime service. - - Args: - frame: The function call result frame to handle. - """ - await super().handle_function_call_result(frame) - - # The standard function callback code path pushes the FunctionCallResultFrame from the llm itself, - # so we didn't have a chance to add the result to the openai realtime api context. Let's push a - # special frame to do that. - await self.push_frame( - RealtimeFunctionCallResultFrame(result_frame=frame), FrameDirection.UPSTREAM - ) diff --git a/src/pipecat/services/openai/realtime/frames.py b/src/pipecat/services/openai/realtime/frames.py deleted file mode 100644 index 0d3eef974..000000000 --- a/src/pipecat/services/openai/realtime/frames.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Custom frame types for OpenAI Realtime API integration. - -.. deprecated:: 0.0.92 - OpenAI Realtime no longer uses types from this module under the hood. - - It now works more like most LLM services in Pipecat, relying on updates to - its context, pushed by context aggregators, to update its internal state. - - Listen for ``LLMContextFrame`` s for context updates. -""" - -import warnings - -with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Types in pipecat.services.openai.realtime.frames are deprecated. \n" - "OpenAI Realtime no longer uses types from this module under the hood. \n\n" - "It now works more like other LLM services in Pipecat, relying on updates to \n" - "its context, pushed by context aggregators, to update its internal state.\n\n" - "Listen for `LLMContextFrame`s for context updates.\n" - ) - -from dataclasses import dataclass -from typing import TYPE_CHECKING - -from pipecat.frames.frames import DataFrame, FunctionCallResultFrame - -if TYPE_CHECKING: - from pipecat.services.openai.realtime.context import OpenAIRealtimeLLMContext - - -@dataclass -class RealtimeMessagesUpdateFrame(DataFrame): - """Frame indicating that the realtime context messages have been updated. - - Parameters: - context: The updated OpenAI realtime LLM context. - """ - - context: "OpenAIRealtimeLLMContext" - - -@dataclass -class RealtimeFunctionCallResultFrame(DataFrame): - """Frame containing function call results for the realtime service. - - Parameters: - result_frame: The function call result frame to send to the realtime API. - """ - - result_frame: FunctionCallResultFrame diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index 293ee0d87..a1e593ecd 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -48,15 +48,7 @@ from pipecat.frames.frames import ( ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, -) from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.settings import ( @@ -564,13 +556,8 @@ class OpenAIRealtimeLLMService(LLMService): if isinstance(frame, TranscriptionFrame): pass - elif isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): - context = ( - frame.context - if isinstance(frame, LLMContextFrame) - else LLMContext.from_openai_context(frame.context) - ) - await self._handle_context(context) + elif isinstance(frame, LLMContextFrame): + await self._handle_context(frame.context) elif isinstance(frame, InputAudioRawFrame): if not self._audio_input_paused: await self._send_user_audio(frame) @@ -1133,74 +1120,3 @@ class OpenAIRealtimeLLMService(LLMService): output=json.dumps(result, ensure_ascii=False), ) await self.send_client_event(events.ConversationItemCreateEvent(item=item)) - - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> LLMContextAggregatorPair: - """Create an instance of OpenAIContextAggregatorPair from an OpenAILLMContext. - - NOTE: this method exists only for backward compatibility. New code - should instead do:: - - context = LLMContext(...) - context_aggregator = LLMContextAggregatorPair(context) - - Constructor keyword arguments for both the user and assistant aggregators can be provided. - - Args: - context: The LLM context. - user_params: User aggregator parameters. - assistant_params: Assistant aggregator parameters. - - Returns: - OpenAIContextAggregatorPair: A pair of context aggregators, one for - the user and one for the assistant, encapsulated in an - OpenAIContextAggregatorPair. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - # Log warning about transcription frame direction change in 0.0.92. - # We're putting this warning here rather than in the constructor so - # that it shows up for folks who haven't updated their code at all - # since 0.0.92, gives them a way to acknowledge and dismiss the - # warning, and encourages adoption of a new preferred pattern. - logger.warning( - "As of version 0.0.92, TranscriptionFrames and InterimTranscriptionFrames " - "now go upstream from OpenAIRealtimeLLMService, so if you're using " - "TranscriptProcessor, say, you'll want to adjust accordingly:\n\n" - "pipeline = Pipeline(\n" - " [\n" - " transport.input(),\n" - " context_aggregator.user(),\n\n" - " # BEFORE\n" - " llm,\n" - " transcript.user(),\n\n" - " # AFTER\n" - " transcript.user(),\n" - " llm,\n\n" - " transport.output(),\n" - " transcript.assistant(),\n" - " context_aggregator.assistant(),\n" - " ]\n" - ")\n\n" - "Also, LLMTextFrames are no longer pushed from " - "OpenAIRealtimeLLMService when it's configured with " - "output_modalities=['audio']. Listen for TTSTextFrames instead.\n\n" - "Once you've made the appropriate changes (if needed), you can " - "dismiss this warning by updating to the new context-setup pattern:\n\n" - " context = LLMContext(messages, tools)\n" - " context_aggregator = LLMContextAggregatorPair(context)\n" - ) - # from_openai_context handles deprecation warning already - context = LLMContext.from_openai_context(context) - assistant_params.expect_stripped_words = False - return LLMContextAggregatorPair( - context, user_params=user_params, assistant_params=assistant_params - ) diff --git a/src/pipecat/services/openai_realtime/context.py b/src/pipecat/services/openai_realtime/context.py deleted file mode 100644 index 75dd6c216..000000000 --- a/src/pipecat/services/openai_realtime/context.py +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""OpenAI Realtime LLM context and aggregator implementations. - -.. deprecated:: 0.0.91 - OpenAI Realtime no longer uses types from this module under the hood. - It now uses `LLMContext` and `LLMContextAggregatorPair`. - Using the new patterns should allow you to not need types from this module. - - See deprecation warning in pipecat.services.openai.realtime.context for - more details. -""" - -from pipecat.services.openai.realtime.context import * diff --git a/src/pipecat/services/openai_realtime/frames.py b/src/pipecat/services/openai_realtime/frames.py deleted file mode 100644 index c2db1eb70..000000000 --- a/src/pipecat/services/openai_realtime/frames.py +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Custom frame types for OpenAI Realtime API integration.""" - -import warnings - -from pipecat.services.openai.realtime.frames import * - -with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "Types in pipecat.services.openai_realtime.frames are deprecated. " - "Please use the equivalent types from " - "pipecat.services.openai.realtime.frames instead.", - DeprecationWarning, - stacklevel=2, - ) diff --git a/src/pipecat/services/openai_realtime_beta/__init__.py b/src/pipecat/services/openai_realtime_beta/__init__.py deleted file mode 100644 index b7c976bb6..000000000 --- a/src/pipecat/services/openai_realtime_beta/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from .azure import AzureRealtimeBetaLLMService -from .events import ( - InputAudioNoiseReduction, - InputAudioTranscription, - SemanticTurnDetection, - SessionProperties, - TurnDetection, -) -from .openai import OpenAIRealtimeBetaLLMService - -__all__ = [ - "AzureRealtimeBetaLLMService", - "InputAudioNoiseReduction", - "InputAudioTranscription", - "SemanticTurnDetection", - "SessionProperties", - "TurnDetection", - "OpenAIRealtimeBetaLLMService", -] diff --git a/src/pipecat/services/openai_realtime_beta/azure.py b/src/pipecat/services/openai_realtime_beta/azure.py deleted file mode 100644 index b590f2c29..000000000 --- a/src/pipecat/services/openai_realtime_beta/azure.py +++ /dev/null @@ -1,94 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Azure OpenAI Realtime Beta LLM service implementation.""" - -import warnings -from dataclasses import dataclass - -from loguru import logger - -from .openai import OpenAIRealtimeBetaLLMService - -try: - from websockets.asyncio.client import connect as websocket_connect -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error( - "In order to use OpenAI, you need to `pip install pipecat-ai[openai]`. Also, set `OPENAI_API_KEY` environment variable." - ) - raise Exception(f"Missing module: {e}") - - -@dataclass -class AzureRealtimeBetaLLMSettings(OpenAIRealtimeBetaLLMService.Settings): - """Settings for AzureRealtimeBetaLLMService.""" - - pass - - -class AzureRealtimeBetaLLMService(OpenAIRealtimeBetaLLMService): - """Azure OpenAI Realtime Beta LLM service with Azure-specific authentication. - - .. deprecated:: 0.0.84 - `AzureRealtimeBetaLLMService` is deprecated, use `AzureRealtimeLLMService` instead. - This class will be removed in version 1.0.0. - - Extends the OpenAI Realtime service to work with Azure OpenAI endpoints, - using Azure's authentication headers and endpoint format. Provides the same - real-time audio and text communication capabilities as the base OpenAI service. - """ - - Settings = AzureRealtimeBetaLLMSettings - _settings: Settings - - def __init__( - self, - *, - api_key: str, - base_url: str, - **kwargs, - ): - """Initialize Azure Realtime Beta LLM service. - - Args: - api_key: The API key for the Azure OpenAI service. - base_url: The full Azure WebSocket endpoint URL including api-version and deployment. - Example: "wss://my-project.openai.azure.com/openai/realtime?api-version=2024-10-01-preview&deployment=my-realtime-deployment" - **kwargs: Additional arguments passed to parent OpenAIRealtimeBetaLLMService. - """ - super().__init__(base_url=base_url, api_key=api_key, **kwargs) - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "AzureRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. " - "Use AzureRealtimeLLMService instead.", - DeprecationWarning, - stacklevel=2, - ) - - self.api_key = api_key - self.base_url = base_url - - async def _connect(self): - try: - if self._websocket: - # Here we assume that if we have a websocket, we are connected. We - # handle disconnections in the send/recv code paths. - return - - logger.info(f"Connecting to {self.base_url}") - self._websocket = await websocket_connect( - uri=self.base_url, - additional_headers={ - "api-key": self.api_key, - }, - ) - self._receive_task = self.create_task(self._receive_task_handler()) - except Exception as e: - await self.push_error(error_msg=f"Error connecting: {e}", exception=e) - self._websocket = None diff --git a/src/pipecat/services/openai_realtime_beta/context.py b/src/pipecat/services/openai_realtime_beta/context.py deleted file mode 100644 index a1bbd6c92..000000000 --- a/src/pipecat/services/openai_realtime_beta/context.py +++ /dev/null @@ -1,272 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""OpenAI Realtime LLM context and aggregator implementations.""" - -import copy -import json - -from loguru import logger - -from pipecat.frames.frames import ( - Frame, - FunctionCallResultFrame, - InterimTranscriptionFrame, - LLMMessagesUpdateFrame, - LLMSetToolsFrame, - LLMTextFrame, - TranscriptionFrame, -) -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext -from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.openai.llm import ( - OpenAIAssistantContextAggregator, - OpenAIUserContextAggregator, -) - -from . import events -from .frames import RealtimeFunctionCallResultFrame, RealtimeMessagesUpdateFrame - - -class OpenAIRealtimeLLMContext(OpenAILLMContext): - """OpenAI Realtime LLM context with session management and message conversion. - - Extends the standard OpenAI LLM context to support real-time session properties, - instruction management, and conversion between standard message formats and - realtime conversation items. - """ - - def __init__(self, messages=None, tools=None, **kwargs): - """Initialize the OpenAIRealtimeLLMContext. - - Args: - messages: Initial conversation messages. Defaults to None. - tools: Available function tools. Defaults to None. - **kwargs: Additional arguments passed to parent OpenAILLMContext. - """ - super().__init__(messages=messages, tools=tools, **kwargs) - self.__setup_local() - - def __setup_local(self): - self.llm_needs_settings_update = True - self.llm_needs_initial_messages = True - self._session_instructions = "" - - return - - @staticmethod - def upgrade_to_realtime(obj: OpenAILLMContext) -> "OpenAIRealtimeLLMContext": - """Upgrade a standard OpenAI LLM context to a realtime context. - - Args: - obj: The OpenAILLMContext instance to upgrade. - - Returns: - The upgraded OpenAIRealtimeLLMContext instance. - """ - if isinstance(obj, OpenAILLMContext) and not isinstance(obj, OpenAIRealtimeLLMContext): - obj.__class__ = OpenAIRealtimeLLMContext - obj.__setup_local() - return obj - - # todo - # - finish implementing all frames - - def from_standard_message(self, message): - """Convert a standard message format to a realtime conversation item. - - Args: - message: The standard message dictionary to convert. - - Returns: - A ConversationItem instance for the realtime API. - """ - if message.get("role") == "user": - content = message.get("content") - if isinstance(message.get("content"), list): - content = "" - for c in message.get("content"): - if c.get("type") == "text": - content += " " + c.get("text") - else: - logger.error( - f"Unhandled content type in context message: {c.get('type')} - {message}" - ) - return events.ConversationItem( - role="user", - type="message", - content=[events.ItemContent(type="input_text", text=content)], - ) - if message.get("role") == "assistant" and message.get("tool_calls"): - tc = message.get("tool_calls")[0] - return events.ConversationItem( - type="function_call", - call_id=tc["id"], - name=tc["function"]["name"], - arguments=tc["function"]["arguments"], - ) - logger.error(f"Unhandled message type in from_standard_message: {message}") - - def get_messages_for_initializing_history(self): - """Get conversation items for initializing the realtime session history. - - Converts the context's messages to a format suitable for the realtime API, - handling system instructions and conversation history packaging. - - Returns: - List of conversation items for session initialization. - """ - # We can't load a long conversation history into the openai realtime api yet. (The API/model - # forgets that it can do audio, if you do a series of `conversation.item.create` calls.) So - # our general strategy until this is fixed is just to put everything into a first "user" - # message as a single input. - if not self.messages: - return [] - - messages = copy.deepcopy(self.messages) - - # If we have a "system" message as our first message, let's pull that out into session - # "instructions" - if messages[0].get("role") == "system": - self.llm_needs_settings_update = True - system = messages.pop(0) - content = system.get("content") - if isinstance(content, str): - self._session_instructions = content - elif isinstance(content, list): - self._session_instructions = content[0].get("text") - if not messages: - return [] - - # If we have just a single "user" item, we can just send it normally - if len(messages) == 1 and messages[0].get("role") == "user": - return [self.from_standard_message(messages[0])] - - # Otherwise, let's pack everything into a single "user" message with a bit of - # explanation for the LLM - intro_text = """ - This is a previously saved conversation. Please treat this conversation history as a - starting point for the current conversation.""" - - trailing_text = """ - This is the end of the previously saved conversation. Please continue the conversation - from here. If the last message is a user instruction or question, act on that instruction - or answer the question. If the last message is an assistant response, simple say that you - are ready to continue the conversation.""" - - return [ - { - "role": "user", - "type": "message", - "content": [ - { - "type": "input_text", - "text": "\n\n".join( - [intro_text, json.dumps(messages, indent=2), trailing_text] - ), - } - ], - } - ] - - def add_user_content_item_as_message(self, item): - """Add a user content item as a standard message to the context. - - Args: - item: The conversation item to add as a user message. - """ - message = { - "role": "user", - "content": [{"type": "text", "text": item.content[0].transcript}], - } - self.add_message(message) - - -class OpenAIRealtimeUserContextAggregator(OpenAIUserContextAggregator): - """User context aggregator for OpenAI Realtime API. - - Handles user input frames and generates appropriate context updates - for the realtime conversation, including message updates and tool settings. - - Args: - context: The OpenAI realtime LLM context. - **kwargs: Additional arguments passed to parent aggregator. - """ - - async def process_frame( - self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM - ): - """Process incoming frames and handle realtime-specific frame types. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - await super().process_frame(frame, direction) - # Parent does not push LLMMessagesUpdateFrame. This ensures that in a typical pipeline, - # messages are only processed by the user context aggregator, which is generally what we want. But - # we also need to send new messages over the websocket, so the openai realtime API has them - # in its context. - if isinstance(frame, LLMMessagesUpdateFrame): - await self.push_frame(RealtimeMessagesUpdateFrame(context=self._context)) - - # Parent also doesn't push the LLMSetToolsFrame. - if isinstance(frame, LLMSetToolsFrame): - await self.push_frame(frame, direction) - - async def push_aggregation(self): - """Push user input aggregation. - - Currently ignores all user input coming into the pipeline as realtime - audio input is handled directly by the service. - """ - # for the moment, ignore all user input coming into the pipeline. - # todo: think about whether/how to fix this to allow for text input from - # upstream (transport/transcription, or other sources) - pass - - -class OpenAIRealtimeAssistantContextAggregator(OpenAIAssistantContextAggregator): - """Assistant context aggregator for OpenAI Realtime API. - - Handles assistant output frames from the realtime service, filtering - out duplicate text frames and managing function call results. - - Args: - context: The OpenAI realtime LLM context. - **kwargs: Additional arguments passed to parent aggregator. - """ - - # The LLMAssistantContextAggregator uses TextFrames to aggregate the LLM output, - # but the OpenAIRealtimeLLMService pushes LLMTextFrames and TTSTextFrames. We - # need to override this proces_frame for LLMTextFrame, so that only the TTSTextFrames - # are process. This ensures that the context gets only one set of messages. - # OpenAIRealtimeLLMService also pushes TranscriptionFrames and InterimTranscriptionFrames, - # so we need to ignore pushing those as well, as they're also TextFrames. - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process assistant frames, filtering out duplicate text content. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - if not isinstance(frame, (LLMTextFrame, TranscriptionFrame, InterimTranscriptionFrame)): - await super().process_frame(frame, direction) - - async def handle_function_call_result(self, frame: FunctionCallResultFrame): - """Handle function call result and notify the realtime service. - - Args: - frame: The function call result frame to handle. - """ - await super().handle_function_call_result(frame) - - # The standard function callback code path pushes the FunctionCallResultFrame from the llm itself, - # so we didn't have a chance to add the result to the openai realtime api context. Let's push a - # special frame to do that. - await self.push_frame( - RealtimeFunctionCallResultFrame(result_frame=frame), FrameDirection.UPSTREAM - ) diff --git a/src/pipecat/services/openai_realtime_beta/events.py b/src/pipecat/services/openai_realtime_beta/events.py deleted file mode 100644 index 596a11a3b..000000000 --- a/src/pipecat/services/openai_realtime_beta/events.py +++ /dev/null @@ -1,978 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Event models and data structures for OpenAI Realtime API communication.""" - -import json -import uuid -from typing import Any, Dict, List, Literal, Optional, Union - -from pydantic import BaseModel, ConfigDict, Field - -# -# session properties -# - - -class InputAudioTranscription(BaseModel): - """Configuration for audio transcription settings.""" - - model: str = "gpt-4o-transcribe" - language: Optional[str] - prompt: Optional[str] - - def __init__( - self, - model: Optional[str] = "gpt-4o-transcribe", - language: Optional[str] = None, - prompt: Optional[str] = None, - ): - """Initialize InputAudioTranscription. - - Args: - model: Transcription model to use (e.g., "gpt-4o-transcribe", "whisper-1"). - language: Optional language code for transcription. - prompt: Optional transcription hint text. - """ - super().__init__(model=model, language=language, prompt=prompt) - - -class TurnDetection(BaseModel): - """Server-side voice activity detection configuration. - - Parameters: - type: Detection type, must be "server_vad". - threshold: Voice activity detection threshold (0.0-1.0). Defaults to 0.5. - prefix_padding_ms: Padding before speech starts in milliseconds. Defaults to 300. - silence_duration_ms: Silence duration to detect speech end in milliseconds. Defaults to 800. - """ - - type: Optional[Literal["server_vad"]] = "server_vad" - threshold: Optional[float] = 0.5 - prefix_padding_ms: Optional[int] = 300 - silence_duration_ms: Optional[int] = 800 - - -class SemanticTurnDetection(BaseModel): - """Semantic-based turn detection configuration. - - Parameters: - type: Detection type, must be "semantic_vad". - eagerness: Turn detection eagerness level. Can be "low", "medium", "high", or "auto". - create_response: Whether to automatically create responses on turn detection. - interrupt_response: Whether to interrupt ongoing responses on turn detection. - """ - - type: Optional[Literal["semantic_vad"]] = "semantic_vad" - eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None - create_response: Optional[bool] = None - interrupt_response: Optional[bool] = None - - -class InputAudioNoiseReduction(BaseModel): - """Input audio noise reduction configuration. - - Parameters: - type: Noise reduction type for different microphone scenarios. - """ - - type: Optional[Literal["near_field", "far_field"]] - - -class SessionProperties(BaseModel): - """Configuration properties for an OpenAI Realtime session. - - Parameters: - modalities: Communication modalities to enable (text, audio, or both). - instructions: System instructions for the assistant. - voice: Voice ID for text-to-speech output. - input_audio_format: Format for input audio data. - output_audio_format: Format for output audio data. - input_audio_transcription: Configuration for input audio transcription. - input_audio_noise_reduction: Configuration for input audio noise reduction. - turn_detection: Turn detection configuration or False to disable. - tools: Available function tools for the assistant. - tool_choice: Tool usage strategy ("auto", "none", or "required"). - temperature: Sampling temperature for response generation. - max_response_output_tokens: Maximum tokens in response or "inf" for unlimited. - """ - - modalities: Optional[List[Literal["text", "audio"]]] = None - instructions: Optional[str] = None - voice: Optional[str] = None - input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None - output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None - input_audio_transcription: Optional[InputAudioTranscription] = None - input_audio_noise_reduction: Optional[InputAudioNoiseReduction] = None - # set turn_detection to False to disable turn detection - turn_detection: Optional[Union[TurnDetection, SemanticTurnDetection, bool]] = Field( - default=None - ) - tools: Optional[List[Dict]] = None - tool_choice: Optional[Literal["auto", "none", "required"]] = None - temperature: Optional[float] = None - max_response_output_tokens: Optional[Union[int, Literal["inf"]]] = None - - -# -# context -# - - -class ItemContent(BaseModel): - """Content within a conversation item. - - Parameters: - type: Content type (text, audio, input_text, or input_audio). - text: Text content for text-based items. - audio: Base64-encoded audio data for audio items. - transcript: Transcribed text for audio items. - """ - - type: Literal["text", "audio", "input_text", "input_audio"] - text: Optional[str] = None - audio: Optional[str] = None # base64-encoded audio - transcript: Optional[str] = None - - -class ConversationItem(BaseModel): - """A conversation item in the realtime session. - - Parameters: - id: Unique identifier for the item, auto-generated if not provided. - object: Object type identifier for the realtime API. - type: Item type (message, function_call, or function_call_output). - status: Current status of the item. - role: Speaker role for message items (user, assistant, or system). - content: Content list for message items. - call_id: Function call identifier for function_call items. - name: Function name for function_call items. - arguments: Function arguments as JSON string for function_call items. - output: Function output as JSON string for function_call_output items. - """ - - id: str = Field(default_factory=lambda: str(uuid.uuid4().hex)) - object: Optional[Literal["realtime.item"]] = None - type: Literal["message", "function_call", "function_call_output"] - status: Optional[Literal["completed", "in_progress", "incomplete"]] = None - # role and content are present for message items - role: Optional[Literal["user", "assistant", "system"]] = None - content: Optional[List[ItemContent]] = None - # these four fields are present for function_call items - call_id: Optional[str] = None - name: Optional[str] = None - arguments: Optional[str] = None - output: Optional[str] = None - - -class RealtimeConversation(BaseModel): - """A realtime conversation session. - - Parameters: - id: Unique identifier for the conversation. - object: Object type identifier, always "realtime.conversation". - """ - - id: str - object: Literal["realtime.conversation"] - - -class ResponseProperties(BaseModel): - """Properties for configuring assistant responses. - - Parameters: - modalities: Output modalities for the response. Defaults to ["audio", "text"]. - instructions: Specific instructions for this response. - voice: Voice ID for text-to-speech in this response. - output_audio_format: Audio format for this response. - tools: Available tools for this response. - tool_choice: Tool usage strategy for this response. - temperature: Sampling temperature for this response. - max_response_output_tokens: Maximum tokens for this response. - """ - - modalities: Optional[List[Literal["text", "audio"]]] = ["audio", "text"] - instructions: Optional[str] = None - voice: Optional[str] = None - output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None - tools: Optional[List[Dict]] = Field(default_factory=list) - tool_choice: Optional[Literal["auto", "none", "required"]] = None - temperature: Optional[float] = None - max_response_output_tokens: Optional[Union[int, Literal["inf"]]] = None - - -# -# error class -# -class RealtimeError(BaseModel): - """Error information from the realtime API. - - Parameters: - type: Error type identifier. - code: Specific error code. - message: Human-readable error message. - param: Parameter name that caused the error, if applicable. - event_id: Event ID associated with the error, if applicable. - """ - - type: str - code: Optional[str] = "" - message: str - param: Optional[str] = None - event_id: Optional[str] = None - - -# -# client events -# - - -class ClientEvent(BaseModel): - """Base class for client events sent to the realtime API. - - Parameters: - event_id: Unique identifier for the event, auto-generated if not provided. - """ - - event_id: str = Field(default_factory=lambda: str(uuid.uuid4())) - - -class SessionUpdateEvent(ClientEvent): - """Event to update session properties. - - Parameters: - type: Event type, always "session.update". - session: Updated session properties. - """ - - type: Literal["session.update"] = "session.update" - session: SessionProperties - - def model_dump(self, *args, **kwargs) -> Dict[str, Any]: - """Serialize the event to a dictionary. - - Handles special serialization for turn_detection where False becomes null. - - Args: - *args: Positional arguments passed to parent model_dump. - **kwargs: Keyword arguments passed to parent model_dump. - - Returns: - Dictionary representation of the event. - """ - dump = super().model_dump(*args, **kwargs) - - # Handle turn_detection so that False is serialized as null - if "turn_detection" in dump["session"]: - if dump["session"]["turn_detection"] is False: - dump["session"]["turn_detection"] = None - - return dump - - -class InputAudioBufferAppendEvent(ClientEvent): - """Event to append audio data to the input buffer. - - Parameters: - type: Event type, always "input_audio_buffer.append". - audio: Base64-encoded audio data to append. - """ - - type: Literal["input_audio_buffer.append"] = "input_audio_buffer.append" - audio: str # base64-encoded audio - - -class InputAudioBufferCommitEvent(ClientEvent): - """Event to commit the current input audio buffer. - - Parameters: - type: Event type, always "input_audio_buffer.commit". - """ - - type: Literal["input_audio_buffer.commit"] = "input_audio_buffer.commit" - - -class InputAudioBufferClearEvent(ClientEvent): - """Event to clear the input audio buffer. - - Parameters: - type: Event type, always "input_audio_buffer.clear". - """ - - type: Literal["input_audio_buffer.clear"] = "input_audio_buffer.clear" - - -class ConversationItemCreateEvent(ClientEvent): - """Event to create a new conversation item. - - Parameters: - type: Event type, always "conversation.item.create". - previous_item_id: ID of the item to insert after, if any. - item: The conversation item to create. - """ - - type: Literal["conversation.item.create"] = "conversation.item.create" - previous_item_id: Optional[str] = None - item: ConversationItem - - -class ConversationItemTruncateEvent(ClientEvent): - """Event to truncate a conversation item's audio content. - - Parameters: - type: Event type, always "conversation.item.truncate". - item_id: ID of the item to truncate. - content_index: Index of the content to truncate within the item. - audio_end_ms: End time in milliseconds for the truncated audio. - """ - - type: Literal["conversation.item.truncate"] = "conversation.item.truncate" - item_id: str - content_index: int - audio_end_ms: int - - -class ConversationItemDeleteEvent(ClientEvent): - """Event to delete a conversation item. - - Parameters: - type: Event type, always "conversation.item.delete". - item_id: ID of the item to delete. - """ - - type: Literal["conversation.item.delete"] = "conversation.item.delete" - item_id: str - - -class ConversationItemRetrieveEvent(ClientEvent): - """Event to retrieve a conversation item by ID. - - Parameters: - type: Event type, always "conversation.item.retrieve". - item_id: ID of the item to retrieve. - """ - - type: Literal["conversation.item.retrieve"] = "conversation.item.retrieve" - item_id: str - - -class ResponseCreateEvent(ClientEvent): - """Event to create a new assistant response. - - Parameters: - type: Event type, always "response.create". - response: Optional response configuration properties. - """ - - type: Literal["response.create"] = "response.create" - response: Optional[ResponseProperties] = None - - -class ResponseCancelEvent(ClientEvent): - """Event to cancel the current assistant response. - - Parameters: - type: Event type, always "response.cancel". - """ - - type: Literal["response.cancel"] = "response.cancel" - - -# -# server events -# - - -class ServerEvent(BaseModel): - """Base class for server events received from the realtime API. - - Parameters: - event_id: Unique identifier for the event. - type: Type of the server event. - """ - - model_config = ConfigDict(arbitrary_types_allowed=True) - - event_id: str - type: str - - -class SessionCreatedEvent(ServerEvent): - """Event indicating a session has been created. - - Parameters: - type: Event type, always "session.created". - session: The created session properties. - """ - - type: Literal["session.created"] - session: SessionProperties - - -class SessionUpdatedEvent(ServerEvent): - """Event indicating a session has been updated. - - Parameters: - type: Event type, always "session.updated". - session: The updated session properties. - """ - - type: Literal["session.updated"] - session: SessionProperties - - -class ConversationCreated(ServerEvent): - """Event indicating a conversation has been created. - - Parameters: - type: Event type, always "conversation.created". - conversation: The created conversation. - """ - - type: Literal["conversation.created"] - conversation: RealtimeConversation - - -class ConversationItemCreated(ServerEvent): - """Event indicating a conversation item has been created. - - Parameters: - type: Event type, always "conversation.item.created". - previous_item_id: ID of the previous item, if any. - item: The created conversation item. - """ - - type: Literal["conversation.item.created"] - previous_item_id: Optional[str] = None - item: ConversationItem - - -class ConversationItemInputAudioTranscriptionDelta(ServerEvent): - """Event containing incremental input audio transcription. - - Parameters: - type: Event type, always "conversation.item.input_audio_transcription.delta". - item_id: ID of the conversation item being transcribed. - content_index: Index of the content within the item. - delta: Incremental transcription text. - """ - - type: Literal["conversation.item.input_audio_transcription.delta"] - item_id: str - content_index: int - delta: str - - -class ConversationItemInputAudioTranscriptionCompleted(ServerEvent): - """Event indicating input audio transcription is complete. - - Parameters: - type: Event type, always "conversation.item.input_audio_transcription.completed". - item_id: ID of the conversation item that was transcribed. - content_index: Index of the content within the item. - transcript: Complete transcription text. - """ - - type: Literal["conversation.item.input_audio_transcription.completed"] - item_id: str - content_index: int - transcript: str - - -class ConversationItemInputAudioTranscriptionFailed(ServerEvent): - """Event indicating input audio transcription failed. - - Parameters: - type: Event type, always "conversation.item.input_audio_transcription.failed". - item_id: ID of the conversation item that failed transcription. - content_index: Index of the content within the item. - error: Error details for the transcription failure. - """ - - type: Literal["conversation.item.input_audio_transcription.failed"] - item_id: str - content_index: int - error: RealtimeError - - -class ConversationItemTruncated(ServerEvent): - """Event indicating a conversation item has been truncated. - - Parameters: - type: Event type, always "conversation.item.truncated". - item_id: ID of the truncated conversation item. - content_index: Index of the content within the item. - audio_end_ms: End time in milliseconds for the truncated audio. - """ - - type: Literal["conversation.item.truncated"] - item_id: str - content_index: int - audio_end_ms: int - - -class ConversationItemDeleted(ServerEvent): - """Event indicating a conversation item has been deleted. - - Parameters: - type: Event type, always "conversation.item.deleted". - item_id: ID of the deleted conversation item. - """ - - type: Literal["conversation.item.deleted"] - item_id: str - - -class ConversationItemRetrieved(ServerEvent): - """Event containing a retrieved conversation item. - - Parameters: - type: Event type, always "conversation.item.retrieved". - item: The retrieved conversation item. - """ - - type: Literal["conversation.item.retrieved"] - item: ConversationItem - - -class ResponseCreated(ServerEvent): - """Event indicating an assistant response has been created. - - Parameters: - type: Event type, always "response.created". - response: The created response object. - """ - - type: Literal["response.created"] - response: "Response" - - -class ResponseDone(ServerEvent): - """Event indicating an assistant response is complete. - - Parameters: - type: Event type, always "response.done". - response: The completed response object. - """ - - type: Literal["response.done"] - response: "Response" - - -class ResponseOutputItemAdded(ServerEvent): - """Event indicating an output item has been added to a response. - - Parameters: - type: Event type, always "response.output_item.added". - response_id: ID of the response. - output_index: Index of the output item. - item: The added conversation item. - """ - - type: Literal["response.output_item.added"] - response_id: str - output_index: int - item: ConversationItem - - -class ResponseOutputItemDone(ServerEvent): - """Event indicating an output item is complete. - - Parameters: - type: Event type, always "response.output_item.done". - response_id: ID of the response. - output_index: Index of the output item. - item: The completed conversation item. - """ - - type: Literal["response.output_item.done"] - response_id: str - output_index: int - item: ConversationItem - - -class ResponseContentPartAdded(ServerEvent): - """Event indicating a content part has been added to a response. - - Parameters: - type: Event type, always "response.content_part.added". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - content_index: Index of the content part. - part: The added content part. - """ - - type: Literal["response.content_part.added"] - response_id: str - item_id: str - output_index: int - content_index: int - part: ItemContent - - -class ResponseContentPartDone(ServerEvent): - """Event indicating a content part is complete. - - Parameters: - type: Event type, always "response.content_part.done". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - content_index: Index of the content part. - part: The completed content part. - """ - - type: Literal["response.content_part.done"] - response_id: str - item_id: str - output_index: int - content_index: int - part: ItemContent - - -class ResponseTextDelta(ServerEvent): - """Event containing incremental text from a response. - - Parameters: - type: Event type, always "response.text.delta". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - content_index: Index of the content part. - delta: Incremental text content. - """ - - type: Literal["response.text.delta"] - response_id: str - item_id: str - output_index: int - content_index: int - delta: str - - -class ResponseTextDone(ServerEvent): - """Event indicating text content is complete. - - Parameters: - type: Event type, always "response.text.done". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - content_index: Index of the content part. - text: Complete text content. - """ - - type: Literal["response.text.done"] - response_id: str - item_id: str - output_index: int - content_index: int - text: str - - -class ResponseAudioTranscriptDelta(ServerEvent): - """Event containing incremental audio transcript from a response. - - Parameters: - type: Event type, always "response.audio_transcript.delta". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - content_index: Index of the content part. - delta: Incremental transcript text. - """ - - type: Literal["response.audio_transcript.delta"] - response_id: str - item_id: str - output_index: int - content_index: int - delta: str - - -class ResponseAudioTranscriptDone(ServerEvent): - """Event indicating audio transcript is complete. - - Parameters: - type: Event type, always "response.audio_transcript.done". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - content_index: Index of the content part. - transcript: Complete transcript text. - """ - - type: Literal["response.audio_transcript.done"] - response_id: str - item_id: str - output_index: int - content_index: int - transcript: str - - -class ResponseAudioDelta(ServerEvent): - """Event containing incremental audio data from a response. - - Parameters: - type: Event type, always "response.audio.delta". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - content_index: Index of the content part. - delta: Base64-encoded incremental audio data. - """ - - type: Literal["response.audio.delta"] - response_id: str - item_id: str - output_index: int - content_index: int - delta: str # base64-encoded audio - - -class ResponseAudioDone(ServerEvent): - """Event indicating audio content is complete. - - Parameters: - type: Event type, always "response.audio.done". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - content_index: Index of the content part. - """ - - type: Literal["response.audio.done"] - response_id: str - item_id: str - output_index: int - content_index: int - - -class ResponseFunctionCallArgumentsDelta(ServerEvent): - """Event containing incremental function call arguments. - - Parameters: - type: Event type, always "response.function_call_arguments.delta". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - call_id: ID of the function call. - delta: Incremental function arguments as JSON. - """ - - type: Literal["response.function_call_arguments.delta"] - response_id: str - item_id: str - output_index: int - call_id: str - delta: str - - -class ResponseFunctionCallArgumentsDone(ServerEvent): - """Event indicating function call arguments are complete. - - Parameters: - type: Event type, always "response.function_call_arguments.done". - response_id: ID of the response. - item_id: ID of the conversation item. - output_index: Index of the output item. - call_id: ID of the function call. - arguments: Complete function arguments as JSON string. - """ - - type: Literal["response.function_call_arguments.done"] - response_id: str - item_id: str - output_index: int - call_id: str - arguments: str - - -class InputAudioBufferSpeechStarted(ServerEvent): - """Event indicating speech has started in the input audio buffer. - - Parameters: - type: Event type, always "input_audio_buffer.speech_started". - audio_start_ms: Start time of speech in milliseconds. - item_id: ID of the associated conversation item. - """ - - type: Literal["input_audio_buffer.speech_started"] - audio_start_ms: int - item_id: str - - -class InputAudioBufferSpeechStopped(ServerEvent): - """Event indicating speech has stopped in the input audio buffer. - - Parameters: - type: Event type, always "input_audio_buffer.speech_stopped". - audio_end_ms: End time of speech in milliseconds. - item_id: ID of the associated conversation item. - """ - - type: Literal["input_audio_buffer.speech_stopped"] - audio_end_ms: int - item_id: str - - -class InputAudioBufferCommitted(ServerEvent): - """Event indicating the input audio buffer has been committed. - - Parameters: - type: Event type, always "input_audio_buffer.committed". - previous_item_id: ID of the previous item, if any. - item_id: ID of the committed conversation item. - """ - - type: Literal["input_audio_buffer.committed"] - previous_item_id: Optional[str] = None - item_id: str - - -class InputAudioBufferCleared(ServerEvent): - """Event indicating the input audio buffer has been cleared. - - Parameters: - type: Event type, always "input_audio_buffer.cleared". - """ - - type: Literal["input_audio_buffer.cleared"] - - -class ErrorEvent(ServerEvent): - """Event indicating an error occurred. - - Parameters: - type: Event type, always "error". - error: Error details. - """ - - type: Literal["error"] - error: RealtimeError - - -class RateLimitsUpdated(ServerEvent): - """Event indicating rate limits have been updated. - - Parameters: - type: Event type, always "rate_limits.updated". - rate_limits: List of rate limit information. - """ - - type: Literal["rate_limits.updated"] - rate_limits: List[Dict[str, Any]] - - -class TokenDetails(BaseModel): - """Detailed token usage information. - - Parameters: - cached_tokens: Number of cached tokens used. Defaults to 0. - text_tokens: Number of text tokens used. Defaults to 0. - audio_tokens: Number of audio tokens used. Defaults to 0. - """ - - model_config = ConfigDict(extra="allow") - - cached_tokens: Optional[int] = 0 - text_tokens: Optional[int] = 0 - audio_tokens: Optional[int] = 0 - - -class Usage(BaseModel): - """Token usage statistics for a response. - - Parameters: - total_tokens: Total number of tokens used. - input_tokens: Number of input tokens used. - output_tokens: Number of output tokens used. - input_token_details: Detailed breakdown of input token usage. - output_token_details: Detailed breakdown of output token usage. - """ - - total_tokens: int - input_tokens: int - output_tokens: int - input_token_details: TokenDetails - output_token_details: TokenDetails - - -class Response(BaseModel): - """A complete assistant response. - - Parameters: - id: Unique identifier for the response. - object: Object type, always "realtime.response". - status: Current status of the response. - status_details: Additional status information. - output: List of conversation items in the response. - usage: Token usage statistics for the response. - """ - - id: str - object: Literal["realtime.response"] - status: Literal["completed", "in_progress", "incomplete", "cancelled", "failed"] - status_details: Any - output: List[ConversationItem] - usage: Optional[Usage] = None - - -_server_event_types = { - "error": ErrorEvent, - "session.created": SessionCreatedEvent, - "session.updated": SessionUpdatedEvent, - "conversation.created": ConversationCreated, - "input_audio_buffer.committed": InputAudioBufferCommitted, - "input_audio_buffer.cleared": InputAudioBufferCleared, - "input_audio_buffer.speech_started": InputAudioBufferSpeechStarted, - "input_audio_buffer.speech_stopped": InputAudioBufferSpeechStopped, - "conversation.item.created": ConversationItemCreated, - "conversation.item.input_audio_transcription.delta": ConversationItemInputAudioTranscriptionDelta, - "conversation.item.input_audio_transcription.completed": ConversationItemInputAudioTranscriptionCompleted, - "conversation.item.input_audio_transcription.failed": ConversationItemInputAudioTranscriptionFailed, - "conversation.item.truncated": ConversationItemTruncated, - "conversation.item.deleted": ConversationItemDeleted, - "conversation.item.retrieved": ConversationItemRetrieved, - "response.created": ResponseCreated, - "response.done": ResponseDone, - "response.output_item.added": ResponseOutputItemAdded, - "response.output_item.done": ResponseOutputItemDone, - "response.content_part.added": ResponseContentPartAdded, - "response.content_part.done": ResponseContentPartDone, - "response.text.delta": ResponseTextDelta, - "response.text.done": ResponseTextDone, - "response.audio_transcript.delta": ResponseAudioTranscriptDelta, - "response.audio_transcript.done": ResponseAudioTranscriptDone, - "response.audio.delta": ResponseAudioDelta, - "response.audio.done": ResponseAudioDone, - "response.function_call_arguments.delta": ResponseFunctionCallArgumentsDelta, - "response.function_call_arguments.done": ResponseFunctionCallArgumentsDone, - "rate_limits.updated": RateLimitsUpdated, -} - - -def parse_server_event(str): - """Parse a server event from JSON string. - - Args: - str: JSON string containing the server event. - - Returns: - Parsed server event object of the appropriate type. - - Raises: - Exception: If the event type is unimplemented or parsing fails. - """ - try: - event = json.loads(str) - event_type = event["type"] - if event_type not in _server_event_types: - raise Exception(f"Unimplemented server event type: {event_type}") - return _server_event_types[event_type].model_validate(event) - except Exception as e: - raise Exception(f"{e} \n\n{str}") diff --git a/src/pipecat/services/openai_realtime_beta/frames.py b/src/pipecat/services/openai_realtime_beta/frames.py deleted file mode 100644 index 02ef64000..000000000 --- a/src/pipecat/services/openai_realtime_beta/frames.py +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Custom frame types for OpenAI Realtime API integration.""" - -from dataclasses import dataclass -from typing import TYPE_CHECKING - -from pipecat.frames.frames import DataFrame, FunctionCallResultFrame - -if TYPE_CHECKING: - from pipecat.services.openai_realtime_beta.context import OpenAIRealtimeLLMContext - - -@dataclass -class RealtimeMessagesUpdateFrame(DataFrame): - """Frame indicating that the realtime context messages have been updated. - - Parameters: - context: The updated OpenAI realtime LLM context. - """ - - context: "OpenAIRealtimeLLMContext" - - -@dataclass -class RealtimeFunctionCallResultFrame(DataFrame): - """Frame containing function call results for the realtime service. - - Parameters: - result_frame: The function call result frame to send to the realtime API. - """ - - result_frame: FunctionCallResultFrame diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py deleted file mode 100644 index 0d20039b1..000000000 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ /dev/null @@ -1,858 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""OpenAI Realtime Beta LLM service implementation with WebSocket support.""" - -import base64 -import json -import time -import warnings -from dataclasses import dataclass -from typing import Optional - -from loguru import logger - -from pipecat.adapters.services.open_ai_realtime_adapter import OpenAIRealtimeLLMAdapter -from pipecat.frames.frames import ( - AggregationType, - BotStoppedSpeakingFrame, - CancelFrame, - EndFrame, - ErrorFrame, - Frame, - InputAudioRawFrame, - InterimTranscriptionFrame, - InterruptionFrame, - LLMContextFrame, - LLMFullResponseEndFrame, - LLMFullResponseStartFrame, - LLMMessagesAppendFrame, - LLMSetToolsFrame, - LLMTextFrame, - LLMUpdateSettingsFrame, - StartFrame, - TranscriptionFrame, - TTSAudioRawFrame, - TTSStartedFrame, - TTSStoppedFrame, - TTSTextFrame, - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, -) -from pipecat.metrics.metrics import LLMTokenUsage -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, -) -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) -from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.llm_service import FunctionCallFromLLM, LLMService -from pipecat.services.openai.llm import OpenAIContextAggregatorPair -from pipecat.services.settings import LLMSettings -from pipecat.transcriptions.language import Language -from pipecat.utils.time import time_now_iso8601 -from pipecat.utils.tracing.service_decorators import traced_openai_realtime, traced_stt - -from . import events -from .context import ( - OpenAIRealtimeAssistantContextAggregator, - OpenAIRealtimeLLMContext, - OpenAIRealtimeUserContextAggregator, -) -from .frames import RealtimeFunctionCallResultFrame, RealtimeMessagesUpdateFrame - -try: - from websockets.asyncio.client import connect as websocket_connect -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error("In order to use OpenAI, you need to `pip install pipecat-ai[openai]`.") - raise Exception(f"Missing module: {e}") - - -@dataclass -class CurrentAudioResponse: - """Tracks the current audio response from the assistant. - - Parameters: - item_id: Unique identifier for the audio response item. - content_index: Index of the audio content within the item. - start_time_ms: Timestamp when the audio response started in milliseconds. - total_size: Total size of audio data received in bytes. Defaults to 0. - """ - - item_id: str - content_index: int - start_time_ms: int - total_size: int = 0 - - -@dataclass -class OpenAIRealtimeBetaLLMSettings(LLMSettings): - """Settings for OpenAIRealtimeBetaLLMService.""" - - pass - - -class OpenAIRealtimeBetaLLMService(LLMService): - """OpenAI Realtime Beta LLM service providing real-time audio and text communication. - - .. deprecated:: 0.0.84 - `OpenAIRealtimeBetaLLMService` is deprecated, use `OpenAIRealtimeLLMService` instead. - This class will be removed in version 1.0.0. - - Implements the OpenAI Realtime API Beta with WebSocket communication for low-latency - bidirectional audio and text interactions. Supports function calling, conversation - management, and real-time transcription. - """ - - Settings = OpenAIRealtimeBetaLLMSettings - _settings: Settings - - # Overriding the default adapter to use the OpenAIRealtimeLLMAdapter one. - adapter_class = OpenAIRealtimeLLMAdapter - - def __init__( - self, - *, - api_key: str, - model: Optional[str] = None, - base_url: str = "wss://api.openai.com/v1/realtime", - session_properties: Optional[events.SessionProperties] = None, - settings: Optional[Settings] = None, - start_audio_paused: bool = False, - send_transcription_frames: bool = True, - **kwargs, - ): - """Initialize the OpenAI Realtime Beta LLM service. - - Args: - api_key: OpenAI API key for authentication. - model: OpenAI model name. - - .. deprecated:: 0.0.105 - Use ``settings=OpenAIRealtimeBetaLLMService.Settings(model=...)`` instead. - - base_url: WebSocket base URL for the realtime API. - Defaults to "wss://api.openai.com/v1/realtime". - session_properties: Configuration properties for the realtime session. - If None, uses default SessionProperties. - settings: Runtime-updatable settings for this service. - start_audio_paused: Whether to start with audio input paused. Defaults to False. - send_transcription_frames: Whether to emit transcription frames. Defaults to True. - **kwargs: Additional arguments passed to parent LLMService. - """ - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "OpenAIRealtimeBetaLLMService is deprecated and will be removed in version 1.0.0. " - "Use OpenAIRealtimeLLMService instead.", - DeprecationWarning, - stacklevel=2, - ) - - # 1. Initialize default_settings with hardcoded defaults - default_settings = self.Settings( - model="gpt-4o-realtime-preview-2025-06-03", - system_instruction=None, - temperature=None, - max_tokens=None, - top_p=None, - top_k=None, - frequency_penalty=None, - presence_penalty=None, - seed=None, - filter_incomplete_user_turns=False, - user_turn_completion_config=None, - ) - - # 2. Apply direct init arg overrides (deprecated) - if model is not None: - self._warn_init_param_moved_to_settings("model", "model") - default_settings.model = model - # 3. Apply settings delta (canonical API, always wins) - if settings is not None: - default_settings.apply_update(settings) - - full_url = f"{base_url}?model={default_settings.model}" - super().__init__( - base_url=full_url, - settings=default_settings, - **kwargs, - ) - - self.api_key = api_key - self.base_url = full_url - self._session_properties = session_properties or events.SessionProperties() - self._audio_input_paused = start_audio_paused - self._send_transcription_frames = send_transcription_frames - self._websocket = None - self._receive_task = None - self._context = None - - self._disconnecting = False - self._api_session_ready = False - self._run_llm_when_api_session_ready = False - - self._current_assistant_response = None - self._current_audio_response = None - - self._messages_added_manually = {} - self._user_and_response_message_tuple = None - - self._register_event_handler("on_conversation_item_created") - self._register_event_handler("on_conversation_item_updated") - self._retrieve_conversation_item_futures = {} - - def can_generate_metrics(self) -> bool: - """Check if the service can generate usage metrics. - - Returns: - True if metrics generation is supported. - """ - return True - - def set_audio_input_paused(self, paused: bool): - """Set whether audio input is paused. - - Args: - paused: True to pause audio input, False to resume. - """ - self._audio_input_paused = paused - - def _is_modality_enabled(self, modality: str) -> bool: - """Check if a specific modality is enabled, "text" or "audio".""" - modalities = self._session_properties.modalities or ["audio", "text"] - return modality in modalities - - def _get_enabled_modalities(self) -> list[str]: - """Get the list of enabled modalities.""" - return self._session_properties.modalities or ["audio", "text"] - - async def retrieve_conversation_item(self, item_id: str): - """Retrieve a conversation item by ID from the server. - - Args: - item_id: The ID of the conversation item to retrieve. - - Returns: - The retrieved conversation item. - """ - future = self.get_event_loop().create_future() - retrieval_in_flight = False - if not self._retrieve_conversation_item_futures.get(item_id): - self._retrieve_conversation_item_futures[item_id] = [] - else: - retrieval_in_flight = True - self._retrieve_conversation_item_futures[item_id].append(future) - if not retrieval_in_flight: - await self.send_client_event( - # Set event_id to "rci_{item_id}" so that we can identify an - # error later if the retrieval fails. We don't need a UUID - # suffix to the event_id because we're ensuring only one - # in-flight retrieval per item_id. (Note: "rci" = "retrieve - # conversation item") - events.ConversationItemRetrieveEvent(item_id=item_id, event_id=f"rci_{item_id}") - ) - return await future - - # - # standard AIService frame handling - # - - async def start(self, frame: StartFrame): - """Start the service and establish WebSocket connection. - - Args: - frame: The start frame triggering service initialization. - """ - await super().start(frame) - await self._connect() - - async def stop(self, frame: EndFrame): - """Stop the service and close WebSocket connection. - - Args: - frame: The end frame triggering service shutdown. - """ - await super().stop(frame) - await self._disconnect() - - async def cancel(self, frame: CancelFrame): - """Cancel the service and close WebSocket connection. - - Args: - frame: The cancel frame triggering service cancellation. - """ - await super().cancel(frame) - await self._disconnect() - - # - # speech and interruption handling - # - - async def _handle_interruption(self): - # None and False are different. Check for False. None means we're using OpenAI's - # built-in turn detection defaults. - if self._session_properties.turn_detection is False: - await self.send_client_event(events.InputAudioBufferClearEvent()) - await self.send_client_event(events.ResponseCancelEvent()) - await self._truncate_current_audio_response() - await self.stop_all_metrics() - if self._current_assistant_response: - await self.push_frame(LLMFullResponseEndFrame()) - # Only push TTSStoppedFrame if audio modality is enabled - if self._is_modality_enabled("audio"): - await self.push_frame(TTSStoppedFrame()) - - async def _handle_user_started_speaking(self, frame): - pass - - async def _handle_user_stopped_speaking(self, frame): - # None and False are different. Check for False. None means we're using OpenAI's - # built-in turn detection defaults. - if self._session_properties.turn_detection is False: - await self.send_client_event(events.InputAudioBufferCommitEvent()) - await self.send_client_event(events.ResponseCreateEvent()) - - async def _handle_bot_stopped_speaking(self): - self._current_audio_response = None - - def _calculate_audio_duration_ms( - self, total_bytes: int, sample_rate: int = 24000, bytes_per_sample: int = 2 - ) -> int: - """Calculate audio duration in milliseconds based on PCM audio parameters.""" - samples = total_bytes / bytes_per_sample - duration_seconds = samples / sample_rate - return int(duration_seconds * 1000) - - async def _truncate_current_audio_response(self): - """Truncates the current audio response at the appropriate duration. - - Calculates the actual duration of the audio content and truncates at the shorter of - either the wall clock time or the actual audio duration to prevent invalid truncation - requests. - """ - if not self._current_audio_response: - return - - # if the bot is still speaking, truncate the last message - try: - current = self._current_audio_response - self._current_audio_response = None - - # Calculate actual audio duration instead of using wall clock time - audio_duration_ms = self._calculate_audio_duration_ms(current.total_size) - - # Use the shorter of wall clock time or actual audio duration - elapsed_ms = int(time.time() * 1000 - current.start_time_ms) - truncate_ms = min(elapsed_ms, audio_duration_ms) - - logger.trace( - f"Truncating audio: duration={audio_duration_ms}ms, " - f"elapsed={elapsed_ms}ms, truncate={truncate_ms}ms" - ) - - await self.send_client_event( - events.ConversationItemTruncateEvent( - item_id=current.item_id, - content_index=current.content_index, - audio_end_ms=truncate_ms, - ) - ) - except Exception as e: - # Log warning and don't re-raise - allow session to continue - logger.warning(f"Audio truncation failed (non-fatal): {e}") - - # - # frame processing - # - # StartFrame, StopFrame, CancelFrame implemented in base class - # - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process incoming frames from the pipeline. - - Args: - frame: The frame to process. - direction: The direction of frame flow in the pipeline. - """ - # Backward-compatible dict path: frame.settings contains SessionProperties - # fields, not our Settings fields, so we construct SessionProperties - # directly. The frame.delta path falls through to super, which calls - # _update_settings → our override handles the rest. - if isinstance(frame, LLMUpdateSettingsFrame) and frame.delta is None: - self._session_properties = events.SessionProperties(**frame.settings) - await self._send_session_update() - await self.push_frame(frame, direction) - return - - await super().process_frame(frame, direction) - - if isinstance(frame, TranscriptionFrame): - pass - elif isinstance(frame, OpenAILLMContextFrame): - context: OpenAIRealtimeLLMContext = OpenAIRealtimeLLMContext.upgrade_to_realtime( - frame.context - ) - if not self._context: - self._context = context - elif frame.context is not self._context: - # If the context has changed, reset the conversation - self._context = context - await self.reset_conversation() - # Run the LLM at next opportunity - await self._create_response() - elif isinstance(frame, LLMContextFrame): - raise NotImplementedError( - "Universal LLMContext is not yet supported for OpenAI Realtime." - ) - elif isinstance(frame, InputAudioRawFrame): - if not self._audio_input_paused: - await self._send_user_audio(frame) - elif isinstance(frame, InterruptionFrame): - await self._handle_interruption() - elif isinstance(frame, UserStartedSpeakingFrame): - await self._handle_user_started_speaking(frame) - elif isinstance(frame, UserStoppedSpeakingFrame): - await self._handle_user_stopped_speaking(frame) - elif isinstance(frame, BotStoppedSpeakingFrame): - await self._handle_bot_stopped_speaking() - elif isinstance(frame, LLMMessagesAppendFrame): - await self._handle_messages_append(frame) - elif isinstance(frame, RealtimeMessagesUpdateFrame): - self._context = frame.context - elif isinstance(frame, LLMSetToolsFrame): - await self._send_session_update() - elif isinstance(frame, RealtimeFunctionCallResultFrame): - await self._handle_function_call_result(frame.result_frame) - - await self.push_frame(frame, direction) - - async def _handle_messages_append(self, frame): - logger.error("!!! NEED TO IMPLEMENT MESSAGES APPEND") - - async def _handle_function_call_result(self, frame): - item = events.ConversationItem( - type="function_call_output", - call_id=frame.tool_call_id, - output=json.dumps(frame.result, ensure_ascii=False), - ) - await self.send_client_event(events.ConversationItemCreateEvent(item=item)) - - # - # websocket communication - # - - async def send_client_event(self, event: events.ClientEvent): - """Send a client event to the OpenAI Realtime API. - - Args: - event: The client event to send. - """ - await self._ws_send(event.model_dump(exclude_none=True)) - - async def _connect(self): - try: - if self._websocket: - # Here we assume that if we have a websocket, we are connected. We - # handle disconnections in the send/recv code paths. - return - self._websocket = await websocket_connect( - uri=self.base_url, - additional_headers={ - "Authorization": f"Bearer {self.api_key}", - "OpenAI-Beta": "realtime=v1", - }, - ) - self._receive_task = self.create_task(self._receive_task_handler()) - except Exception as e: - await self.push_error(error_msg=f"Error connecting: {e}", exception=e) - self._websocket = None - - async def _disconnect(self): - try: - self._disconnecting = True - self._api_session_ready = False - await self.stop_all_metrics() - if self._websocket: - await self._websocket.close() - self._websocket = None - if self._receive_task: - await self.cancel_task(self._receive_task, timeout=1.0) - self._receive_task = None - self._disconnecting = False - except Exception as e: - await self.push_error(error_msg=f"Error disconnecting: {e}", exception=e) - - async def _ws_send(self, realtime_message): - try: - if self._websocket: - await self._websocket.send(json.dumps(realtime_message)) - except Exception as e: - if self._disconnecting: - return - # In server-to-server contexts, a WebSocket error should be quite rare. Given how hard - # it is to recover from a send-side error with proper state management, and that exponential - # backoff for retries can have cost/stability implications for a service cluster, let's just - # treat a send-side error as fatal. - await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - - async def _update_settings(self, delta): - """Apply a settings delta.""" - changed = await super()._update_settings(delta) - self._warn_unhandled_updated_settings(changed.keys()) - return changed - - async def _send_session_update(self): - settings = self._session_properties - # tools given in the context override the tools in the session properties - if self._context and self._context.tools: - settings.tools = self._context.tools - # instructions in the context come from an initial "system" message in the - # messages list, and override instructions in the session properties - if self._context and self._context._session_instructions: - settings.instructions = self._context._session_instructions - await self.send_client_event(events.SessionUpdateEvent(session=settings)) - - # - # inbound server event handling - # https://platform.openai.com/docs/api-reference/realtime-server-events - # - - async def _receive_task_handler(self): - async for message in self._websocket: - evt = events.parse_server_event(message) - if evt.type == "session.created": - await self._handle_evt_session_created(evt) - elif evt.type == "session.updated": - await self._handle_evt_session_updated(evt) - elif evt.type == "response.audio.delta": - await self._handle_evt_audio_delta(evt) - elif evt.type == "response.audio.done": - await self._handle_evt_audio_done(evt) - elif evt.type == "conversation.item.created": - await self._handle_evt_conversation_item_created(evt) - elif evt.type == "conversation.item.input_audio_transcription.delta": - await self._handle_evt_input_audio_transcription_delta(evt) - elif evt.type == "conversation.item.input_audio_transcription.completed": - await self.handle_evt_input_audio_transcription_completed(evt) - elif evt.type == "conversation.item.retrieved": - await self._handle_conversation_item_retrieved(evt) - elif evt.type == "response.done": - await self._handle_evt_response_done(evt) - elif evt.type == "input_audio_buffer.speech_started": - await self._handle_evt_speech_started(evt) - elif evt.type == "input_audio_buffer.speech_stopped": - await self._handle_evt_speech_stopped(evt) - elif evt.type == "response.text.delta": - await self._handle_evt_text_delta(evt) - elif evt.type == "response.audio_transcript.delta": - await self._handle_evt_audio_transcript_delta(evt) - elif evt.type == "error": - if not await self._maybe_handle_evt_retrieve_conversation_item_error(evt): - if evt.error.code in ( - "response_cancel_not_active", - "conversation_already_has_active_response", - ): - logger.debug(f"{self} {evt.error.message}") - else: - await self._handle_evt_error(evt) - # errors are fatal, so exit the receive loop - return - - @traced_openai_realtime(operation="llm_setup") - async def _handle_evt_session_created(self, evt): - # session.created is received right after connecting. Send a message - # to configure the session properties. - await self._send_session_update() - - async def _handle_evt_session_updated(self, evt): - # If this is our first context frame, run the LLM - self._api_session_ready = True - # Now that we've configured the session, we can run the LLM if we need to. - if self._run_llm_when_api_session_ready: - self._run_llm_when_api_session_ready = False - await self._create_response() - - async def _handle_evt_audio_delta(self, evt): - # note: ttfb is faster by 1/2 RTT than ttfb as measured for other services, since we're getting - # this event from the server - await self.stop_ttfb_metrics() - - if self._current_audio_response and self._current_audio_response.item_id != evt.item_id: - logger.warning( - f"Received a new audio delta for an already completed audio response before receiving the BotStoppedSpeakingFrame." - ) - logger.debug("Forcing previous audio response to None") - self._current_audio_response = None - - if not self._current_audio_response: - self._current_audio_response = CurrentAudioResponse( - item_id=evt.item_id, - content_index=evt.content_index, - start_time_ms=int(time.time() * 1000), - ) - await self.push_frame(TTSStartedFrame()) - audio = base64.b64decode(evt.delta) - self._current_audio_response.total_size += len(audio) - frame = TTSAudioRawFrame( - audio=audio, - sample_rate=24000, - num_channels=1, - ) - await self.push_frame(frame) - - async def _handle_evt_audio_done(self, evt): - if self._current_audio_response: - await self.push_frame(TTSStoppedFrame()) - # Don't clear the self._current_audio_response here. We need to wait until we - # receive a BotStoppedSpeakingFrame from the output transport. - - async def _handle_evt_conversation_item_created(self, evt): - await self._call_event_handler("on_conversation_item_created", evt.item.id, evt.item) - - # This will get sent from the server every time a new "message" is added - # to the server's conversation state, whether we create it via the API - # or the server creates it from LLM output. - if self._messages_added_manually.get(evt.item.id): - del self._messages_added_manually[evt.item.id] - return - - if evt.item.role == "user": - # We need to wait for completion of both user message and response message. Then we'll - # add both to the context. User message is complete when we have a "transcript" field - # that is not None. Response message is complete when we get a "response.done" event. - self._user_and_response_message_tuple = (evt.item, {"done": False, "output": []}) - elif evt.item.role == "assistant": - self._current_assistant_response = evt.item - await self.push_frame(LLMFullResponseStartFrame()) - - async def _handle_evt_input_audio_transcription_delta(self, evt): - if self._send_transcription_frames: - await self.push_frame( - # no way to get a language code? - InterimTranscriptionFrame(evt.delta, "", time_now_iso8601(), result=evt) - ) - - @traced_stt - async def _handle_user_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None - ): - """Handle a transcription result with tracing.""" - pass - - async def handle_evt_input_audio_transcription_completed(self, evt): - """Handle completion of input audio transcription. - - Args: - evt: The transcription completed event. - """ - await self._call_event_handler("on_conversation_item_updated", evt.item_id, None) - - if self._send_transcription_frames: - await self.push_frame( - # no way to get a language code? - TranscriptionFrame(evt.transcript, "", time_now_iso8601(), result=evt) - ) - await self._handle_user_transcription(evt.transcript, True, Language.EN) - pair = self._user_and_response_message_tuple - if pair: - user, assistant = pair - user.content[0].transcript = evt.transcript - if assistant["done"]: - self._user_and_response_message_tuple = None - self._context.add_user_content_item_as_message(user) - await self._handle_assistant_output(assistant["output"]) - else: - # User message without preceding conversation.item.created. Bug? - logger.warning(f"Transcript for unknown user message: {evt}") - - async def _handle_conversation_item_retrieved(self, evt: events.ConversationItemRetrieved): - futures = self._retrieve_conversation_item_futures.pop(evt.item.id, None) - if futures: - for future in futures: - future.set_result(evt.item) - - @traced_openai_realtime(operation="llm_response") - async def _handle_evt_response_done(self, evt): - # todo: figure out whether there's anything we need to do for "cancelled" events - # usage metrics - tokens = LLMTokenUsage( - prompt_tokens=evt.response.usage.input_tokens, - completion_tokens=evt.response.usage.output_tokens, - total_tokens=evt.response.usage.total_tokens, - ) - await self.start_llm_usage_metrics(tokens) - await self.stop_processing_metrics() - await self.push_frame(LLMFullResponseEndFrame()) - self._current_assistant_response = None - # error handling - if evt.response.status == "failed": - await self.push_error(ErrorFrame(error=evt.response.status_details["error"]["message"])) - return - # response content - for item in evt.response.output: - await self._call_event_handler("on_conversation_item_updated", item.id, item) - pair = self._user_and_response_message_tuple - if pair: - user, assistant = pair - assistant["done"] = True - assistant["output"] = evt.response.output - if user.content[0].transcript is not None: - self._user_and_response_message_tuple = None - self._context.add_user_content_item_as_message(user) - await self._handle_assistant_output(assistant["output"]) - else: - # Response message without preceding user message. Add it to the context. - await self._handle_assistant_output(evt.response.output) - - async def _handle_evt_text_delta(self, evt): - if evt.delta: - await self.push_frame(LLMTextFrame(evt.delta)) - - async def _handle_evt_audio_transcript_delta(self, evt): - if evt.delta: - await self.push_frame(LLMTextFrame(evt.delta)) - await self.push_frame(TTSTextFrame(evt.delta, aggregated_by=AggregationType.SENTENCE)) - - async def _handle_evt_speech_started(self, evt): - await self._truncate_current_audio_response() - await self.broadcast_frame(UserStartedSpeakingFrame) - await self.broadcast_interruption() - - async def _handle_evt_speech_stopped(self, evt): - await self.start_ttfb_metrics() - await self.start_processing_metrics() - await self.broadcast_frame(UserStoppedSpeakingFrame) - - async def _maybe_handle_evt_retrieve_conversation_item_error(self, evt: events.ErrorEvent): - """Maybe handle an error event related to retrieving a conversation item. - - If the given error event is an error retrieving a conversation item: - - - set an exception on the future that retrieve_conversation_item() is waiting on - - return true - Otherwise: - - return false - """ - if evt.error.code == "item_retrieve_invalid_item_id": - item_id = evt.error.event_id.split("_", 1)[1] # event_id is of the form "rci_{item_id}" - futures = self._retrieve_conversation_item_futures.pop(item_id, None) - if futures: - for future in futures: - future.set_exception(Exception(evt.error.message)) - return True - return False - - async def _handle_evt_error(self, evt): - # Errors are fatal to this connection. Send an ErrorFrame. - await self.push_error(error_msg=f"Error: {evt}") - - async def _handle_assistant_output(self, output): - # We haven't seen intermixed audio and function_call items in the same response. But let's - # try to write logic that handles that, if it does happen. - # Also, the assistant output is pushed as LLMTextFrame and TTSTextFrame to be handled by - # the assistant context aggregator. - function_calls = [item for item in output if item.type == "function_call"] - await self._handle_function_call_items(function_calls) - - async def _handle_function_call_items(self, items): - function_calls = [] - for item in items: - args = json.loads(item.arguments) - function_calls.append( - FunctionCallFromLLM( - context=self._context, - tool_call_id=item.call_id, - function_name=item.name, - arguments=args, - ) - ) - await self.run_function_calls(function_calls) - - # - # state and client events for the current conversation - # https://platform.openai.com/docs/api-reference/realtime-client-events - # - - async def reset_conversation(self): - """Reset the conversation by disconnecting and reconnecting. - - This is the safest way to start a new conversation. Note that this will - fail if called from the receive task. - """ - logger.debug("Resetting conversation") - await self._disconnect() - if self._context: - self._context.llm_needs_settings_update = True - self._context.llm_needs_initial_messages = True - await self._connect() - - @traced_openai_realtime(operation="llm_request") - async def _create_response(self): - if not self._api_session_ready: - self._run_llm_when_api_session_ready = True - return - - if self._context.llm_needs_initial_messages: - messages = self._context.get_messages_for_initializing_history() - for item in messages: - evt = events.ConversationItemCreateEvent(item=item) - self._messages_added_manually[evt.item.id] = True - await self.send_client_event(evt) - self._context.llm_needs_initial_messages = False - - if self._context.llm_needs_settings_update: - await self._send_session_update() - self._context.llm_needs_settings_update = False - - logger.debug(f"Creating response: {self._context.get_messages_for_logging()}") - - await self.push_frame(LLMFullResponseStartFrame()) - await self.start_processing_metrics() - await self.start_ttfb_metrics() - await self.send_client_event( - events.ResponseCreateEvent( - response=events.ResponseProperties(modalities=self._get_enabled_modalities()) - ) - ) - - async def _send_user_audio(self, frame): - payload = base64.b64encode(frame.audio).decode("utf-8") - await self.send_client_event(events.InputAudioBufferAppendEvent(audio=payload)) - - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> OpenAIContextAggregatorPair: - """Create an instance of OpenAIContextAggregatorPair from an OpenAILLMContext. - - Constructor keyword arguments for both the user and assistant aggregators can be provided. - - Args: - context: The LLM context. - user_params: User aggregator parameters. - assistant_params: Assistant aggregator parameters. - - Returns: - OpenAIContextAggregatorPair: A pair of context aggregators, one for - the user and one for the assistant, encapsulated in an - OpenAIContextAggregatorPair. - """ - context.set_llm_adapter(self.get_llm_adapter()) - - OpenAIRealtimeLLMContext.upgrade_to_realtime(context) - user = OpenAIRealtimeUserContextAggregator(context, params=user_params) - - assistant_params.expect_stripped_words = False - assistant = OpenAIRealtimeAssistantContextAggregator(context, params=assistant_params) - return OpenAIContextAggregatorPair(_user=user, _assistant=assistant) diff --git a/src/pipecat/services/perplexity/llm.py b/src/pipecat/services/perplexity/llm.py index 50953691a..95ff23889 100644 --- a/src/pipecat/services/perplexity/llm.py +++ b/src/pipecat/services/perplexity/llm.py @@ -20,7 +20,6 @@ from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams from pipecat.adapters.services.perplexity_adapter import PerplexityLLMAdapter from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.openai.base_llm import BaseOpenAILLMService from pipecat.services.openai.llm import OpenAILLMService @@ -126,7 +125,7 @@ class PerplexityLLMService(OpenAILLMService): return params - async def _process_context(self, context: OpenAILLMContext | LLMContext): + async def _process_context(self, context: LLMContext): """Process a context through the LLM and accumulate token usage metrics. This method overrides the parent class implementation to handle diff --git a/src/pipecat/services/sambanova/llm.py b/src/pipecat/services/sambanova/llm.py index 20e17b4f2..435694bd2 100644 --- a/src/pipecat/services/sambanova/llm.py +++ b/src/pipecat/services/sambanova/llm.py @@ -20,7 +20,6 @@ from pipecat.frames.frames import ( ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.llm_service import FunctionCallFromLLM from pipecat.services.openai.base_llm import BaseOpenAILLMService from pipecat.services.openai.llm import OpenAILLMService @@ -138,9 +137,7 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore return params @traced_llm # type: ignore - async def _process_context( - self, context: OpenAILLMContext | LLMContext - ) -> AsyncStream[ChatCompletionChunk]: + async def _process_context(self, context: LLMContext) -> AsyncStream[ChatCompletionChunk]: """Process OpenAI LLM context and stream chat completion chunks. This method handles the streaming response from SambaNova API, including @@ -163,11 +160,7 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore await self.start_ttfb_metrics() - chunk_stream = await ( - self._stream_chat_completions_specific_context(context) - if isinstance(context, OpenAILLMContext) - else self._stream_chat_completions_universal_context(context) - ) + chunk_stream = await self.get_chat_completions(context) # Use context manager to ensure stream is closed on cancellation/exception. # Without this, CancelledError during iteration leaves the underlying socket open. diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index fe8a97549..525da40c4 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -46,15 +46,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, -) from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven @@ -404,13 +396,8 @@ class UltravoxRealtimeLLMService(LLMService): """ await super().process_frame(frame, direction) - if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)): - context = ( - frame.context - if isinstance(frame, LLMContextFrame) - else LLMContext.from_openai_context(frame.context) - ) - await self._handle_context(context) + if isinstance(frame, LLMContextFrame): + await self._handle_context(frame.context) elif isinstance(frame, InterruptionFrame): await self.stop_all_metrics() await self.push_frame(frame, direction) @@ -629,40 +616,3 @@ class UltravoxRealtimeLLMService(LLMService): await self.push_frame(LLMFullResponseStartFrame()) self._bot_responding = "text" await self.push_frame(LLMTextFrame(text=text or delta)) - - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> LLMContextAggregatorPair: - """Create an instance of LLMContextAggregatorPair from an OpenAILLMContext. - - Constructor keyword arguments for both the user and assistant aggregators can be provided. - - NOTE: this method exists only for backward compatibility. New code - should instead do:: - - context = LLMContext(...) - context_aggregator = LLMContextAggregatorPair(context) - - Args: - context: The LLM context to use. - user_params: User aggregator parameters. Defaults to LLMUserAggregatorParams(). - assistant_params: Assistant aggregator parameters. Defaults to LLMAssistantAggregatorParams(). - - Returns: - A pair of user and assistant context aggregators. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - # from_openai_context handles deprecation warning - context = LLMContext.from_openai_context(context) - assistant_params.expect_stripped_words = False - return LLMContextAggregatorPair( - context, user_params=user_params, assistant_params=assistant_params - ) diff --git a/src/pipecat/services/xai/llm.py b/src/pipecat/services/xai/llm.py index 160ad3331..ff6c6825e 100644 --- a/src/pipecat/services/xai/llm.py +++ b/src/pipecat/services/xai/llm.py @@ -18,57 +18,12 @@ from loguru import logger from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, -) -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.services.openai.base_llm import BaseOpenAILLMService from pipecat.services.openai.llm import ( - OpenAIAssistantContextAggregator, OpenAILLMService, - OpenAIUserContextAggregator, ) -@dataclass -class GrokContextAggregatorPair: - """Pair of context aggregators for user and assistant interactions. - - Provides a convenient container for managing both user and assistant - context aggregators together for Grok LLM interactions. - - .. deprecated:: 0.0.99 - `GrokContextAggregatorPair` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - - Parameters: - _user: The user context aggregator instance. - _assistant: The assistant context aggregator instance. - """ - - # Aggregators handle deprecation warnings - _user: OpenAIUserContextAggregator - _assistant: OpenAIAssistantContextAggregator - - def user(self) -> OpenAIUserContextAggregator: - """Get the user context aggregator. - - Returns: - The user context aggregator instance. - """ - return self._user - - def assistant(self) -> OpenAIAssistantContextAggregator: - """Get the assistant context aggregator. - - Returns: - The assistant context aggregator instance. - """ - return self._assistant - - @dataclass class GrokLLMSettings(BaseOpenAILLMService.Settings): """Settings for GrokLLMService.""" @@ -147,7 +102,7 @@ class GrokLLMService(OpenAILLMService): logger.debug(f"Creating Grok client with api {base_url}") return super().create_client(api_key, base_url, **kwargs) - async def _process_context(self, context: OpenAILLMContext | LLMContext): + async def _process_context(self, context: LLMContext): """Process a context through the LLM and accumulate token usage metrics. This method overrides the parent class implementation to handle Grok's @@ -213,38 +168,3 @@ class GrokLLMService(OpenAILLMService): if tokens.reasoning_tokens is not None: self._reasoning_tokens = tokens.reasoning_tokens - - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> GrokContextAggregatorPair: - """Create an instance of GrokContextAggregatorPair from an OpenAILLMContext. - - Constructor keyword arguments for both the user and assistant aggregators - can be provided. - - Args: - context: The LLM context to create aggregators for. - user_params: Parameters for configuring the user aggregator. - assistant_params: Parameters for configuring the assistant aggregator. - - Returns: - GrokContextAggregatorPair: A pair of context aggregators, one for - the user and one for the assistant, encapsulated in an - GrokContextAggregatorPair. - - .. deprecated:: 0.0.99 - `create_context_aggregator()` is deprecated and will be removed in a future version. - Use the universal `LLMContext` and `LLMContextAggregatorPair` instead. - See `OpenAILLMContext` docstring for migration guide. - """ - context.set_llm_adapter(self.get_llm_adapter()) - - # Aggregators handle deprecation warnings - user = OpenAIUserContextAggregator(context, params=user_params) - assistant = OpenAIAssistantContextAggregator(context, params=assistant_params) - - return GrokContextAggregatorPair(_user=user, _assistant=assistant) diff --git a/src/pipecat/services/xai/realtime/llm.py b/src/pipecat/services/xai/realtime/llm.py index 1317e7269..33dbe2f63 100644 --- a/src/pipecat/services/xai/realtime/llm.py +++ b/src/pipecat/services/xai/realtime/llm.py @@ -46,14 +46,9 @@ from pipecat.frames.frames import ( ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, -) from pipecat.processors.aggregators.llm_response_universal import ( LLMContextAggregatorPair, ) -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.settings import ( @@ -946,26 +941,3 @@ class GrokRealtimeLLMService(LLMService): output=json.dumps(result, ensure_ascii=False), ) await self.send_client_event(events.ConversationItemCreateEvent(item=item)) - - def create_context_aggregator( - self, - context: OpenAILLMContext, - *, - user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(), - assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(), - ) -> LLMContextAggregatorPair: - """Create context aggregators for the Grok Realtime service. - - Args: - context: The LLM context. - user_params: User aggregator parameters. - assistant_params: Assistant aggregator parameters. - - Returns: - LLMContextAggregatorPair for user and assistant context aggregation. - """ - context = LLMContext.from_openai_context(context) - assistant_params.expect_stripped_words = False - return LLMContextAggregatorPair( - context, user_params=user_params, assistant_params=assistant_params - ) diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index d0cd5212c..d4a1241f3 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -480,7 +480,7 @@ class BaseInputTransport(FrameProcessor): self, audio_frame: InputAudioRawFrame, vad_state: VADState ) -> VADState: """Handle Voice Activity Detection results and generate appropriate frames.""" - if self._params.turn_analyzer or self._deprecated_openaillmcontext: + if self._params.turn_analyzer: return await self._deprecated_old_handle_vad(audio_frame, vad_state) else: return await self._deprecated_new_handle_vad(audio_frame, vad_state) diff --git a/src/pipecat/utils/tracing/service_attributes.py b/src/pipecat/utils/tracing/service_attributes.py index e5cf4a83f..5be781406 100644 --- a/src/pipecat/utils/tracing/service_attributes.py +++ b/src/pipecat/utils/tracing/service_attributes.py @@ -47,7 +47,6 @@ def _get_provider_name_from_service_name(service_name: str) -> str: "AzureLLMService": "az.ai.openai", # Google "GoogleLLMService": "gcp.gemini", - "GoogleLLMOpenAIBetaService": "gcp.gemini", "GoogleVertexLLMService": "gcp.vertex_ai", # Others "GrokLLMService": "xai", diff --git a/src/pipecat/utils/tracing/service_decorators.py b/src/pipecat/utils/tracing/service_decorators.py index cc353e1a3..8f98026e6 100644 --- a/src/pipecat/utils/tracing/service_decorators.py +++ b/src/pipecat/utils/tracing/service_decorators.py @@ -24,7 +24,6 @@ if TYPE_CHECKING: from opentelemetry import trace from pipecat.processors.aggregators.llm_context import NOT_GIVEN, LLMContext -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.utils.tracing.service_attributes import ( add_gemini_live_span_attributes, add_llm_span_attributes, @@ -459,40 +458,30 @@ def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) - self.push_frame = traced_push_frame # Get messages for logging - # For OpenAILLMContext: use context's own get_messages_for_logging() method - # For LLMContext: use adapter's get_messages_for_logging() which returns + # Use adapter's get_messages_for_logging() which returns # messages in provider's native format with sensitive data sanitized messages = None serialized_messages = None - if isinstance(context, OpenAILLMContext): - # OpenAILLMContext and subclasses have their own method - messages = context.get_messages_for_logging() - elif isinstance(context, LLMContext): - # Universal LLMContext - use adapter for provider-native format - if hasattr(self, "get_llm_adapter"): - adapter = self.get_llm_adapter() - messages = adapter.get_messages_for_logging(context) + # Use adapter for provider-native format + if hasattr(self, "get_llm_adapter"): + adapter = self.get_llm_adapter() + messages = adapter.get_messages_for_logging(context) # Serialize messages if available if messages: serialized_messages = json.dumps(messages) # Get tools - # For OpenAILLMContext: tools may need adapter conversion if set - # For LLMContext: use adapter's from_standard_tools() to convert ToolsSchema + # Use adapter's from_standard_tools() to convert ToolsSchema tools = None serialized_tools = None tool_count = 0 - if isinstance(context, OpenAILLMContext): - # OpenAILLMContext: tools property handles adapter conversion internally - tools = context.tools - elif isinstance(context, LLMContext): - # Universal LLMContext - use adapter to convert ToolsSchema - if hasattr(self, "get_llm_adapter") and hasattr(context, "tools"): - adapter = self.get_llm_adapter() - tools = adapter.from_standard_tools(context.tools) + # Use adapter to convert ToolsSchema + if hasattr(self, "get_llm_adapter") and hasattr(context, "tools"): + adapter = self.get_llm_adapter() + tools = adapter.from_standard_tools(context.tools) # Serialize and count tools if available # Check if tools is not None and not NOT_GIVEN @@ -501,36 +490,27 @@ def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) - tool_count = len(tools) if isinstance(tools, list) else 1 # Handle system message for different services - system_message = None - if isinstance(context, LLMContext): - # settings.system_instruction takes priority (matches service behavior) - if hasattr(self, "_settings") and getattr( - self._settings, "system_instruction", None - ): - system_message = self._settings.system_instruction - else: - # Fall back to extracting from context messages - ctx_messages = context.get_messages() - if ctx_messages: - first = ctx_messages[0] - if ( - isinstance(first, dict) - and first.get("role") == "system" - ): - content = first.get("content") - if isinstance(content, str): - system_message = content - elif isinstance(content, list): - system_message = " ".join( - part.get("text", "") - for part in content - if isinstance(part, dict) - and part.get("type") == "text" - ) - elif hasattr(context, "system"): - system_message = context.system - elif hasattr(context, "system_message"): - system_message = context.system_message + # settings.system_instruction takes priority (matches service behavior) + if hasattr(self, "_settings") and getattr( + self._settings, "system_instruction", None + ): + system_message = self._settings.system_instruction + else: + # Fall back to extracting from context messages + ctx_messages = context.get_messages() + if ctx_messages: + first = ctx_messages[0] + if isinstance(first, dict) and first.get("role") == "system": + content = first.get("content") + if isinstance(content, str): + system_message = content + elif isinstance(content, list): + system_message = " ".join( + part.get("text", "") + for part in content + if isinstance(part, dict) + and part.get("type") == "text" + ) # Use given_fields() defensively in case a service doesn't # initialize all settings. diff --git a/tests/test_context_aggregators.py b/tests/test_context_aggregators.py deleted file mode 100644 index 37d36bfef..000000000 --- a/tests/test_context_aggregators.py +++ /dev/null @@ -1,1060 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import json -import unittest -from typing import Any, Optional - -from pipecat.audio.interruptions.min_words_interruption_strategy import MinWordsInterruptionStrategy -from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams -from pipecat.audio.vad.vad_analyzer import VADParams -from pipecat.frames.frames import ( - BotStartedSpeakingFrame, - EmulateUserStartedSpeakingFrame, - EmulateUserStoppedSpeakingFrame, - Frame, - FunctionCallInProgressFrame, - FunctionCallResultFrame, - FunctionCallResultProperties, - InterimTranscriptionFrame, - InterruptionFrame, - LLMContextAssistantTimestampFrame, - LLMContextFrame, - LLMFullResponseEndFrame, - LLMFullResponseStartFrame, - OpenAILLMContextAssistantTimestampFrame, - SpeechControlParamsFrame, - TextFrame, - TranscriptionFrame, - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, -) -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.task import PipelineParams -from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response import ( - LLMAssistantAggregatorParams, - LLMUserAggregatorParams, - LLMUserContextAggregator, -) -from pipecat.processors.aggregators.llm_response_universal import LLMAssistantAggregator -from pipecat.processors.aggregators.openai_llm_context import ( - OpenAILLMContext, - OpenAILLMContextFrame, -) -from pipecat.processors.frame_processor import FrameDirection, FrameProcessor -from pipecat.services.anthropic.llm import ( - AnthropicAssistantContextAggregator, - AnthropicLLMContext, - AnthropicUserContextAggregator, -) -from pipecat.services.aws.llm import ( - AWSBedrockAssistantContextAggregator, - AWSBedrockLLMContext, - AWSBedrockUserContextAggregator, -) -from pipecat.services.google.llm import ( - GoogleAssistantContextAggregator, - GoogleLLMContext, - GoogleUserContextAggregator, -) -from pipecat.services.openai.llm import ( - OpenAIAssistantContextAggregator, - OpenAIUserContextAggregator, -) -from pipecat.tests.utils import SleepFrame, run_test - -AGGREGATION_TIMEOUT = 0.1 -AGGREGATION_SLEEP = 0.15 - - -class BaseTestUserContextAggregator: - CONTEXT_CLASS = None # To be set in subclasses - AGGREGATOR_CLASS = None # To be set in subclasses - EXPECTED_CONTEXT_FRAMES = [OpenAILLMContextFrame] - - def check_message_content(self, context: OpenAILLMContext, index: int, content: str): - assert context.messages[index]["content"] == content - - def check_message_multi_content( - self, context: OpenAILLMContext, content_index: int, index: int, content: str - ): - assert context.messages[index]["content"] == content - - async def test_se(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - frames_to_send = [UserStartedSpeakingFrame(), UserStoppedSpeakingFrame()] - expected_down_frames = [UserStartedSpeakingFrame, UserStoppedSpeakingFrame] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - - async def test_ste(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - frames_to_send = [ - UserStartedSpeakingFrame(), - TranscriptionFrame(text="Hello!", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - UserStoppedSpeakingFrame, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "Hello!") - - async def test_site(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - frames_to_send = [ - UserStartedSpeakingFrame(), - InterimTranscriptionFrame(text="Hello", user_id="cat", timestamp=""), - TranscriptionFrame(text="Hello Pipecat!", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - UserStoppedSpeakingFrame, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "Hello Pipecat!") - - async def test_st1iest2e(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - frames_to_send = [ - UserStartedSpeakingFrame(), - TranscriptionFrame(text="Hello Pipecat!", user_id="cat", timestamp=""), - InterimTranscriptionFrame(text="How ", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - UserStartedSpeakingFrame(), - TranscriptionFrame(text="How are you?", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, - UserStartedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - UserStoppedSpeakingFrame, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "Hello Pipecat! How are you?") - - async def test_siet(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=LLMUserAggregatorParams(aggregation_timeout=AGGREGATION_TIMEOUT) - ) - frames_to_send = [ - UserStartedSpeakingFrame(), - InterimTranscriptionFrame(text="How ", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - TranscriptionFrame(text="How are you?", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "How are you?") - - async def test_sieit(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=LLMUserAggregatorParams(aggregation_timeout=AGGREGATION_TIMEOUT) - ) - frames_to_send = [ - UserStartedSpeakingFrame(), - InterimTranscriptionFrame(text="How ", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - InterimTranscriptionFrame(text="are you?", user_id="cat", timestamp=""), - TranscriptionFrame(text="How are you?", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "How are you?") - - async def test_set(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=LLMUserAggregatorParams(aggregation_timeout=AGGREGATION_TIMEOUT) - ) - frames_to_send = [ - UserStartedSpeakingFrame(), - UserStoppedSpeakingFrame(), - TranscriptionFrame(text="How are you?", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "How are you?") - - async def test_seit(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=LLMUserAggregatorParams(aggregation_timeout=AGGREGATION_TIMEOUT) - ) - frames_to_send = [ - UserStartedSpeakingFrame(), - UserStoppedSpeakingFrame(), - InterimTranscriptionFrame(text="How ", user_id="cat", timestamp=""), - TranscriptionFrame(text="How are you?", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "How are you?") - - async def test_st1et2(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=LLMUserAggregatorParams(aggregation_timeout=AGGREGATION_TIMEOUT) - ) - frames_to_send = [ - SpeechControlParamsFrame(vad_params=VADParams(stop_secs=AGGREGATION_TIMEOUT)), - UserStartedSpeakingFrame(), - TranscriptionFrame(text="Hello Pipecat!", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - TranscriptionFrame(text="How are you?", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - SpeechControlParamsFrame, - UserStartedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - UserStoppedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_multi_content(context, 0, 0, "Hello Pipecat!") - self.check_message_multi_content(context, 0, 1, "How are you?") - - async def test_set1t2(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=LLMUserAggregatorParams(aggregation_timeout=AGGREGATION_TIMEOUT) - ) - frames_to_send = [ - UserStartedSpeakingFrame(), - UserStoppedSpeakingFrame(), - TranscriptionFrame(text="Hello Pipecat!", user_id="cat", timestamp=""), - TranscriptionFrame(text="How are you?", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "Hello Pipecat! How are you?") - - async def test_siet1it2(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=LLMUserAggregatorParams(aggregation_timeout=AGGREGATION_TIMEOUT) - ) - frames_to_send = [ - UserStartedSpeakingFrame(), - InterimTranscriptionFrame(text="Hello ", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - TranscriptionFrame(text="Hello Pipecat!", user_id="cat", timestamp=""), - InterimTranscriptionFrame(text="How ", user_id="cat", timestamp=""), - TranscriptionFrame(text="How are you?", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "Hello Pipecat! How are you?") - - async def test_t(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context - ) # No aggregation timeout; this tests VAD emulation - - frames_to_send = [ - SpeechControlParamsFrame(vad_params=VADParams(stop_secs=AGGREGATION_TIMEOUT)), - TranscriptionFrame(text="Hello!", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - SpeechControlParamsFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - expected_up_frames = [EmulateUserStartedSpeakingFrame, EmulateUserStoppedSpeakingFrame] - - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - expected_up_frames=expected_up_frames, - ) - self.check_message_content(context, 0, "Hello!") - - async def test_t_with_turn_analyzer(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=LLMUserAggregatorParams(turn_emulated_vad_timeout=AGGREGATION_TIMEOUT) - ) - - frames_to_send = [ - SpeechControlParamsFrame( - vad_params=VADParams(stop_secs=0.2), - turn_params=SmartTurnParams(stop_secs=3.0), # Turn analyzer present - ), - TranscriptionFrame(text="Hello!", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - SpeechControlParamsFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - expected_up_frames = [EmulateUserStartedSpeakingFrame, EmulateUserStoppedSpeakingFrame] - - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - expected_up_frames=expected_up_frames, - ) - self.check_message_content(context, 0, "Hello!") - - async def test_it(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context - ) # No aggregation timeout; this tests VAD emulation - frames_to_send = [ - SpeechControlParamsFrame(vad_params=VADParams(stop_secs=AGGREGATION_TIMEOUT)), - InterimTranscriptionFrame(text="Hello ", user_id="cat", timestamp=""), - SleepFrame(), - TranscriptionFrame(text="Hello Pipecat!", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [SpeechControlParamsFrame, *self.EXPECTED_CONTEXT_FRAMES] - expected_up_frames = [EmulateUserStartedSpeakingFrame, EmulateUserStoppedSpeakingFrame] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - expected_up_frames=expected_up_frames, - ) - self.check_message_content(context, 0, "Hello Pipecat!") - - async def test_sie_delay_it(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=LLMUserAggregatorParams(aggregation_timeout=AGGREGATION_TIMEOUT) - ) - frames_to_send = [ - UserStartedSpeakingFrame(), - InterimTranscriptionFrame(text="How ", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - SleepFrame(AGGREGATION_SLEEP), - InterimTranscriptionFrame(text="are you?", user_id="cat", timestamp=""), - TranscriptionFrame(text="How are you?", user_id="cat", timestamp=""), - SleepFrame(sleep=AGGREGATION_SLEEP), - ] - expected_down_frames = [ - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "How are you?") - - async def test_min_words_interruption_strategy_one_word(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - class ContextProcessor(FrameProcessor): - def __init__(self): - super().__init__() - self.context_received = False - - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - - if isinstance(frame, OpenAILLMContextFrame): - self.context_received = True - - await self.push_frame(frame, direction) - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - context_processor = ContextProcessor() - pipeline = Pipeline([aggregator, context_processor]) - - frames_to_send = [ - BotStartedSpeakingFrame(), - UserStartedSpeakingFrame(), - TranscriptionFrame(text="Can", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - ] - expected_down_frames = [ - BotStartedSpeakingFrame, - UserStartedSpeakingFrame, - UserStoppedSpeakingFrame, - ] - await run_test( - pipeline, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - pipeline_params=PipelineParams( - interruption_strategies=[MinWordsInterruptionStrategy(min_words=2)] - ), - ) - assert not context_processor.context_received - - async def test_min_words_interruption_strategy_two_words(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - class ContextProcessor(FrameProcessor): - def __init__(self): - super().__init__() - self.context_received = False - - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - - if isinstance(frame, OpenAILLMContextFrame): - self.context_received = True - elif isinstance(frame, InterruptionFrame): - self.context_received = False - - await self.push_frame(frame, direction) - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - context_processor = ContextProcessor() - pipeline = Pipeline([aggregator, context_processor]) - - frames_to_send = [ - BotStartedSpeakingFrame(), - UserStartedSpeakingFrame(), - TranscriptionFrame(text="Can you", user_id="cat", timestamp=""), - SleepFrame(), - UserStoppedSpeakingFrame(), - ] - expected_up_frames = [InterruptionFrame] - expected_down_frames = [ - BotStartedSpeakingFrame, - UserStartedSpeakingFrame, - InterruptionFrame, - UserStoppedSpeakingFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - pipeline, - frames_to_send=frames_to_send, - expected_up_frames=expected_up_frames, - expected_down_frames=expected_down_frames, - pipeline_params=PipelineParams( - interruption_strategies=[MinWordsInterruptionStrategy(min_words=2)] - ), - ) - self.check_message_content(context, 0, "Can you") - # If the context is not received or it has been cleared by the - # interruption then we have an issue. - assert context_processor.context_received - - -class BaseTestAssistantContextAggregator: - CONTEXT_CLASS = None # To be set in subclasses - AGGREGATOR_CLASS = None # To be set in subclasses - EXPECTED_CONTEXT_FRAMES = None # To be set in subclasses - - def create_assistant_aggregator_params( - self, **kwargs - ) -> Optional[LLMAssistantAggregatorParams]: - return LLMAssistantAggregatorParams(**kwargs) - - def check_message_content(self, context: OpenAILLMContext, index: int, content: str): - assert context.messages[index]["content"] == content - - def check_message_multi_content( - self, context: OpenAILLMContext, content_index: int, index: int, content: str - ): - assert context.messages[index]["content"] == content - - def check_function_call_result(self, context: OpenAILLMContext, index: int, content: str): - assert json.loads(context.messages[index]["content"]) == content - - async def test_empty(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - frames_to_send = [LLMFullResponseStartFrame(), LLMFullResponseEndFrame()] - expected_down_frames = [] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - - async def test_single_text(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - frames_to_send = [ - LLMFullResponseStartFrame(), - TextFrame(text="Hello Pipecat!"), - LLMFullResponseEndFrame(), - ] - expected_down_frames = [*self.EXPECTED_CONTEXT_FRAMES] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "Hello Pipecat!") - - async def test_multiple_text(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=self.create_assistant_aggregator_params(expect_stripped_words=False) - ) - - # The newer LLMAssistantAggregator expects TextFrames to declare - # when they include inter-frame spaces. - def make_text_frame(text: str) -> TextFrame: - frame = TextFrame(text=text) - frame.includes_inter_frame_spaces = True - return frame - - frames_to_send = [ - LLMFullResponseStartFrame(), - make_text_frame("Hello "), - make_text_frame("Pipecat. "), - make_text_frame("How are "), - make_text_frame("you?"), - LLMFullResponseEndFrame(), - ] - expected_down_frames = [*self.EXPECTED_CONTEXT_FRAMES] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "Hello Pipecat. How are you?") - - async def test_multiple_text_stripped(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - frames_to_send = [ - LLMFullResponseStartFrame(), - TextFrame(text="Hello"), - TextFrame(text="Pipecat."), - TextFrame(text="How are"), - TextFrame(text="you?"), - LLMFullResponseEndFrame(), - ] - expected_down_frames = [*self.EXPECTED_CONTEXT_FRAMES] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content(context, 0, "Hello Pipecat. How are you?") - - async def test_multiple_llm_responses(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=self.create_assistant_aggregator_params(expect_stripped_words=False) - ) - - # The newer LLMAssistantAggregator expects TextFrames to declare - # when they include inter-frame spaces. - def make_text_frame(text: str) -> TextFrame: - frame = TextFrame(text=text) - frame.includes_inter_frame_spaces = True - return frame - - frames_to_send = [ - LLMFullResponseStartFrame(), - make_text_frame("Hello "), - make_text_frame("Pipecat."), - LLMFullResponseEndFrame(), - LLMFullResponseStartFrame(), - make_text_frame(text="How are "), - make_text_frame(text="you?"), - LLMFullResponseEndFrame(), - ] - expected_down_frames = [*self.EXPECTED_CONTEXT_FRAMES, *self.EXPECTED_CONTEXT_FRAMES] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_multi_content(context, 0, 0, "Hello Pipecat.") - self.check_message_multi_content(context, 0, 1, "How are you?") - - async def test_multiple_llm_responses_interruption(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=self.create_assistant_aggregator_params(expect_stripped_words=False) - ) - - # The newer LLMAssistantAggregator expects TextFrames to declare - # when they include inter-frame spaces. - def make_text_frame(text: str) -> TextFrame: - frame = TextFrame(text=text) - frame.includes_inter_frame_spaces = True - return frame - - frames_to_send = [ - LLMFullResponseStartFrame(), - make_text_frame("Hello "), - make_text_frame("Pipecat."), - LLMFullResponseEndFrame(), - SleepFrame(AGGREGATION_SLEEP), - InterruptionFrame(), - LLMFullResponseStartFrame(), - make_text_frame("How are "), - make_text_frame("you?"), - LLMFullResponseEndFrame(), - ] - expected_down_frames = [ - *self.EXPECTED_CONTEXT_FRAMES, - InterruptionFrame, - *self.EXPECTED_CONTEXT_FRAMES, - ] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_multi_content(context, 0, 0, "Hello Pipecat.") - self.check_message_multi_content(context, 0, 1, "How are you?") - - async def test_function_call(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - frames_to_send = [ - FunctionCallInProgressFrame( - function_name="get_weather", - tool_call_id="1", - arguments={"location": "Los Angeles"}, - cancel_on_interruption=False, - ), - SleepFrame(), - FunctionCallResultFrame( - function_name="get_weather", - tool_call_id="1", - arguments={"location": "Los Angeles"}, - result={"conditions": "Sunny"}, - ), - ] - expected_down_frames = [] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_function_call_result(context, -1, {"conditions": "Sunny"}) - - async def test_function_call_on_context_updated(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context_updated = False - - async def on_context_updated(): - nonlocal context_updated - context_updated = True - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS(context) - frames_to_send = [ - FunctionCallInProgressFrame( - function_name="get_weather", - tool_call_id="1", - arguments={"location": "Los Angeles"}, - cancel_on_interruption=False, - ), - SleepFrame(), - FunctionCallResultFrame( - function_name="get_weather", - tool_call_id="1", - arguments={"location": "Los Angeles"}, - result={"conditions": "Sunny"}, - properties=FunctionCallResultProperties(on_context_updated=on_context_updated), - ), - SleepFrame(), - ] - expected_down_frames = [] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_function_call_result(context, -1, {"conditions": "Sunny"}) - assert context_updated - - -# -# LLMUserContextAggregator -# - - -class TestLLMUserContextAggregator(BaseTestUserContextAggregator, unittest.IsolatedAsyncioTestCase): - CONTEXT_CLASS = OpenAILLMContext - AGGREGATOR_CLASS = LLMUserContextAggregator - - -# -# Anthropic -# - - -class TestAnthropicUserContextAggregator( - BaseTestUserContextAggregator, unittest.IsolatedAsyncioTestCase -): - CONTEXT_CLASS = AnthropicLLMContext - AGGREGATOR_CLASS = AnthropicUserContextAggregator - - def check_message_multi_content( - self, context: OpenAILLMContext, content_index: int, index: int, content: str - ): - messages = context.messages[content_index] - assert messages["content"][index]["text"] == content - - -class TestAnthropicAssistantContextAggregator( - BaseTestAssistantContextAggregator, unittest.IsolatedAsyncioTestCase -): - CONTEXT_CLASS = AnthropicLLMContext - AGGREGATOR_CLASS = AnthropicAssistantContextAggregator - EXPECTED_CONTEXT_FRAMES = [OpenAILLMContextFrame, OpenAILLMContextAssistantTimestampFrame] - - def check_message_multi_content( - self, context: OpenAILLMContext, content_index: int, index: int, content: str - ): - messages = context.messages[content_index] - assert messages["content"][index]["text"] == content - - def check_function_call_result(self, context: OpenAILLMContext, index: int, content: Any): - assert context.messages[index]["content"][0]["content"] == json.dumps(content) - - -# -# AWS (Bedrock) -# - - -class TestAWSBedrockUserContextAggregator( - BaseTestUserContextAggregator, unittest.IsolatedAsyncioTestCase -): - CONTEXT_CLASS = AWSBedrockLLMContext - AGGREGATOR_CLASS = AWSBedrockUserContextAggregator - - def check_message_multi_content( - self, context: OpenAILLMContext, content_index: int, index: int, content: str - ): - messages = context.messages[content_index] - assert messages["content"][index]["text"] == content - - -class TestAWSBedrockAssistantContextAggregator( - BaseTestAssistantContextAggregator, unittest.IsolatedAsyncioTestCase -): - CONTEXT_CLASS = AWSBedrockLLMContext - AGGREGATOR_CLASS = AWSBedrockAssistantContextAggregator - EXPECTED_CONTEXT_FRAMES = [OpenAILLMContextFrame, OpenAILLMContextAssistantTimestampFrame] - - def check_message_multi_content( - self, context: OpenAILLMContext, content_index: int, index: int, content: str - ): - messages = context.messages[content_index] - assert messages["content"][index]["text"] == content - - def check_function_call_result(self, context: OpenAILLMContext, index: int, content: Any): - assert context.messages[index]["content"][0]["toolResult"]["content"][0][ - "text" - ] == json.dumps(content) - - -# -# Google -# - - -class TestGoogleUserContextAggregator( - BaseTestUserContextAggregator, unittest.IsolatedAsyncioTestCase -): - CONTEXT_CLASS = GoogleLLMContext - AGGREGATOR_CLASS = GoogleUserContextAggregator - - def check_message_content(self, context: OpenAILLMContext, index: int, content: str): - obj = context.messages[index].to_json_dict() - assert obj["parts"][0]["text"] == content - - def check_message_multi_content( - self, context: OpenAILLMContext, content_index: int, index: int, content: str - ): - obj = context.messages[index].to_json_dict() - assert obj["parts"][0]["text"] == content - - -class TestGoogleAssistantContextAggregator( - BaseTestAssistantContextAggregator, unittest.IsolatedAsyncioTestCase -): - CONTEXT_CLASS = GoogleLLMContext - AGGREGATOR_CLASS = GoogleAssistantContextAggregator - EXPECTED_CONTEXT_FRAMES = [OpenAILLMContextFrame, OpenAILLMContextAssistantTimestampFrame] - - def check_message_content(self, context: OpenAILLMContext, index: int, content: str): - obj = context.messages[index].to_json_dict() - assert obj["parts"][0]["text"] == content - - def check_message_multi_content( - self, context: OpenAILLMContext, content_index: int, index: int, content: str - ): - obj = context.messages[index].to_json_dict() - assert obj["parts"][0]["text"] == content - - def check_function_call_result(self, context: OpenAILLMContext, index: int, content: Any): - obj = context.messages[index].to_json_dict() - assert obj["parts"][0]["function_response"]["response"]["value"] == json.dumps(content) - - -# -# OpenAI -# - - -class TestOpenAIUserContextAggregator( - BaseTestUserContextAggregator, unittest.IsolatedAsyncioTestCase -): - CONTEXT_CLASS = OpenAILLMContext - AGGREGATOR_CLASS = OpenAIUserContextAggregator - - -class TestOpenAIAssistantContextAggregator( - BaseTestAssistantContextAggregator, unittest.IsolatedAsyncioTestCase -): - CONTEXT_CLASS = OpenAILLMContext - AGGREGATOR_CLASS = OpenAIAssistantContextAggregator - EXPECTED_CONTEXT_FRAMES = [OpenAILLMContextFrame, OpenAILLMContextAssistantTimestampFrame] - - -# -# Universal -# -class TestLLMAssistantAggregator( - BaseTestAssistantContextAggregator, unittest.IsolatedAsyncioTestCase -): - CONTEXT_CLASS = LLMContext - AGGREGATOR_CLASS = LLMAssistantAggregator - EXPECTED_CONTEXT_FRAMES = [LLMContextFrame, LLMContextAssistantTimestampFrame] - - # Override to remove 'expect_stripped_words' parameter, which is deprecated - # for LLMAssistantAggregator - def create_assistant_aggregator_params( - self, **kwargs - ) -> Optional[LLMAssistantAggregatorParams]: - kwargs.pop("expect_stripped_words", None) - return LLMAssistantAggregatorParams(**kwargs) if kwargs else None - - async def test_multiple_text_mixed(self): - assert self.CONTEXT_CLASS is not None, "CONTEXT_CLASS must be set in a subclass" - assert self.AGGREGATOR_CLASS is not None, "AGGREGATOR_CLASS must be set in a subclass" - - context = self.CONTEXT_CLASS() - aggregator = self.AGGREGATOR_CLASS( - context, params=self.create_assistant_aggregator_params(expect_stripped_words=False) - ) - - # The newer LLMAssistantAggregator expects TextFrames to declare - # when they include inter-frame spaces. - def make_text_frame(text: str, includes_spaces: bool) -> TextFrame: - frame = TextFrame(text=text) - frame.includes_inter_frame_spaces = includes_spaces - return frame - - frames_to_send = [ - LLMFullResponseStartFrame(), - make_text_frame("Hello ", includes_spaces=True), - make_text_frame("Pipecat. ", includes_spaces=True), - make_text_frame("Here's some", includes_spaces=True), - make_text_frame( - " code:", includes_spaces=True - ), # Validates ending includes_inter_frame_spaces run with no space - make_text_frame("```python\nprint('Hello, World!')\n```", includes_spaces=False), - make_text_frame( - "```javascript\nconsole.log('Hello, World!');\n```", includes_spaces=False - ), - make_text_frame( - " And some more: ", includes_spaces=True - ), # Validates starting includes_inter_frame_spaces run with a space and ending it with no space - make_text_frame("```html\n
Hello, World!
\n```", includes_spaces=False), - make_text_frame( - "Hope that ", includes_spaces=True - ), # Validates starting includes_inter_frame_spaces run with no space - make_text_frame("helps!", includes_spaces=True), - LLMFullResponseEndFrame(), - ] - expected_down_frames = [*self.EXPECTED_CONTEXT_FRAMES] - await run_test( - aggregator, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - self.check_message_content( - context, - 0, - "Hello Pipecat. Here's some code: ```python\nprint('Hello, World!')\n``` ```javascript\nconsole.log('Hello, World!');\n``` And some more: ```html\n
Hello, World!
\n``` Hope that helps!", - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_context_aggregators_universal.py b/tests/test_context_aggregators_universal.py index 7d6f73f2d..59aab4745 100644 --- a/tests/test_context_aggregators_universal.py +++ b/tests/test_context_aggregators_universal.py @@ -4,13 +4,16 @@ # SPDX-License-Identifier: BSD 2-Clause License # +import json import unittest from pipecat.frames.frames import ( BotStartedSpeakingFrame, BotStoppedSpeakingFrame, FunctionCallFromLLM, + FunctionCallInProgressFrame, FunctionCallResultFrame, + FunctionCallResultProperties, FunctionCallsStartedFrame, InterimTranscriptionFrame, InterruptionFrame, @@ -26,6 +29,7 @@ from pipecat.frames.frames import ( LLMThoughtStartFrame, LLMThoughtTextFrame, StartFrame, + TextFrame, TranscriptionFrame, TranslationFrame, UserMuteStartedFrame, @@ -588,6 +592,165 @@ class TestLLMAssistantAggregator(unittest.IsolatedAsyncioTestCase): self.assertTrue(should_stop) self.assertEqual(stop_message.content, "Hello from Pipecat!") + async def test_multiple_text_with_spaces(self): + context = LLMContext() + aggregator = LLMAssistantAggregator(context) + + def make_text_frame(text: str) -> TextFrame: + frame = TextFrame(text=text) + frame.includes_inter_frame_spaces = True + return frame + + frames_to_send = [ + LLMFullResponseStartFrame(), + make_text_frame("Hello "), + make_text_frame("Pipecat. "), + make_text_frame("How are "), + make_text_frame("you?"), + LLMFullResponseEndFrame(), + ] + expected_down_frames = [LLMContextFrame, LLMContextAssistantTimestampFrame] + await run_test( + aggregator, + frames_to_send=frames_to_send, + expected_down_frames=expected_down_frames, + ) + assert context.messages[0]["content"] == "Hello Pipecat. How are you?" + + async def test_multiple_text_stripped(self): + context = LLMContext() + aggregator = LLMAssistantAggregator(context) + frames_to_send = [ + LLMFullResponseStartFrame(), + TextFrame(text="Hello"), + TextFrame(text="Pipecat."), + TextFrame(text="How are"), + TextFrame(text="you?"), + LLMFullResponseEndFrame(), + ] + expected_down_frames = [LLMContextFrame, LLMContextAssistantTimestampFrame] + await run_test( + aggregator, + frames_to_send=frames_to_send, + expected_down_frames=expected_down_frames, + ) + assert context.messages[0]["content"] == "Hello Pipecat. How are you?" + + async def test_multiple_text_mixed_spaces(self): + context = LLMContext() + aggregator = LLMAssistantAggregator(context) + + def make_text_frame(text: str, includes_spaces: bool) -> TextFrame: + frame = TextFrame(text=text) + frame.includes_inter_frame_spaces = includes_spaces + return frame + + frames_to_send = [ + LLMFullResponseStartFrame(), + make_text_frame("Hello ", includes_spaces=True), + make_text_frame("Pipecat. ", includes_spaces=True), + make_text_frame("Here's some", includes_spaces=True), + make_text_frame( + " code:", includes_spaces=True + ), # Validates ending includes_inter_frame_spaces run with no space + make_text_frame("```python\nprint('Hello, World!')\n```", includes_spaces=False), + make_text_frame( + "```javascript\nconsole.log('Hello, World!');\n```", includes_spaces=False + ), + make_text_frame( + " And some more: ", includes_spaces=True + ), # Validates starting includes_inter_frame_spaces run with a space and ending it with no space + make_text_frame("```html\n
Hello, World!
\n```", includes_spaces=False), + make_text_frame( + "Hope that ", includes_spaces=True + ), # Validates starting includes_inter_frame_spaces run with no space + make_text_frame("helps!", includes_spaces=True), + LLMFullResponseEndFrame(), + ] + expected_down_frames = [LLMContextFrame, LLMContextAssistantTimestampFrame] + await run_test( + aggregator, + frames_to_send=frames_to_send, + expected_down_frames=expected_down_frames, + ) + assert context.messages[0]["content"] == ( + "Hello Pipecat. Here's some code: " + "```python\nprint('Hello, World!')\n``` " + "```javascript\nconsole.log('Hello, World!');\n``` " + "And some more: " + "```html\n
Hello, World!
\n``` " + "Hope that helps!" + ) + + async def test_multiple_responses(self): + context = LLMContext() + aggregator = LLMAssistantAggregator(context) + + def make_text_frame(text: str) -> TextFrame: + frame = TextFrame(text=text) + frame.includes_inter_frame_spaces = True + return frame + + frames_to_send = [ + LLMFullResponseStartFrame(), + make_text_frame("Hello "), + make_text_frame("Pipecat."), + LLMFullResponseEndFrame(), + LLMFullResponseStartFrame(), + make_text_frame(text="How are "), + make_text_frame(text="you?"), + LLMFullResponseEndFrame(), + ] + expected_down_frames = [ + LLMContextFrame, + LLMContextAssistantTimestampFrame, + LLMContextFrame, + LLMContextAssistantTimestampFrame, + ] + await run_test( + aggregator, + frames_to_send=frames_to_send, + expected_down_frames=expected_down_frames, + ) + assert context.messages[0]["content"] == "Hello Pipecat." + assert context.messages[1]["content"] == "How are you?" + + async def test_multiple_responses_interruption(self): + context = LLMContext() + aggregator = LLMAssistantAggregator(context) + + def make_text_frame(text: str) -> TextFrame: + frame = TextFrame(text=text) + frame.includes_inter_frame_spaces = True + return frame + + frames_to_send = [ + LLMFullResponseStartFrame(), + make_text_frame("Hello "), + make_text_frame("Pipecat."), + LLMFullResponseEndFrame(), + SleepFrame(0.15), + InterruptionFrame(), + LLMFullResponseStartFrame(), + make_text_frame("How are "), + make_text_frame("you?"), + LLMFullResponseEndFrame(), + ] + expected_down_frames = [ + LLMContextFrame, + LLMContextAssistantTimestampFrame, + InterruptionFrame, + LLMContextFrame, + LLMContextAssistantTimestampFrame, + ] + await run_test( + aggregator, + frames_to_send=frames_to_send, + expected_down_frames=expected_down_frames, + ) + assert context.messages[0]["content"] == "Hello Pipecat." + assert context.messages[1]["content"] == "How are you?" + async def test_interruption(self): context = LLMContext() @@ -635,6 +798,67 @@ class TestLLMAssistantAggregator(unittest.IsolatedAsyncioTestCase): self.assertEqual(stop_messages[0].content, "Hello") self.assertEqual(stop_messages[1].content, "Hello there!") + async def test_function_call(self): + context = LLMContext() + aggregator = LLMAssistantAggregator(context) + frames_to_send = [ + FunctionCallInProgressFrame( + function_name="get_weather", + tool_call_id="1", + arguments={"location": "Los Angeles"}, + cancel_on_interruption=False, + ), + SleepFrame(), + FunctionCallResultFrame( + function_name="get_weather", + tool_call_id="1", + arguments={"location": "Los Angeles"}, + result={"conditions": "Sunny"}, + ), + ] + expected_down_frames = [] + await run_test( + aggregator, + frames_to_send=frames_to_send, + expected_down_frames=expected_down_frames, + ) + assert json.loads(context.messages[-1]["content"]) == {"conditions": "Sunny"} + + async def test_function_call_on_context_updated(self): + context_updated = False + + async def on_context_updated(): + nonlocal context_updated + context_updated = True + + context = LLMContext() + aggregator = LLMAssistantAggregator(context) + frames_to_send = [ + FunctionCallInProgressFrame( + function_name="get_weather", + tool_call_id="1", + arguments={"location": "Los Angeles"}, + cancel_on_interruption=False, + ), + SleepFrame(), + FunctionCallResultFrame( + function_name="get_weather", + tool_call_id="1", + arguments={"location": "Los Angeles"}, + result={"conditions": "Sunny"}, + properties=FunctionCallResultProperties(on_context_updated=on_context_updated), + ), + SleepFrame(), + ] + expected_down_frames = [] + await run_test( + aggregator, + frames_to_send=frames_to_send, + expected_down_frames=expected_down_frames, + ) + assert json.loads(context.messages[-1]["content"]) == {"conditions": "Sunny"} + assert context_updated + async def test_thought(self): context = LLMContext() diff --git a/tests/test_google_llm_openai.py b/tests/test_google_llm_openai.py deleted file mode 100644 index 5e6cee6f8..000000000 --- a/tests/test_google_llm_openai.py +++ /dev/null @@ -1,81 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""Unit tests for Google LLM OpenAI Beta service.""" - -import asyncio -import warnings -from unittest.mock import AsyncMock, patch - -import pytest - -from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext - -try: - from pipecat.services.google.openai.llm import GoogleLLMOpenAIBetaService - - google_available = True -except Exception: - google_available = False - - -@pytest.mark.asyncio -@pytest.mark.skipif(not google_available, reason="Google dependencies not installed") -async def test_google_llm_openai_stream_closed_on_cancellation(): - """Test that the stream is closed when CancelledError occurs during iteration. - - This prevents socket leaks when the pipeline is interrupted (e.g., user interruption). - See issue #3639. - """ - with patch.object(GoogleLLMOpenAIBetaService, "create_client"): - with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - service = GoogleLLMOpenAIBetaService(api_key="test-key", model="test-model") - service._client = AsyncMock() - - stream_closed = False - - class MockAsyncStream: - """Mock AsyncStream that tracks close() calls and raises CancelledError.""" - - def __init__(self): - self.iteration_count = 0 - - async def __aenter__(self): - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - nonlocal stream_closed - stream_closed = True - return False - - def __aiter__(self): - return self - - async def __anext__(self): - self.iteration_count += 1 - if self.iteration_count > 1: - raise asyncio.CancelledError() - mock_chunk = AsyncMock() - mock_chunk.usage = None - mock_chunk.choices = [] - return mock_chunk - - mock_stream = MockAsyncStream() - - service._stream_chat_completions_specific_context = AsyncMock(return_value=mock_stream) - service.start_ttfb_metrics = AsyncMock() - service.stop_ttfb_metrics = AsyncMock() - service.start_llm_usage_metrics = AsyncMock() - - context = OpenAILLMContext( - messages=[{"role": "user", "content": "Hello"}], - ) - - with pytest.raises(asyncio.CancelledError): - await service._process_context(context) - - assert stream_closed, "Stream should be closed even when CancelledError occurs" diff --git a/tests/test_run_inference.py b/tests/test_run_inference.py index ae9d30f8f..2a7f7695e 100644 --- a/tests/test_run_inference.py +++ b/tests/test_run_inference.py @@ -84,61 +84,6 @@ async def test_openai_run_inference_with_llm_context(): ) -@pytest.mark.asyncio -async def test_openai_run_inference_with_openai_llm_context(): - """Test run_inference with OpenAILLMContext returns expected response.""" - # Create service with mocked client and specific parameters - with patch.object(OpenAILLMService, "create_client"): - from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext - from pipecat.services.openai.base_llm import BaseOpenAILLMService - - params = BaseOpenAILLMService.InputParams( - temperature=0.8, max_completion_tokens=150, presence_penalty=0.3, top_p=0.9 - ) - service = OpenAILLMService(model="gpt-4", params=params) - service._client = AsyncMock() - - # Create OpenAILLMContext - context = OpenAILLMContext( - messages=[ - {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "Hello, world!"}, - ], - tools=OPENAI_NOT_GIVEN, - tool_choice=OPENAI_NOT_GIVEN, - ) - - # Mock response - mock_response = MagicMock() - mock_response.choices = [MagicMock()] - mock_response.choices[0].message.content = "Hello! How can I help you today?" - service._client.chat.completions.create.return_value = mock_response - - # Execute - result = await service.run_inference(context) - - # Verify - assert result == "Hello! How can I help you today?" - service._client.chat.completions.create.assert_called_once_with( - model="gpt-4", - stream=False, - frequency_penalty=OPENAI_NOT_GIVEN, - presence_penalty=0.3, - seed=OPENAI_NOT_GIVEN, - temperature=0.8, - top_p=0.9, - max_tokens=OPENAI_NOT_GIVEN, - max_completion_tokens=150, - service_tier=OPENAI_NOT_GIVEN, - messages=[ - {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "Hello, world!"}, - ], - tools=OPENAI_NOT_GIVEN, - tool_choice=OPENAI_NOT_GIVEN, - ) - - @pytest.mark.asyncio async def test_openai_run_inference_client_exception(): """Test that exceptions from the client are propagated.""" @@ -209,54 +154,6 @@ async def test_anthropic_run_inference_with_llm_context(): ) -@pytest.mark.asyncio -async def test_anthropic_run_inference_with_openai_llm_context(): - """Test run_inference with OpenAILLMContext returns expected response for Anthropic.""" - # Create service with mocked client and specific parameters - from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext - from pipecat.services.anthropic.llm import AnthropicLLMService - - params = AnthropicLLMService.InputParams(max_tokens=1024, temperature=0.7, top_k=40, top_p=0.9) - service = AnthropicLLMService( - api_key="test-key", model="claude-3-sonnet-20240229", params=params - ) - service._client = AsyncMock() - - # Create OpenAILLMContext - context = OpenAILLMContext( - messages=[ - {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "Hello, world!"}, - ], - tools=NOT_GIVEN, - tool_choice=NOT_GIVEN, - ) - - # Mock response - mock_response = MagicMock() - mock_response.content = [MagicMock()] - mock_response.content[0].text = "Hello! How can I help you today?" - service._client.beta.messages.create.return_value = mock_response - - # Execute - result = await service.run_inference(context) - - # Verify - assert result == "Hello! How can I help you today?" - service._client.beta.messages.create.assert_called_once_with( - model="claude-3-sonnet-20240229", - max_tokens=1024, - stream=False, - temperature=0.7, - top_k=40, - top_p=0.9, - messages=[{"role": "user", "content": "Hello, world!"}], - system="You are a helpful assistant", - tools=[], - betas=["interleaved-thinking-2025-05-14"], - ) - - @pytest.mark.asyncio async def test_anthropic_run_inference_client_exception(): """Test that exceptions from the Anthropic client are propagated.""" @@ -336,61 +233,6 @@ async def test_google_run_inference_client_exception(): await service.run_inference(mock_context) -@pytest.mark.asyncio -async def test_google_run_inference_with_openai_llm_context(): - """Test run_inference with OpenAILLMContext returns expected response for Google.""" - # Create service with mocked client and specific parameters - from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext - - params = GoogleLLMService.InputParams(max_tokens=256, temperature=0.4, top_k=30, top_p=0.75) - service = GoogleLLMService(api_key="test-key", model="gemini-2.0-flash", params=params) - service._client = AsyncMock() - - # Create OpenAILLMContext - context = OpenAILLMContext( - messages=[ - {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "Hello, world!"}, - ], - tools=NOT_GIVEN, - tool_choice=NOT_GIVEN, - ) - - # Mock response - mock_response = MagicMock() - mock_response.candidates = [MagicMock()] - mock_response.candidates[0].content = MagicMock() - mock_response.candidates[0].content.parts = [MagicMock()] - mock_response.candidates[0].content.parts[0].text = "Hello! How can I help you today?" - service._client.aio = AsyncMock() - service._client.aio.models = AsyncMock() - service._client.aio.models.generate_content = AsyncMock(return_value=mock_response) - - # Execute - result = await service.run_inference(context) - - # Verify - assert result == "Hello! How can I help you today?" - - # Verify the call includes configured parameters - call_kwargs = service._client.aio.models.generate_content.call_args.kwargs - assert call_kwargs["model"] == "gemini-2.0-flash" - # Contents is a Google Content object, so check its structure - contents = call_kwargs["contents"] - assert len(contents) == 1 - assert contents[0].role == "user" - assert len(contents[0].parts) == 1 - assert contents[0].parts[0].text == "Hello, world!" - assert "config" in call_kwargs - config = call_kwargs["config"] - # Config is a GenerateContentConfig object, so access attributes - assert config.system_instruction == "You are a helpful assistant" - assert config.temperature == 0.4 - assert config.top_k == 30 - assert config.top_p == 0.75 - assert config.max_output_tokens == 256 - - @pytest.mark.asyncio async def test_aws_bedrock_run_inference_with_llm_context(): """Test run_inference with LLMContext returns expected response for AWS Bedrock.""" @@ -445,57 +287,6 @@ async def test_aws_bedrock_run_inference_with_llm_context(): assert call_kwargs["inferenceConfig"]["topP"] == 0.85 -@pytest.mark.asyncio -async def test_aws_bedrock_run_inference_with_openai_llm_context(): - """Test run_inference with OpenAILLMContext returns expected response for AWS Bedrock.""" - # Create service with specific parameters - from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext - from pipecat.services.aws.llm import AWSBedrockLLMService - - params = AWSBedrockLLMService.InputParams(max_tokens=512, temperature=0.8, top_p=0.95) - service = AWSBedrockLLMService(model="anthropic.claude-3-sonnet-20240229-v1:0", params=params) - - # Create OpenAILLMContext - context = OpenAILLMContext( - messages=[ - {"role": "system", "content": "You are a helpful assistant"}, - {"role": "user", "content": "Hello, world!"}, - ], - tools=NOT_GIVEN, - tool_choice=NOT_GIVEN, - ) - - # Mock the client and response - mock_client = AsyncMock() - mock_response = { - "output": {"message": {"content": [{"text": "Hello! How can I help you today?"}]}} - } - mock_client.converse.return_value = mock_response - - # Patch the _aws_session.client method to be an async context manager - mock_context_manager = AsyncMock() - mock_context_manager.__aenter__ = AsyncMock(return_value=mock_client) - mock_context_manager.__aexit__ = AsyncMock(return_value=None) - - with patch.object(service._aws_session, "client", return_value=mock_context_manager): - # Execute - result = await service.run_inference(context) - - # Verify - assert result == "Hello! How can I help you today?" - - # Verify the call includes configured parameters - call_kwargs = mock_client.converse.call_args.kwargs - assert call_kwargs["modelId"] == "anthropic.claude-3-sonnet-20240229-v1:0" - assert call_kwargs["messages"] == [{"role": "user", "content": [{"text": "Hello, world!"}]}] - assert call_kwargs["system"] == [{"text": "You are a helpful assistant"}] - assert call_kwargs["additionalModelRequestFields"] == {} - assert "inferenceConfig" in call_kwargs - assert call_kwargs["inferenceConfig"]["maxTokens"] == 512 - assert call_kwargs["inferenceConfig"]["temperature"] == 0.8 - assert call_kwargs["inferenceConfig"]["topP"] == 0.95 - - @pytest.mark.asyncio async def test_aws_bedrock_run_inference_client_exception(): """Test that exceptions from the AWS Bedrock client are propagated.""" From 19e521b75afc4059d45babf26e818ca04c6c8385 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 31 Mar 2026 18:35:48 -0400 Subject: [PATCH 02/11] Simplify LLMContextFrame handling in process_frame methods Now that LLMContextFrame is the only frame that provides a context, remove the intermediate `context = None` / `if context:` pattern and handle context processing directly in the isinstance branch. --- src/pipecat/services/anthropic/llm.py | 6 +----- src/pipecat/services/aws/llm.py | 6 +----- src/pipecat/services/aws/nova_sonic/llm.py | 3 +-- src/pipecat/services/google/llm.py | 7 +------ src/pipecat/services/mem0/memory.py | 5 ----- src/pipecat/services/openai/base_llm.py | 10 +++------- src/pipecat/services/openai/responses/llm.py | 20 ++++++-------------- 7 files changed, 13 insertions(+), 44 deletions(-) diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index 58dc1da07..9c07a8444 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -552,18 +552,14 @@ class AnthropicLLMService(LLMService): """ await super().process_frame(frame, direction) - context = None if isinstance(frame, LLMContextFrame): - context = frame.context + await self._process_context(frame.context) elif isinstance(frame, LLMEnablePromptCachingFrame): logger.debug(f"Setting enable prompt caching to: [{frame.enable}]") self._settings.enable_prompt_caching = frame.enable else: await self.push_frame(frame, direction) - if context: - await self._process_context(context) - def _estimate_tokens(self, text: str) -> int: return int(len(re.split(r"[^\w]+", text)) * 1.3) diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index 2e2bd3c59..722072592 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -564,15 +564,11 @@ class AWSBedrockLLMService(LLMService): """ await super().process_frame(frame, direction) - context = None if isinstance(frame, LLMContextFrame): - context = frame.context + await self._process_context(frame.context) else: await self.push_frame(frame, direction) - if context: - await self._process_context(context) - def _estimate_tokens(self, text: str) -> int: return int(len(re.split(r"[^\w]+", text)) * 1.3) diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 545c42b1c..9aa36c5db 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -524,8 +524,7 @@ class AWSNovaSonicLLMService(LLMService): await super().process_frame(frame, direction) if isinstance(frame, LLMContextFrame): - context = frame.context - await self._handle_context(context) + await self._handle_context(frame.context) elif isinstance(frame, InputAudioRawFrame): await self._handle_input_audio_frame(frame) elif isinstance(frame, InterruptionFrame): diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 7af1754a6..b336fcef5 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -637,16 +637,11 @@ class GoogleLLMService(LLMService): """ await super().process_frame(frame, direction) - context = None - if isinstance(frame, LLMContextFrame): - context = frame.context + await self._process_context(frame.context) else: await self.push_frame(frame, direction) - if context: - await self._process_context(context) - async def stop(self, frame): """Override stop to gracefully close the client.""" await super().stop(frame) diff --git a/src/pipecat/services/mem0/memory.py b/src/pipecat/services/mem0/memory.py index 86f37be41..91396cab4 100644 --- a/src/pipecat/services/mem0/memory.py +++ b/src/pipecat/services/mem0/memory.py @@ -265,12 +265,8 @@ class Mem0MemoryService(FrameProcessor): """ await super().process_frame(frame, direction) - context = None - if isinstance(frame, LLMContextFrame): context = frame.context - - if context: try: # Get the latest user message to use as a query for memory retrieval context_messages = context.get_messages() @@ -300,5 +296,4 @@ class Mem0MemoryService(FrameProcessor): ) await self.push_frame(frame) # Still pass the original frame through else: - # For non-context frames, just pass them through await self.push_frame(frame, direction) diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index d007615ba..63f4d7328 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -532,17 +532,11 @@ class BaseOpenAILLMService(LLMService): """ await super().process_frame(frame, direction) - context = None if isinstance(frame, LLMContextFrame): - context = frame.context - else: - await self.push_frame(frame, direction) - - if context: try: await self.push_frame(LLMFullResponseStartFrame()) await self.start_processing_metrics() - await self._process_context(context) + await self._process_context(frame.context) except httpx.TimeoutException as e: await self._call_event_handler("on_completion_timeout") await self.push_error(error_msg="LLM completion timeout", exception=e) @@ -551,3 +545,5 @@ class BaseOpenAILLMService(LLMService): finally: await self.stop_processing_metrics() await self.push_frame(LLMFullResponseEndFrame()) + else: + await self.push_frame(frame, direction) diff --git a/src/pipecat/services/openai/responses/llm.py b/src/pipecat/services/openai/responses/llm.py index fce6b46d8..e1b4ace78 100644 --- a/src/pipecat/services/openai/responses/llm.py +++ b/src/pipecat/services/openai/responses/llm.py @@ -674,17 +674,11 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService, WebsocketLLMServ """ await super().process_frame(frame, direction) - context = None if isinstance(frame, LLMContextFrame): - context = frame.context - else: - await self.push_frame(frame, direction) - - if context: try: await self.push_frame(LLMFullResponseStartFrame()) await self.start_processing_metrics() - await self._process_context(context) + await self._process_context(frame.context) except asyncio.CancelledError: # The pipeline cancelled us (e.g. due to an interruption). # Ask the server to stop generating and flag that we need @@ -717,6 +711,8 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService, WebsocketLLMServ finally: await self.stop_processing_metrics() await self.push_frame(LLMFullResponseEndFrame()) + else: + await self.push_frame(frame, direction) # -- core inference ------------------------------------------------------- @@ -960,17 +956,11 @@ class OpenAIResponsesHttpLLMService(_BaseOpenAIResponsesLLMService): """ await super().process_frame(frame, direction) - context = None if isinstance(frame, LLMContextFrame): - context = frame.context - else: - await self.push_frame(frame, direction) - - if context: try: await self.push_frame(LLMFullResponseStartFrame()) await self.start_processing_metrics() - await self._process_context(context) + await self._process_context(frame.context) except httpx.TimeoutException as e: await self._call_event_handler("on_completion_timeout") await self.push_error(error_msg="LLM completion timeout", exception=e) @@ -979,6 +969,8 @@ class OpenAIResponsesHttpLLMService(_BaseOpenAIResponsesLLMService): finally: await self.stop_processing_metrics() await self.push_frame(LLMFullResponseEndFrame()) + else: + await self.push_frame(frame, direction) @traced_llm async def _process_context(self, context: LLMContext): From 110c88bf92540e09fa4796fc3e434b47da48f7dd Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 31 Mar 2026 18:53:55 -0400 Subject: [PATCH 03/11] Remove stale re-export of deleted google.openai subpackage --- src/pipecat/services/google/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pipecat/services/google/__init__.py b/src/pipecat/services/google/__init__.py index 32b12e367..6c50ae4b4 100644 --- a/src/pipecat/services/google/__init__.py +++ b/src/pipecat/services/google/__init__.py @@ -12,7 +12,6 @@ from .frames import * from .gemini_live import * from .image import * from .llm import * -from .openai import * from .rtvi import * from .stt import * from .tts import * From ebab75765dfb6fb827dfbb36d1191c5f19f7c3dc Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 31 Mar 2026 18:54:23 -0400 Subject: [PATCH 04/11] Fix stream cancellation tests to mock get_chat_completions The tests were mocking the removed _stream_chat_completions_*_context methods. Update them to mock get_chat_completions instead. --- tests/test_novita_llm.py | 3 +-- tests/test_openai_llm_timeout.py | 6 ++---- tests/test_sambanova_llm.py | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/test_novita_llm.py b/tests/test_novita_llm.py index e0f2c71b4..9aab54cab 100644 --- a/tests/test_novita_llm.py +++ b/tests/test_novita_llm.py @@ -51,8 +51,7 @@ async def test_novita_llm_stream_closed_on_cancellation(): mock_stream = MockAsyncStream() - service._stream_chat_completions_specific_context = AsyncMock(return_value=mock_stream) - service._stream_chat_completions_universal_context = AsyncMock(return_value=mock_stream) + service.get_chat_completions = AsyncMock(return_value=mock_stream) service.start_ttfb_metrics = AsyncMock() service.stop_ttfb_metrics = AsyncMock() service.start_llm_usage_metrics = AsyncMock() diff --git a/tests/test_openai_llm_timeout.py b/tests/test_openai_llm_timeout.py index 8ee776a9b..61264cbf5 100644 --- a/tests/test_openai_llm_timeout.py +++ b/tests/test_openai_llm_timeout.py @@ -171,8 +171,7 @@ async def test_openai_llm_stream_closed_on_cancellation(): mock_stream = MockAsyncStream() # Mock the stream creation methods - service._stream_chat_completions_specific_context = AsyncMock(return_value=mock_stream) - service._stream_chat_completions_universal_context = AsyncMock(return_value=mock_stream) + service.get_chat_completions = AsyncMock(return_value=mock_stream) service.start_ttfb_metrics = AsyncMock() service.stop_ttfb_metrics = AsyncMock() service.start_llm_usage_metrics = AsyncMock() @@ -281,8 +280,7 @@ async def test_openai_llm_async_iterator_closed_on_stream_end(): mock_iterator = MockAsyncIterator() mock_stream = MockAsyncStream(mock_iterator) - service._stream_chat_completions_specific_context = AsyncMock(return_value=mock_stream) - service._stream_chat_completions_universal_context = AsyncMock(return_value=mock_stream) + service.get_chat_completions = AsyncMock(return_value=mock_stream) service.start_ttfb_metrics = AsyncMock() service.stop_ttfb_metrics = AsyncMock() service.start_llm_usage_metrics = AsyncMock() diff --git a/tests/test_sambanova_llm.py b/tests/test_sambanova_llm.py index 6632951fc..57e2e8c50 100644 --- a/tests/test_sambanova_llm.py +++ b/tests/test_sambanova_llm.py @@ -56,8 +56,7 @@ async def test_sambanova_llm_stream_closed_on_cancellation(): mock_stream = MockAsyncStream() - service._stream_chat_completions_specific_context = AsyncMock(return_value=mock_stream) - service._stream_chat_completions_universal_context = AsyncMock(return_value=mock_stream) + service.get_chat_completions = AsyncMock(return_value=mock_stream) service.start_ttfb_metrics = AsyncMock() service.stop_ttfb_metrics = AsyncMock() service.start_llm_usage_metrics = AsyncMock() From 92e34ea6e88e398722a8b36696d1190e35d6a3be Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Tue, 31 Mar 2026 21:00:51 -0400 Subject: [PATCH 05/11] Fix potential UnboundLocalError for system_message in tracing decorator Restore the `system_message = None` initialization that was dropped when collapsing the OpenAILLMContext branch. --- src/pipecat/utils/tracing/service_decorators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pipecat/utils/tracing/service_decorators.py b/src/pipecat/utils/tracing/service_decorators.py index 8f98026e6..d87955755 100644 --- a/src/pipecat/utils/tracing/service_decorators.py +++ b/src/pipecat/utils/tracing/service_decorators.py @@ -491,6 +491,7 @@ def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) - # Handle system message for different services # settings.system_instruction takes priority (matches service behavior) + system_message = None if hasattr(self, "_settings") and getattr( self._settings, "system_instruction", None ): From d3021b459067cb1f122acec6598309924c68ec2d Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 31 Mar 2026 22:06:01 -0400 Subject: [PATCH 06/11] Rename example files to prepend parent folder name, preventing package shadowing Example files like openai.py shadow installed packages when Python adds the script directory to sys.path. Prepend the parent folder name to each example file (e.g. openai.py -> function-calling-openai.py). Also split thinking-and-mcp/ into separate mcp/ and thinking/ directories. --- README.md | 2 +- .../{bot-background-sound.py => audio-bot-background-sound.py} | 0 examples/audio/{sound-effects.py => audio-sound-effects.py} | 0 ...{dedicated-llm.py => context-summarization-dedicated-llm.py} | 0 .../{google.py => context-summarization-google.py} | 0 ...{manual-openai.py => context-summarization-manual-openai.py} | 0 .../{openai.py => context-summarization-openai.py} | 0 ...-and-after-events.py => features-before-and-after-events.py} | 0 ...-llm-evaluation.py => features-concurrent-llm-evaluation.py} | 0 ...urces.py => features-concurrent-llm-rtvi-ignored-sources.py} | 0 ...om-frame-processor.py => features-custom-frame-processor.py} | 0 ...ntainer-local-bot.py => features-gpu-container-local-bot.py} | 0 .../{live-translation.py => features-live-translation.py} | 0 ...ce-switching.py => features-pattern-pair-voice-switching.py} | 0 .../{service-switcher.py => features-service-switcher.py} | 0 .../{switch-languages.py => features-switch-languages.py} | 0 .../features/{switch-voices.py => features-switch-voices.py} | 0 ...user-email-gathering.py => features-user-email-gathering.py} | 0 .../{voicemail-detection.py => features-voicemail-detection.py} | 0 examples/features/{wake-phrase.py => features-wake-phrase.py} | 0 .../{anthropic-video.py => function-calling-anthropic-video.py} | 0 .../{anthropic.py => function-calling-anthropic.py} | 0 .../{aws-video.py => function-calling-aws-video.py} | 0 examples/function-calling/{aws.py => function-calling-aws.py} | 0 .../function-calling/{azure.py => function-calling-azure.py} | 0 .../{cerebras.py => function-calling-cerebras.py} | 0 .../{deepseek.py => function-calling-deepseek.py} | 0 .../function-calling/{direct.py => function-calling-direct.py} | 0 .../{fireworks.py => function-calling-fireworks.py} | 0 .../{google-vertex.py => function-calling-google-vertex.py} | 0 .../{google-video.py => function-calling-google-video.py} | 0 .../function-calling/{google.py => function-calling-google.py} | 0 examples/function-calling/{grok.py => function-calling-grok.py} | 0 examples/function-calling/{groq.py => function-calling-groq.py} | 0 .../{mistral.py => function-calling-mistral.py} | 0 .../{moondream-video.py => function-calling-moondream-video.py} | 0 .../function-calling/{nebius.py => function-calling-nebius.py} | 0 .../function-calling/{novita.py => function-calling-novita.py} | 0 .../function-calling/{nvidia.py => function-calling-nvidia.py} | 0 .../function-calling/{ollama.py => function-calling-ollama.py} | 0 ...ponses-http.py => function-calling-openai-responses-http.py} | 0 ...-http.py => function-calling-openai-responses-video-http.py} | 0 ...nses-video.py => function-calling-openai-responses-video.py} | 0 ...openai-responses.py => function-calling-openai-responses.py} | 0 .../{openai-video.py => function-calling-openai-video.py} | 0 .../function-calling/{openai.py => function-calling-openai.py} | 0 .../{openrouter.py => function-calling-openrouter.py} | 0 .../{perplexity.py => function-calling-perplexity.py} | 0 examples/function-calling/{qwen.py => function-calling-qwen.py} | 0 .../{sambanova.py => function-calling-sambanova.py} | 0 .../function-calling/{sarvam.py => function-calling-sarvam.py} | 0 .../{together.py => function-calling-together.py} | 0 .../multiple-mcp.py => mcp/mcp-multiple-mcp.py} | 0 examples/{thinking-and-mcp => mcp}/mcp-stdio.py | 0 .../mcp-streamable-http-gemini-live.py | 0 examples/{thinking-and-mcp => mcp}/mcp-streamable-http.py | 0 .../{heartbeats.py => observability-heartbeats.py} | 0 .../observability/{observer.py => observability-observer.py} | 0 .../{sentry-metrics.py => observability-sentry-metrics.py} | 0 .../{anthropic.py => persistent-context-anthropic.py} | 0 .../{aws-nova-sonic.py => persistent-context-aws-nova-sonic.py} | 0 .../{gemini.py => persistent-context-gemini.py} | 0 .../{grok-realtime.py => persistent-context-grok-realtime.py} | 0 ...openai-realtime.py => persistent-context-openai-realtime.py} | 0 ...nses-http.py => persistent-context-openai-responses-http.py} | 0 ...enai-responses.py => persistent-context-openai-responses.py} | 0 .../{openai.py => persistent-context-openai.py} | 0 ...i-grounding-metadata.py => rag-gemini-grounding-metadata.py} | 0 examples/rag/{gemini-rag.py => rag-gemini.py} | 0 examples/rag/{mem0.py => rag-mem0.py} | 0 .../realtime/{aws-nova-sonic.py => realtime-aws-nova-sonic.py} | 0 examples/realtime/{azure.py => realtime-azure.py} | 0 ...mini-live-files-api.py => realtime-gemini-live-files-api.py} | 0 ...tion-calling.py => realtime-gemini-live-function-calling.py} | 0 ...e-google-search.py => realtime-gemini-live-google-search.py} | 0 ...ive-graceful-end.py => realtime-gemini-live-graceful-end.py} | 0 ...g-metadata.py => realtime-gemini-live-grounding-metadata.py} | 0 ...mini-live-local-vad.py => realtime-gemini-live-local-vad.py} | 0 ...lling.py => realtime-gemini-live-vertex-function-calling.py} | 0 .../{gemini-live-video.py => realtime-gemini-live-video.py} | 0 examples/realtime/{gemini-live.py => realtime-gemini-live.py} | 0 examples/realtime/{grok.py => realtime-grok.py} | 0 .../{openai-live-video.py => realtime-openai-live-video.py} | 0 examples/realtime/{openai-text.py => realtime-openai-text.py} | 0 examples/realtime/{openai.py => realtime-openai.py} | 0 .../realtime/{ultravox-text.py => realtime-ultravox-text.py} | 0 examples/realtime/{ultravox.py => realtime-ultravox.py} | 0 examples/{thinking-and-mcp => thinking}/thinking-anthropic.py | 0 .../thinking-functions-anthropic.py | 0 .../{thinking-and-mcp => thinking}/thinking-functions-google.py | 0 examples/{thinking-and-mcp => thinking}/thinking-google.py | 0 .../{assemblyai.py => transcription-assemblyai.py} | 0 examples/transcription/{azure.py => transcription-azure.py} | 0 .../transcription/{cartesia.py => transcription-cartesia.py} | 0 .../transcription/{deepgram.py => transcription-deepgram.py} | 0 .../{elevenlabs.py => transcription-elevenlabs.py} | 0 ...ladia-translation.py => transcription-gladia-translation.py} | 0 .../{gladia-transcription.py => transcription-gladia.py} | 0 .../{google-llm.py => transcription-google-llm.py} | 0 examples/transcription/{gradium.py => transcription-gradium.py} | 0 examples/transcription/{openai.py => transcription-openai.py} | 0 examples/transcription/{soniox.py => transcription-soniox.py} | 0 .../{speechmatics.py => transcription-speechmatics.py} | 0 .../{whisper-local.py => transcription-whisper-local.py} | 0 .../{whisper-mlx.py => transcription-whisper-mlx.py} | 0 examples/transcription/{whisper.py => transcription-whisper.py} | 0 examples/transports/{daily.py => transports-daily.py} | 0 examples/transports/{livekit.py => transports-livekit.py} | 0 .../transports/{small-webrtc.py => transports-small-webrtc.py} | 0 ...{detect-user-idle.py => turn-management-detect-user-idle.py} | 0 ...lete-turns.py => turn-management-filter-incomplete-turns.py} | 0 ...ruption-config.py => turn-management-interruption-config.py} | 0 ...cal-coreml.py => turn-management-smart-turn-local-coreml.py} | 0 ...{smart-turn-local.py => turn-management-smart-turn-local.py} | 0 ...ng-observer.py => turn-management-turn-tracking-observer.py} | 0 ...sistant-turns.py => turn-management-user-assistant-turns.py} | 0 ...r-mute-strategy.py => turn-management-user-mute-strategy.py} | 0 examples/update-settings/llm/{anthropic.py => llm-anthropic.py} | 0 .../update-settings/llm/{aws-bedrock.py => llm-aws-bedrock.py} | 0 .../llm/{aws-nova-sonic.py => llm-aws-nova-sonic.py} | 0 .../llm/{azure-realtime.py => llm-azure-realtime.py} | 0 examples/update-settings/llm/{azure.py => llm-azure.py} | 0 examples/update-settings/llm/{cerebras.py => llm-cerebras.py} | 0 examples/update-settings/llm/{deepseek.py => llm-deepseek.py} | 0 examples/update-settings/llm/{fireworks.py => llm-fireworks.py} | 0 .../llm/{gemini-live-vertex.py => llm-gemini-live-vertex.py} | 0 .../update-settings/llm/{gemini-live.py => llm-gemini-live.py} | 0 .../llm/{google-vertex.py => llm-google-vertex.py} | 0 examples/update-settings/llm/{google.py => llm-google.py} | 0 .../llm/{grok-realtime.py => llm-grok-realtime.py} | 0 examples/update-settings/llm/{grok.py => llm-grok.py} | 0 examples/update-settings/llm/{groq.py => llm-groq.py} | 0 examples/update-settings/llm/{mistral.py => llm-mistral.py} | 0 examples/update-settings/llm/{nvidia.py => llm-nvidia.py} | 0 examples/update-settings/llm/{ollama.py => llm-ollama.py} | 0 .../llm/{openai-realtime.py => llm-openai-realtime.py} | 0 .../{openai-responses-http.py => llm-openai-responses-http.py} | 0 .../llm/{openai-responses.py => llm-openai-responses.py} | 0 examples/update-settings/llm/{openai.py => llm-openai.py} | 0 .../update-settings/llm/{openrouter.py => llm-openrouter.py} | 0 .../update-settings/llm/{perplexity.py => llm-perplexity.py} | 0 examples/update-settings/llm/{qwen.py => llm-qwen.py} | 0 examples/update-settings/llm/{sambanova.py => llm-sambanova.py} | 0 examples/update-settings/llm/{sarvam.py => llm-sarvam.py} | 0 examples/update-settings/llm/{together.py => llm-together.py} | 0 .../llm/{ultravox-realtime.py => llm-ultravox-realtime.py} | 0 .../update-settings/stt/{assemblyai.py => stt-assemblyai.py} | 0 .../stt/{aws-transcribe.py => stt-aws-transcribe.py} | 0 examples/update-settings/stt/{azure.py => stt-azure.py} | 0 examples/update-settings/stt/{cartesia.py => stt-cartesia.py} | 0 .../stt/{deepgram-flux.py => stt-deepgram-flux.py} | 0 .../stt/{deepgram-sagemaker.py => stt-deepgram-sagemaker.py} | 0 examples/update-settings/stt/{deepgram.py => stt-deepgram.py} | 0 .../stt/{elevenlabs-realtime.py => stt-elevenlabs-realtime.py} | 0 .../update-settings/stt/{elevenlabs.py => stt-elevenlabs.py} | 0 examples/update-settings/stt/{fal.py => stt-fal.py} | 0 examples/update-settings/stt/{gladia.py => stt-gladia.py} | 0 examples/update-settings/stt/{google.py => stt-google.py} | 0 examples/update-settings/stt/{gradium.py => stt-gradium.py} | 0 examples/update-settings/stt/{groq.py => stt-groq.py} | 0 .../stt/{nvidia-segmented.py => stt-nvidia-segmented.py} | 0 examples/update-settings/stt/{nvidia.py => stt-nvidia.py} | 0 .../stt/{openai-realtime.py => stt-openai-realtime.py} | 0 examples/update-settings/stt/{sarvam.py => stt-sarvam.py} | 0 examples/update-settings/stt/{soniox.py => stt-soniox.py} | 0 .../stt/{speechmatics.py => stt-speechmatics.py} | 0 .../update-settings/stt/{whisper-api.py => stt-whisper-api.py} | 0 .../update-settings/stt/{whisper-mlx.py => stt-whisper-mlx.py} | 0 examples/update-settings/stt/{whisper.py => stt-whisper.py} | 0 .../tts/{asyncai-http.py => tts-asyncai-http.py} | 0 examples/update-settings/tts/{asyncai.py => tts-asyncai.py} | 0 examples/update-settings/tts/{aws-polly.py => tts-aws-polly.py} | 0 .../update-settings/tts/{azure-http.py => tts-azure-http.py} | 0 examples/update-settings/tts/{azure.py => tts-azure.py} | 0 examples/update-settings/tts/{camb.py => tts-camb.py} | 0 .../tts/{cartesia-http.py => tts-cartesia-http.py} | 0 examples/update-settings/tts/{cartesia.py => tts-cartesia.py} | 0 .../tts/{deepgram-http.py => tts-deepgram-http.py} | 0 .../tts/{deepgram-sagemaker.py => tts-deepgram-sagemaker.py} | 0 examples/update-settings/tts/{deepgram.py => tts-deepgram.py} | 0 .../tts/{elevenlabs-http.py => tts-elevenlabs-http.py} | 0 .../update-settings/tts/{elevenlabs.py => tts-elevenlabs.py} | 0 examples/update-settings/tts/{fish.py => tts-fish.py} | 0 examples/update-settings/tts/{gemini.py => tts-gemini.py} | 0 .../update-settings/tts/{google-http.py => tts-google-http.py} | 0 .../tts/{google-stream.py => tts-google-stream.py} | 0 examples/update-settings/tts/{gradium.py => tts-gradium.py} | 0 examples/update-settings/tts/{groq.py => tts-groq.py} | 0 examples/update-settings/tts/{hume.py => tts-hume.py} | 0 .../tts/{inworld-http.py => tts-inworld-http.py} | 0 examples/update-settings/tts/{inworld.py => tts-inworld.py} | 0 examples/update-settings/tts/{kokoro.py => tts-kokoro.py} | 0 examples/update-settings/tts/{lmnt.py => tts-lmnt.py} | 0 examples/update-settings/tts/{minimax.py => tts-minimax.py} | 0 .../tts/{neuphonic-http.py => tts-neuphonic-http.py} | 0 examples/update-settings/tts/{neuphonic.py => tts-neuphonic.py} | 0 examples/update-settings/tts/{nvidia.py => tts-nvidia.py} | 0 examples/update-settings/tts/{openai.py => tts-openai.py} | 0 .../update-settings/tts/{piper-http.py => tts-piper-http.py} | 0 examples/update-settings/tts/{piper.py => tts-piper.py} | 0 .../update-settings/tts/{resembleai.py => tts-resembleai.py} | 0 examples/update-settings/tts/{rime-http.py => tts-rime-http.py} | 0 examples/update-settings/tts/{rime.py => tts-rime.py} | 0 .../update-settings/tts/{sarvam-http.py => tts-sarvam-http.py} | 0 examples/update-settings/tts/{sarvam.py => tts-sarvam.py} | 0 .../tts/{speechmatics.py => tts-speechmatics.py} | 0 examples/update-settings/tts/{xtts.py => tts-xtts.py} | 0 .../{heygen-transport.py => video-avatar-heygen-transport.py} | 0 ...en-video-service.py => video-avatar-heygen-video-service.py} | 0 ...nslice-transport.py => video-avatar-lemonslice-transport.py} | 0 .../{simli-layer.py => video-avatar-simli-layer.py} | 0 .../{tavus-transport.py => video-avatar-tavus-transport.py} | 0 ...vus-video-service.py => video-avatar-tavus-video-service.py} | 0 ...om-video-track.py => video-processing-custom-video-track.py} | 0 ...treamer-filesrc.py => video-processing-gstreamer-filesrc.py} | 0 ...deotestsrc.py => video-processing-gstreamer-videotestsrc.py} | 0 .../{local-mirror.py => video-processing-local-mirror.py} | 0 .../video-processing/{mirror.py => video-processing-mirror.py} | 0 examples/vision/{anthropic.py => vision-anthropic.py} | 0 examples/vision/{aws.py => vision-aws.py} | 0 examples/vision/{gemini-flash.py => vision-gemini-flash.py} | 0 examples/vision/{moondream.py => vision-moondream.py} | 0 ...openai-responses-http.py => vision-openai-responses-http.py} | 0 .../vision/{openai-responses.py => vision-openai-responses.py} | 0 examples/vision/{openai.py => vision-openai.py} | 0 examples/voice/{aicoustics.py => voice-aicoustics.py} | 0 ...yai-turn-detection.py => voice-assemblyai-turn-detection.py} | 0 examples/voice/{assemblyai.py => voice-assemblyai.py} | 0 examples/voice/{asyncai-http.py => voice-asyncai-http.py} | 0 examples/voice/{asyncai.py => voice-asyncai.py} | 0 examples/voice/{aws-strands.py => voice-aws-strands.py} | 0 examples/voice/{aws.py => voice-aws.py} | 0 examples/voice/{azure-http.py => voice-azure-http.py} | 0 examples/voice/{azure.py => voice-azure.py} | 0 examples/voice/{camb.py => voice-camb.py} | 0 examples/voice/{cartesia-http.py => voice-cartesia-http.py} | 0 examples/voice/{cartesia.py => voice-cartesia.py} | 0 ...pgram-flux-sagemaker.py => voice-deepgram-flux-sagemaker.py} | 0 examples/voice/{deepgram-flux.py => voice-deepgram-flux.py} | 0 examples/voice/{deepgram-http.py => voice-deepgram-http.py} | 0 .../{deepgram-sagemaker.py => voice-deepgram-sagemaker.py} | 0 examples/voice/{deepgram-vad.py => voice-deepgram-vad.py} | 0 examples/voice/{deepgram.py => voice-deepgram.py} | 0 examples/voice/{elevenlabs-http.py => voice-elevenlabs-http.py} | 0 examples/voice/{elevenlabs.py => voice-elevenlabs.py} | 0 examples/voice/{fal.py => voice-fal.py} | 0 examples/voice/{fish.py => voice-fish.py} | 0 examples/voice/{gladia-vad.py => voice-gladia-vad.py} | 0 examples/voice/{gladia.py => voice-gladia.py} | 0 examples/voice/{google-audio-in.py => voice-google-audio-in.py} | 0 .../voice/{google-gemini-tts.py => voice-google-gemini-tts.py} | 0 examples/voice/{google-http.py => voice-google-http.py} | 0 examples/voice/{google-image.py => voice-google-image.py} | 0 examples/voice/{google.py => voice-google.py} | 0 examples/voice/{gradium.py => voice-gradium.py} | 0 examples/voice/{groq.py => voice-groq.py} | 0 examples/voice/{hume.py => voice-hume.py} | 0 examples/voice/{inworld-http.py => voice-inworld-http.py} | 0 examples/voice/{inworld.py => voice-inworld.py} | 0 examples/voice/{kokoro.py => voice-kokoro.py} | 0 examples/voice/{krisp-viva.py => voice-krisp-viva.py} | 0 examples/voice/{langchain.py => voice-langchain.py} | 0 examples/voice/{lmnt.py => voice-lmnt.py} | 0 examples/voice/{minimax.py => voice-minimax.py} | 0 examples/voice/{neuphonic-http.py => voice-neuphonic-http.py} | 0 examples/voice/{neuphonic.py => voice-neuphonic.py} | 0 examples/voice/{nvidia.py => voice-nvidia.py} | 0 examples/voice/{openai-http.py => voice-openai-http.py} | 0 ...{openai-responses-http.py => voice-openai-responses-http.py} | 0 .../voice/{openai-responses.py => voice-openai-responses.py} | 0 examples/voice/{openai.py => voice-openai.py} | 0 examples/voice/{piper.py => voice-piper.py} | 0 examples/voice/{resemble.py => voice-resemble.py} | 0 examples/voice/{rime-http.py => voice-rime-http.py} | 0 examples/voice/{rime.py => voice-rime.py} | 0 examples/voice/{sarvam-http.py => voice-sarvam-http.py} | 0 examples/voice/{sarvam.py => voice-sarvam.py} | 0 examples/voice/{smallest.py => voice-smallest.py} | 0 examples/voice/{soniox.py => voice-soniox.py} | 0 .../voice/{speechmatics-vad.py => voice-speechmatics-vad.py} | 0 examples/voice/{speechmatics.py => voice-speechmatics.py} | 0 examples/voice/{xai.py => voice-xai.py} | 0 examples/voice/{xtts.py => voice-xtts.py} | 0 283 files changed, 1 insertion(+), 1 deletion(-) rename examples/audio/{bot-background-sound.py => audio-bot-background-sound.py} (100%) rename examples/audio/{sound-effects.py => audio-sound-effects.py} (100%) rename examples/context-summarization/{dedicated-llm.py => context-summarization-dedicated-llm.py} (100%) rename examples/context-summarization/{google.py => context-summarization-google.py} (100%) rename examples/context-summarization/{manual-openai.py => context-summarization-manual-openai.py} (100%) rename examples/context-summarization/{openai.py => context-summarization-openai.py} (100%) rename examples/features/{before-and-after-events.py => features-before-and-after-events.py} (100%) rename examples/features/{concurrent-llm-evaluation.py => features-concurrent-llm-evaluation.py} (100%) rename examples/features/{concurrent-llm-rtvi-ignored-sources.py => features-concurrent-llm-rtvi-ignored-sources.py} (100%) rename examples/features/{custom-frame-processor.py => features-custom-frame-processor.py} (100%) rename examples/features/{gpu-container-local-bot.py => features-gpu-container-local-bot.py} (100%) rename examples/features/{live-translation.py => features-live-translation.py} (100%) rename examples/features/{pattern-pair-voice-switching.py => features-pattern-pair-voice-switching.py} (100%) rename examples/features/{service-switcher.py => features-service-switcher.py} (100%) rename examples/features/{switch-languages.py => features-switch-languages.py} (100%) rename examples/features/{switch-voices.py => features-switch-voices.py} (100%) rename examples/features/{user-email-gathering.py => features-user-email-gathering.py} (100%) rename examples/features/{voicemail-detection.py => features-voicemail-detection.py} (100%) rename examples/features/{wake-phrase.py => features-wake-phrase.py} (100%) rename examples/function-calling/{anthropic-video.py => function-calling-anthropic-video.py} (100%) rename examples/function-calling/{anthropic.py => function-calling-anthropic.py} (100%) rename examples/function-calling/{aws-video.py => function-calling-aws-video.py} (100%) rename examples/function-calling/{aws.py => function-calling-aws.py} (100%) rename examples/function-calling/{azure.py => function-calling-azure.py} (100%) rename examples/function-calling/{cerebras.py => function-calling-cerebras.py} (100%) rename examples/function-calling/{deepseek.py => function-calling-deepseek.py} (100%) rename examples/function-calling/{direct.py => function-calling-direct.py} (100%) rename examples/function-calling/{fireworks.py => function-calling-fireworks.py} (100%) rename examples/function-calling/{google-vertex.py => function-calling-google-vertex.py} (100%) rename examples/function-calling/{google-video.py => function-calling-google-video.py} (100%) rename examples/function-calling/{google.py => function-calling-google.py} (100%) rename examples/function-calling/{grok.py => function-calling-grok.py} (100%) rename examples/function-calling/{groq.py => function-calling-groq.py} (100%) rename examples/function-calling/{mistral.py => function-calling-mistral.py} (100%) rename examples/function-calling/{moondream-video.py => function-calling-moondream-video.py} (100%) rename examples/function-calling/{nebius.py => function-calling-nebius.py} (100%) rename examples/function-calling/{novita.py => function-calling-novita.py} (100%) rename examples/function-calling/{nvidia.py => function-calling-nvidia.py} (100%) rename examples/function-calling/{ollama.py => function-calling-ollama.py} (100%) rename examples/function-calling/{openai-responses-http.py => function-calling-openai-responses-http.py} (100%) rename examples/function-calling/{openai-responses-video-http.py => function-calling-openai-responses-video-http.py} (100%) rename examples/function-calling/{openai-responses-video.py => function-calling-openai-responses-video.py} (100%) rename examples/function-calling/{openai-responses.py => function-calling-openai-responses.py} (100%) rename examples/function-calling/{openai-video.py => function-calling-openai-video.py} (100%) rename examples/function-calling/{openai.py => function-calling-openai.py} (100%) rename examples/function-calling/{openrouter.py => function-calling-openrouter.py} (100%) rename examples/function-calling/{perplexity.py => function-calling-perplexity.py} (100%) rename examples/function-calling/{qwen.py => function-calling-qwen.py} (100%) rename examples/function-calling/{sambanova.py => function-calling-sambanova.py} (100%) rename examples/function-calling/{sarvam.py => function-calling-sarvam.py} (100%) rename examples/function-calling/{together.py => function-calling-together.py} (100%) rename examples/{thinking-and-mcp/multiple-mcp.py => mcp/mcp-multiple-mcp.py} (100%) rename examples/{thinking-and-mcp => mcp}/mcp-stdio.py (100%) rename examples/{thinking-and-mcp => mcp}/mcp-streamable-http-gemini-live.py (100%) rename examples/{thinking-and-mcp => mcp}/mcp-streamable-http.py (100%) rename examples/observability/{heartbeats.py => observability-heartbeats.py} (100%) rename examples/observability/{observer.py => observability-observer.py} (100%) rename examples/observability/{sentry-metrics.py => observability-sentry-metrics.py} (100%) rename examples/persistent-context/{anthropic.py => persistent-context-anthropic.py} (100%) rename examples/persistent-context/{aws-nova-sonic.py => persistent-context-aws-nova-sonic.py} (100%) rename examples/persistent-context/{gemini.py => persistent-context-gemini.py} (100%) rename examples/persistent-context/{grok-realtime.py => persistent-context-grok-realtime.py} (100%) rename examples/persistent-context/{openai-realtime.py => persistent-context-openai-realtime.py} (100%) rename examples/persistent-context/{openai-responses-http.py => persistent-context-openai-responses-http.py} (100%) rename examples/persistent-context/{openai-responses.py => persistent-context-openai-responses.py} (100%) rename examples/persistent-context/{openai.py => persistent-context-openai.py} (100%) rename examples/rag/{gemini-grounding-metadata.py => rag-gemini-grounding-metadata.py} (100%) rename examples/rag/{gemini-rag.py => rag-gemini.py} (100%) rename examples/rag/{mem0.py => rag-mem0.py} (100%) rename examples/realtime/{aws-nova-sonic.py => realtime-aws-nova-sonic.py} (100%) rename examples/realtime/{azure.py => realtime-azure.py} (100%) rename examples/realtime/{gemini-live-files-api.py => realtime-gemini-live-files-api.py} (100%) rename examples/realtime/{gemini-live-function-calling.py => realtime-gemini-live-function-calling.py} (100%) rename examples/realtime/{gemini-live-google-search.py => realtime-gemini-live-google-search.py} (100%) rename examples/realtime/{gemini-live-graceful-end.py => realtime-gemini-live-graceful-end.py} (100%) rename examples/realtime/{gemini-live-grounding-metadata.py => realtime-gemini-live-grounding-metadata.py} (100%) rename examples/realtime/{gemini-live-local-vad.py => realtime-gemini-live-local-vad.py} (100%) rename examples/realtime/{gemini-live-vertex-function-calling.py => realtime-gemini-live-vertex-function-calling.py} (100%) rename examples/realtime/{gemini-live-video.py => realtime-gemini-live-video.py} (100%) rename examples/realtime/{gemini-live.py => realtime-gemini-live.py} (100%) rename examples/realtime/{grok.py => realtime-grok.py} (100%) rename examples/realtime/{openai-live-video.py => realtime-openai-live-video.py} (100%) rename examples/realtime/{openai-text.py => realtime-openai-text.py} (100%) rename examples/realtime/{openai.py => realtime-openai.py} (100%) rename examples/realtime/{ultravox-text.py => realtime-ultravox-text.py} (100%) rename examples/realtime/{ultravox.py => realtime-ultravox.py} (100%) rename examples/{thinking-and-mcp => thinking}/thinking-anthropic.py (100%) rename examples/{thinking-and-mcp => thinking}/thinking-functions-anthropic.py (100%) rename examples/{thinking-and-mcp => thinking}/thinking-functions-google.py (100%) rename examples/{thinking-and-mcp => thinking}/thinking-google.py (100%) rename examples/transcription/{assemblyai.py => transcription-assemblyai.py} (100%) rename examples/transcription/{azure.py => transcription-azure.py} (100%) rename examples/transcription/{cartesia.py => transcription-cartesia.py} (100%) rename examples/transcription/{deepgram.py => transcription-deepgram.py} (100%) rename examples/transcription/{elevenlabs.py => transcription-elevenlabs.py} (100%) rename examples/transcription/{gladia-translation.py => transcription-gladia-translation.py} (100%) rename examples/transcription/{gladia-transcription.py => transcription-gladia.py} (100%) rename examples/transcription/{google-llm.py => transcription-google-llm.py} (100%) rename examples/transcription/{gradium.py => transcription-gradium.py} (100%) rename examples/transcription/{openai.py => transcription-openai.py} (100%) rename examples/transcription/{soniox.py => transcription-soniox.py} (100%) rename examples/transcription/{speechmatics.py => transcription-speechmatics.py} (100%) rename examples/transcription/{whisper-local.py => transcription-whisper-local.py} (100%) rename examples/transcription/{whisper-mlx.py => transcription-whisper-mlx.py} (100%) rename examples/transcription/{whisper.py => transcription-whisper.py} (100%) rename examples/transports/{daily.py => transports-daily.py} (100%) rename examples/transports/{livekit.py => transports-livekit.py} (100%) rename examples/transports/{small-webrtc.py => transports-small-webrtc.py} (100%) rename examples/turn-management/{detect-user-idle.py => turn-management-detect-user-idle.py} (100%) rename examples/turn-management/{filter-incomplete-turns.py => turn-management-filter-incomplete-turns.py} (100%) rename examples/turn-management/{interruption-config.py => turn-management-interruption-config.py} (100%) rename examples/turn-management/{smart-turn-local-coreml.py => turn-management-smart-turn-local-coreml.py} (100%) rename examples/turn-management/{smart-turn-local.py => turn-management-smart-turn-local.py} (100%) rename examples/turn-management/{turn-tracking-observer.py => turn-management-turn-tracking-observer.py} (100%) rename examples/turn-management/{user-assistant-turns.py => turn-management-user-assistant-turns.py} (100%) rename examples/turn-management/{user-mute-strategy.py => turn-management-user-mute-strategy.py} (100%) rename examples/update-settings/llm/{anthropic.py => llm-anthropic.py} (100%) rename examples/update-settings/llm/{aws-bedrock.py => llm-aws-bedrock.py} (100%) rename examples/update-settings/llm/{aws-nova-sonic.py => llm-aws-nova-sonic.py} (100%) rename examples/update-settings/llm/{azure-realtime.py => llm-azure-realtime.py} (100%) rename examples/update-settings/llm/{azure.py => llm-azure.py} (100%) rename examples/update-settings/llm/{cerebras.py => llm-cerebras.py} (100%) rename examples/update-settings/llm/{deepseek.py => llm-deepseek.py} (100%) rename examples/update-settings/llm/{fireworks.py => llm-fireworks.py} (100%) rename examples/update-settings/llm/{gemini-live-vertex.py => llm-gemini-live-vertex.py} (100%) rename examples/update-settings/llm/{gemini-live.py => llm-gemini-live.py} (100%) rename examples/update-settings/llm/{google-vertex.py => llm-google-vertex.py} (100%) rename examples/update-settings/llm/{google.py => llm-google.py} (100%) rename examples/update-settings/llm/{grok-realtime.py => llm-grok-realtime.py} (100%) rename examples/update-settings/llm/{grok.py => llm-grok.py} (100%) rename examples/update-settings/llm/{groq.py => llm-groq.py} (100%) rename examples/update-settings/llm/{mistral.py => llm-mistral.py} (100%) rename examples/update-settings/llm/{nvidia.py => llm-nvidia.py} (100%) rename examples/update-settings/llm/{ollama.py => llm-ollama.py} (100%) rename examples/update-settings/llm/{openai-realtime.py => llm-openai-realtime.py} (100%) rename examples/update-settings/llm/{openai-responses-http.py => llm-openai-responses-http.py} (100%) rename examples/update-settings/llm/{openai-responses.py => llm-openai-responses.py} (100%) rename examples/update-settings/llm/{openai.py => llm-openai.py} (100%) rename examples/update-settings/llm/{openrouter.py => llm-openrouter.py} (100%) rename examples/update-settings/llm/{perplexity.py => llm-perplexity.py} (100%) rename examples/update-settings/llm/{qwen.py => llm-qwen.py} (100%) rename examples/update-settings/llm/{sambanova.py => llm-sambanova.py} (100%) rename examples/update-settings/llm/{sarvam.py => llm-sarvam.py} (100%) rename examples/update-settings/llm/{together.py => llm-together.py} (100%) rename examples/update-settings/llm/{ultravox-realtime.py => llm-ultravox-realtime.py} (100%) rename examples/update-settings/stt/{assemblyai.py => stt-assemblyai.py} (100%) rename examples/update-settings/stt/{aws-transcribe.py => stt-aws-transcribe.py} (100%) rename examples/update-settings/stt/{azure.py => stt-azure.py} (100%) rename examples/update-settings/stt/{cartesia.py => stt-cartesia.py} (100%) rename examples/update-settings/stt/{deepgram-flux.py => stt-deepgram-flux.py} (100%) rename examples/update-settings/stt/{deepgram-sagemaker.py => stt-deepgram-sagemaker.py} (100%) rename examples/update-settings/stt/{deepgram.py => stt-deepgram.py} (100%) rename examples/update-settings/stt/{elevenlabs-realtime.py => stt-elevenlabs-realtime.py} (100%) rename examples/update-settings/stt/{elevenlabs.py => stt-elevenlabs.py} (100%) rename examples/update-settings/stt/{fal.py => stt-fal.py} (100%) rename examples/update-settings/stt/{gladia.py => stt-gladia.py} (100%) rename examples/update-settings/stt/{google.py => stt-google.py} (100%) rename examples/update-settings/stt/{gradium.py => stt-gradium.py} (100%) rename examples/update-settings/stt/{groq.py => stt-groq.py} (100%) rename examples/update-settings/stt/{nvidia-segmented.py => stt-nvidia-segmented.py} (100%) rename examples/update-settings/stt/{nvidia.py => stt-nvidia.py} (100%) rename examples/update-settings/stt/{openai-realtime.py => stt-openai-realtime.py} (100%) rename examples/update-settings/stt/{sarvam.py => stt-sarvam.py} (100%) rename examples/update-settings/stt/{soniox.py => stt-soniox.py} (100%) rename examples/update-settings/stt/{speechmatics.py => stt-speechmatics.py} (100%) rename examples/update-settings/stt/{whisper-api.py => stt-whisper-api.py} (100%) rename examples/update-settings/stt/{whisper-mlx.py => stt-whisper-mlx.py} (100%) rename examples/update-settings/stt/{whisper.py => stt-whisper.py} (100%) rename examples/update-settings/tts/{asyncai-http.py => tts-asyncai-http.py} (100%) rename examples/update-settings/tts/{asyncai.py => tts-asyncai.py} (100%) rename examples/update-settings/tts/{aws-polly.py => tts-aws-polly.py} (100%) rename examples/update-settings/tts/{azure-http.py => tts-azure-http.py} (100%) rename examples/update-settings/tts/{azure.py => tts-azure.py} (100%) rename examples/update-settings/tts/{camb.py => tts-camb.py} (100%) rename examples/update-settings/tts/{cartesia-http.py => tts-cartesia-http.py} (100%) rename examples/update-settings/tts/{cartesia.py => tts-cartesia.py} (100%) rename examples/update-settings/tts/{deepgram-http.py => tts-deepgram-http.py} (100%) rename examples/update-settings/tts/{deepgram-sagemaker.py => tts-deepgram-sagemaker.py} (100%) rename examples/update-settings/tts/{deepgram.py => tts-deepgram.py} (100%) rename examples/update-settings/tts/{elevenlabs-http.py => tts-elevenlabs-http.py} (100%) rename examples/update-settings/tts/{elevenlabs.py => tts-elevenlabs.py} (100%) rename examples/update-settings/tts/{fish.py => tts-fish.py} (100%) rename examples/update-settings/tts/{gemini.py => tts-gemini.py} (100%) rename examples/update-settings/tts/{google-http.py => tts-google-http.py} (100%) rename examples/update-settings/tts/{google-stream.py => tts-google-stream.py} (100%) rename examples/update-settings/tts/{gradium.py => tts-gradium.py} (100%) rename examples/update-settings/tts/{groq.py => tts-groq.py} (100%) rename examples/update-settings/tts/{hume.py => tts-hume.py} (100%) rename examples/update-settings/tts/{inworld-http.py => tts-inworld-http.py} (100%) rename examples/update-settings/tts/{inworld.py => tts-inworld.py} (100%) rename examples/update-settings/tts/{kokoro.py => tts-kokoro.py} (100%) rename examples/update-settings/tts/{lmnt.py => tts-lmnt.py} (100%) rename examples/update-settings/tts/{minimax.py => tts-minimax.py} (100%) rename examples/update-settings/tts/{neuphonic-http.py => tts-neuphonic-http.py} (100%) rename examples/update-settings/tts/{neuphonic.py => tts-neuphonic.py} (100%) rename examples/update-settings/tts/{nvidia.py => tts-nvidia.py} (100%) rename examples/update-settings/tts/{openai.py => tts-openai.py} (100%) rename examples/update-settings/tts/{piper-http.py => tts-piper-http.py} (100%) rename examples/update-settings/tts/{piper.py => tts-piper.py} (100%) rename examples/update-settings/tts/{resembleai.py => tts-resembleai.py} (100%) rename examples/update-settings/tts/{rime-http.py => tts-rime-http.py} (100%) rename examples/update-settings/tts/{rime.py => tts-rime.py} (100%) rename examples/update-settings/tts/{sarvam-http.py => tts-sarvam-http.py} (100%) rename examples/update-settings/tts/{sarvam.py => tts-sarvam.py} (100%) rename examples/update-settings/tts/{speechmatics.py => tts-speechmatics.py} (100%) rename examples/update-settings/tts/{xtts.py => tts-xtts.py} (100%) rename examples/video-avatar/{heygen-transport.py => video-avatar-heygen-transport.py} (100%) rename examples/video-avatar/{heygen-video-service.py => video-avatar-heygen-video-service.py} (100%) rename examples/video-avatar/{lemonslice-transport.py => video-avatar-lemonslice-transport.py} (100%) rename examples/video-avatar/{simli-layer.py => video-avatar-simli-layer.py} (100%) rename examples/video-avatar/{tavus-transport.py => video-avatar-tavus-transport.py} (100%) rename examples/video-avatar/{tavus-video-service.py => video-avatar-tavus-video-service.py} (100%) rename examples/video-processing/{custom-video-track.py => video-processing-custom-video-track.py} (100%) rename examples/video-processing/{gstreamer-filesrc.py => video-processing-gstreamer-filesrc.py} (100%) rename examples/video-processing/{gstreamer-videotestsrc.py => video-processing-gstreamer-videotestsrc.py} (100%) rename examples/video-processing/{local-mirror.py => video-processing-local-mirror.py} (100%) rename examples/video-processing/{mirror.py => video-processing-mirror.py} (100%) rename examples/vision/{anthropic.py => vision-anthropic.py} (100%) rename examples/vision/{aws.py => vision-aws.py} (100%) rename examples/vision/{gemini-flash.py => vision-gemini-flash.py} (100%) rename examples/vision/{moondream.py => vision-moondream.py} (100%) rename examples/vision/{openai-responses-http.py => vision-openai-responses-http.py} (100%) rename examples/vision/{openai-responses.py => vision-openai-responses.py} (100%) rename examples/vision/{openai.py => vision-openai.py} (100%) rename examples/voice/{aicoustics.py => voice-aicoustics.py} (100%) rename examples/voice/{assemblyai-turn-detection.py => voice-assemblyai-turn-detection.py} (100%) rename examples/voice/{assemblyai.py => voice-assemblyai.py} (100%) rename examples/voice/{asyncai-http.py => voice-asyncai-http.py} (100%) rename examples/voice/{asyncai.py => voice-asyncai.py} (100%) rename examples/voice/{aws-strands.py => voice-aws-strands.py} (100%) rename examples/voice/{aws.py => voice-aws.py} (100%) rename examples/voice/{azure-http.py => voice-azure-http.py} (100%) rename examples/voice/{azure.py => voice-azure.py} (100%) rename examples/voice/{camb.py => voice-camb.py} (100%) rename examples/voice/{cartesia-http.py => voice-cartesia-http.py} (100%) rename examples/voice/{cartesia.py => voice-cartesia.py} (100%) rename examples/voice/{deepgram-flux-sagemaker.py => voice-deepgram-flux-sagemaker.py} (100%) rename examples/voice/{deepgram-flux.py => voice-deepgram-flux.py} (100%) rename examples/voice/{deepgram-http.py => voice-deepgram-http.py} (100%) rename examples/voice/{deepgram-sagemaker.py => voice-deepgram-sagemaker.py} (100%) rename examples/voice/{deepgram-vad.py => voice-deepgram-vad.py} (100%) rename examples/voice/{deepgram.py => voice-deepgram.py} (100%) rename examples/voice/{elevenlabs-http.py => voice-elevenlabs-http.py} (100%) rename examples/voice/{elevenlabs.py => voice-elevenlabs.py} (100%) rename examples/voice/{fal.py => voice-fal.py} (100%) rename examples/voice/{fish.py => voice-fish.py} (100%) rename examples/voice/{gladia-vad.py => voice-gladia-vad.py} (100%) rename examples/voice/{gladia.py => voice-gladia.py} (100%) rename examples/voice/{google-audio-in.py => voice-google-audio-in.py} (100%) rename examples/voice/{google-gemini-tts.py => voice-google-gemini-tts.py} (100%) rename examples/voice/{google-http.py => voice-google-http.py} (100%) rename examples/voice/{google-image.py => voice-google-image.py} (100%) rename examples/voice/{google.py => voice-google.py} (100%) rename examples/voice/{gradium.py => voice-gradium.py} (100%) rename examples/voice/{groq.py => voice-groq.py} (100%) rename examples/voice/{hume.py => voice-hume.py} (100%) rename examples/voice/{inworld-http.py => voice-inworld-http.py} (100%) rename examples/voice/{inworld.py => voice-inworld.py} (100%) rename examples/voice/{kokoro.py => voice-kokoro.py} (100%) rename examples/voice/{krisp-viva.py => voice-krisp-viva.py} (100%) rename examples/voice/{langchain.py => voice-langchain.py} (100%) rename examples/voice/{lmnt.py => voice-lmnt.py} (100%) rename examples/voice/{minimax.py => voice-minimax.py} (100%) rename examples/voice/{neuphonic-http.py => voice-neuphonic-http.py} (100%) rename examples/voice/{neuphonic.py => voice-neuphonic.py} (100%) rename examples/voice/{nvidia.py => voice-nvidia.py} (100%) rename examples/voice/{openai-http.py => voice-openai-http.py} (100%) rename examples/voice/{openai-responses-http.py => voice-openai-responses-http.py} (100%) rename examples/voice/{openai-responses.py => voice-openai-responses.py} (100%) rename examples/voice/{openai.py => voice-openai.py} (100%) rename examples/voice/{piper.py => voice-piper.py} (100%) rename examples/voice/{resemble.py => voice-resemble.py} (100%) rename examples/voice/{rime-http.py => voice-rime-http.py} (100%) rename examples/voice/{rime.py => voice-rime.py} (100%) rename examples/voice/{sarvam-http.py => voice-sarvam-http.py} (100%) rename examples/voice/{sarvam.py => voice-sarvam.py} (100%) rename examples/voice/{smallest.py => voice-smallest.py} (100%) rename examples/voice/{soniox.py => voice-soniox.py} (100%) rename examples/voice/{speechmatics-vad.py => voice-speechmatics-vad.py} (100%) rename examples/voice/{speechmatics.py => voice-speechmatics.py} (100%) rename examples/voice/{xai.py => voice-xai.py} (100%) rename examples/voice/{xtts.py => voice-xtts.py} (100%) diff --git a/README.md b/README.md index af165a94f..f900ff5d4 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
  - +

## 🧩 Available services diff --git a/examples/audio/bot-background-sound.py b/examples/audio/audio-bot-background-sound.py similarity index 100% rename from examples/audio/bot-background-sound.py rename to examples/audio/audio-bot-background-sound.py diff --git a/examples/audio/sound-effects.py b/examples/audio/audio-sound-effects.py similarity index 100% rename from examples/audio/sound-effects.py rename to examples/audio/audio-sound-effects.py diff --git a/examples/context-summarization/dedicated-llm.py b/examples/context-summarization/context-summarization-dedicated-llm.py similarity index 100% rename from examples/context-summarization/dedicated-llm.py rename to examples/context-summarization/context-summarization-dedicated-llm.py diff --git a/examples/context-summarization/google.py b/examples/context-summarization/context-summarization-google.py similarity index 100% rename from examples/context-summarization/google.py rename to examples/context-summarization/context-summarization-google.py diff --git a/examples/context-summarization/manual-openai.py b/examples/context-summarization/context-summarization-manual-openai.py similarity index 100% rename from examples/context-summarization/manual-openai.py rename to examples/context-summarization/context-summarization-manual-openai.py diff --git a/examples/context-summarization/openai.py b/examples/context-summarization/context-summarization-openai.py similarity index 100% rename from examples/context-summarization/openai.py rename to examples/context-summarization/context-summarization-openai.py diff --git a/examples/features/before-and-after-events.py b/examples/features/features-before-and-after-events.py similarity index 100% rename from examples/features/before-and-after-events.py rename to examples/features/features-before-and-after-events.py diff --git a/examples/features/concurrent-llm-evaluation.py b/examples/features/features-concurrent-llm-evaluation.py similarity index 100% rename from examples/features/concurrent-llm-evaluation.py rename to examples/features/features-concurrent-llm-evaluation.py diff --git a/examples/features/concurrent-llm-rtvi-ignored-sources.py b/examples/features/features-concurrent-llm-rtvi-ignored-sources.py similarity index 100% rename from examples/features/concurrent-llm-rtvi-ignored-sources.py rename to examples/features/features-concurrent-llm-rtvi-ignored-sources.py diff --git a/examples/features/custom-frame-processor.py b/examples/features/features-custom-frame-processor.py similarity index 100% rename from examples/features/custom-frame-processor.py rename to examples/features/features-custom-frame-processor.py diff --git a/examples/features/gpu-container-local-bot.py b/examples/features/features-gpu-container-local-bot.py similarity index 100% rename from examples/features/gpu-container-local-bot.py rename to examples/features/features-gpu-container-local-bot.py diff --git a/examples/features/live-translation.py b/examples/features/features-live-translation.py similarity index 100% rename from examples/features/live-translation.py rename to examples/features/features-live-translation.py diff --git a/examples/features/pattern-pair-voice-switching.py b/examples/features/features-pattern-pair-voice-switching.py similarity index 100% rename from examples/features/pattern-pair-voice-switching.py rename to examples/features/features-pattern-pair-voice-switching.py diff --git a/examples/features/service-switcher.py b/examples/features/features-service-switcher.py similarity index 100% rename from examples/features/service-switcher.py rename to examples/features/features-service-switcher.py diff --git a/examples/features/switch-languages.py b/examples/features/features-switch-languages.py similarity index 100% rename from examples/features/switch-languages.py rename to examples/features/features-switch-languages.py diff --git a/examples/features/switch-voices.py b/examples/features/features-switch-voices.py similarity index 100% rename from examples/features/switch-voices.py rename to examples/features/features-switch-voices.py diff --git a/examples/features/user-email-gathering.py b/examples/features/features-user-email-gathering.py similarity index 100% rename from examples/features/user-email-gathering.py rename to examples/features/features-user-email-gathering.py diff --git a/examples/features/voicemail-detection.py b/examples/features/features-voicemail-detection.py similarity index 100% rename from examples/features/voicemail-detection.py rename to examples/features/features-voicemail-detection.py diff --git a/examples/features/wake-phrase.py b/examples/features/features-wake-phrase.py similarity index 100% rename from examples/features/wake-phrase.py rename to examples/features/features-wake-phrase.py diff --git a/examples/function-calling/anthropic-video.py b/examples/function-calling/function-calling-anthropic-video.py similarity index 100% rename from examples/function-calling/anthropic-video.py rename to examples/function-calling/function-calling-anthropic-video.py diff --git a/examples/function-calling/anthropic.py b/examples/function-calling/function-calling-anthropic.py similarity index 100% rename from examples/function-calling/anthropic.py rename to examples/function-calling/function-calling-anthropic.py diff --git a/examples/function-calling/aws-video.py b/examples/function-calling/function-calling-aws-video.py similarity index 100% rename from examples/function-calling/aws-video.py rename to examples/function-calling/function-calling-aws-video.py diff --git a/examples/function-calling/aws.py b/examples/function-calling/function-calling-aws.py similarity index 100% rename from examples/function-calling/aws.py rename to examples/function-calling/function-calling-aws.py diff --git a/examples/function-calling/azure.py b/examples/function-calling/function-calling-azure.py similarity index 100% rename from examples/function-calling/azure.py rename to examples/function-calling/function-calling-azure.py diff --git a/examples/function-calling/cerebras.py b/examples/function-calling/function-calling-cerebras.py similarity index 100% rename from examples/function-calling/cerebras.py rename to examples/function-calling/function-calling-cerebras.py diff --git a/examples/function-calling/deepseek.py b/examples/function-calling/function-calling-deepseek.py similarity index 100% rename from examples/function-calling/deepseek.py rename to examples/function-calling/function-calling-deepseek.py diff --git a/examples/function-calling/direct.py b/examples/function-calling/function-calling-direct.py similarity index 100% rename from examples/function-calling/direct.py rename to examples/function-calling/function-calling-direct.py diff --git a/examples/function-calling/fireworks.py b/examples/function-calling/function-calling-fireworks.py similarity index 100% rename from examples/function-calling/fireworks.py rename to examples/function-calling/function-calling-fireworks.py diff --git a/examples/function-calling/google-vertex.py b/examples/function-calling/function-calling-google-vertex.py similarity index 100% rename from examples/function-calling/google-vertex.py rename to examples/function-calling/function-calling-google-vertex.py diff --git a/examples/function-calling/google-video.py b/examples/function-calling/function-calling-google-video.py similarity index 100% rename from examples/function-calling/google-video.py rename to examples/function-calling/function-calling-google-video.py diff --git a/examples/function-calling/google.py b/examples/function-calling/function-calling-google.py similarity index 100% rename from examples/function-calling/google.py rename to examples/function-calling/function-calling-google.py diff --git a/examples/function-calling/grok.py b/examples/function-calling/function-calling-grok.py similarity index 100% rename from examples/function-calling/grok.py rename to examples/function-calling/function-calling-grok.py diff --git a/examples/function-calling/groq.py b/examples/function-calling/function-calling-groq.py similarity index 100% rename from examples/function-calling/groq.py rename to examples/function-calling/function-calling-groq.py diff --git a/examples/function-calling/mistral.py b/examples/function-calling/function-calling-mistral.py similarity index 100% rename from examples/function-calling/mistral.py rename to examples/function-calling/function-calling-mistral.py diff --git a/examples/function-calling/moondream-video.py b/examples/function-calling/function-calling-moondream-video.py similarity index 100% rename from examples/function-calling/moondream-video.py rename to examples/function-calling/function-calling-moondream-video.py diff --git a/examples/function-calling/nebius.py b/examples/function-calling/function-calling-nebius.py similarity index 100% rename from examples/function-calling/nebius.py rename to examples/function-calling/function-calling-nebius.py diff --git a/examples/function-calling/novita.py b/examples/function-calling/function-calling-novita.py similarity index 100% rename from examples/function-calling/novita.py rename to examples/function-calling/function-calling-novita.py diff --git a/examples/function-calling/nvidia.py b/examples/function-calling/function-calling-nvidia.py similarity index 100% rename from examples/function-calling/nvidia.py rename to examples/function-calling/function-calling-nvidia.py diff --git a/examples/function-calling/ollama.py b/examples/function-calling/function-calling-ollama.py similarity index 100% rename from examples/function-calling/ollama.py rename to examples/function-calling/function-calling-ollama.py diff --git a/examples/function-calling/openai-responses-http.py b/examples/function-calling/function-calling-openai-responses-http.py similarity index 100% rename from examples/function-calling/openai-responses-http.py rename to examples/function-calling/function-calling-openai-responses-http.py diff --git a/examples/function-calling/openai-responses-video-http.py b/examples/function-calling/function-calling-openai-responses-video-http.py similarity index 100% rename from examples/function-calling/openai-responses-video-http.py rename to examples/function-calling/function-calling-openai-responses-video-http.py diff --git a/examples/function-calling/openai-responses-video.py b/examples/function-calling/function-calling-openai-responses-video.py similarity index 100% rename from examples/function-calling/openai-responses-video.py rename to examples/function-calling/function-calling-openai-responses-video.py diff --git a/examples/function-calling/openai-responses.py b/examples/function-calling/function-calling-openai-responses.py similarity index 100% rename from examples/function-calling/openai-responses.py rename to examples/function-calling/function-calling-openai-responses.py diff --git a/examples/function-calling/openai-video.py b/examples/function-calling/function-calling-openai-video.py similarity index 100% rename from examples/function-calling/openai-video.py rename to examples/function-calling/function-calling-openai-video.py diff --git a/examples/function-calling/openai.py b/examples/function-calling/function-calling-openai.py similarity index 100% rename from examples/function-calling/openai.py rename to examples/function-calling/function-calling-openai.py diff --git a/examples/function-calling/openrouter.py b/examples/function-calling/function-calling-openrouter.py similarity index 100% rename from examples/function-calling/openrouter.py rename to examples/function-calling/function-calling-openrouter.py diff --git a/examples/function-calling/perplexity.py b/examples/function-calling/function-calling-perplexity.py similarity index 100% rename from examples/function-calling/perplexity.py rename to examples/function-calling/function-calling-perplexity.py diff --git a/examples/function-calling/qwen.py b/examples/function-calling/function-calling-qwen.py similarity index 100% rename from examples/function-calling/qwen.py rename to examples/function-calling/function-calling-qwen.py diff --git a/examples/function-calling/sambanova.py b/examples/function-calling/function-calling-sambanova.py similarity index 100% rename from examples/function-calling/sambanova.py rename to examples/function-calling/function-calling-sambanova.py diff --git a/examples/function-calling/sarvam.py b/examples/function-calling/function-calling-sarvam.py similarity index 100% rename from examples/function-calling/sarvam.py rename to examples/function-calling/function-calling-sarvam.py diff --git a/examples/function-calling/together.py b/examples/function-calling/function-calling-together.py similarity index 100% rename from examples/function-calling/together.py rename to examples/function-calling/function-calling-together.py diff --git a/examples/thinking-and-mcp/multiple-mcp.py b/examples/mcp/mcp-multiple-mcp.py similarity index 100% rename from examples/thinking-and-mcp/multiple-mcp.py rename to examples/mcp/mcp-multiple-mcp.py diff --git a/examples/thinking-and-mcp/mcp-stdio.py b/examples/mcp/mcp-stdio.py similarity index 100% rename from examples/thinking-and-mcp/mcp-stdio.py rename to examples/mcp/mcp-stdio.py diff --git a/examples/thinking-and-mcp/mcp-streamable-http-gemini-live.py b/examples/mcp/mcp-streamable-http-gemini-live.py similarity index 100% rename from examples/thinking-and-mcp/mcp-streamable-http-gemini-live.py rename to examples/mcp/mcp-streamable-http-gemini-live.py diff --git a/examples/thinking-and-mcp/mcp-streamable-http.py b/examples/mcp/mcp-streamable-http.py similarity index 100% rename from examples/thinking-and-mcp/mcp-streamable-http.py rename to examples/mcp/mcp-streamable-http.py diff --git a/examples/observability/heartbeats.py b/examples/observability/observability-heartbeats.py similarity index 100% rename from examples/observability/heartbeats.py rename to examples/observability/observability-heartbeats.py diff --git a/examples/observability/observer.py b/examples/observability/observability-observer.py similarity index 100% rename from examples/observability/observer.py rename to examples/observability/observability-observer.py diff --git a/examples/observability/sentry-metrics.py b/examples/observability/observability-sentry-metrics.py similarity index 100% rename from examples/observability/sentry-metrics.py rename to examples/observability/observability-sentry-metrics.py diff --git a/examples/persistent-context/anthropic.py b/examples/persistent-context/persistent-context-anthropic.py similarity index 100% rename from examples/persistent-context/anthropic.py rename to examples/persistent-context/persistent-context-anthropic.py diff --git a/examples/persistent-context/aws-nova-sonic.py b/examples/persistent-context/persistent-context-aws-nova-sonic.py similarity index 100% rename from examples/persistent-context/aws-nova-sonic.py rename to examples/persistent-context/persistent-context-aws-nova-sonic.py diff --git a/examples/persistent-context/gemini.py b/examples/persistent-context/persistent-context-gemini.py similarity index 100% rename from examples/persistent-context/gemini.py rename to examples/persistent-context/persistent-context-gemini.py diff --git a/examples/persistent-context/grok-realtime.py b/examples/persistent-context/persistent-context-grok-realtime.py similarity index 100% rename from examples/persistent-context/grok-realtime.py rename to examples/persistent-context/persistent-context-grok-realtime.py diff --git a/examples/persistent-context/openai-realtime.py b/examples/persistent-context/persistent-context-openai-realtime.py similarity index 100% rename from examples/persistent-context/openai-realtime.py rename to examples/persistent-context/persistent-context-openai-realtime.py diff --git a/examples/persistent-context/openai-responses-http.py b/examples/persistent-context/persistent-context-openai-responses-http.py similarity index 100% rename from examples/persistent-context/openai-responses-http.py rename to examples/persistent-context/persistent-context-openai-responses-http.py diff --git a/examples/persistent-context/openai-responses.py b/examples/persistent-context/persistent-context-openai-responses.py similarity index 100% rename from examples/persistent-context/openai-responses.py rename to examples/persistent-context/persistent-context-openai-responses.py diff --git a/examples/persistent-context/openai.py b/examples/persistent-context/persistent-context-openai.py similarity index 100% rename from examples/persistent-context/openai.py rename to examples/persistent-context/persistent-context-openai.py diff --git a/examples/rag/gemini-grounding-metadata.py b/examples/rag/rag-gemini-grounding-metadata.py similarity index 100% rename from examples/rag/gemini-grounding-metadata.py rename to examples/rag/rag-gemini-grounding-metadata.py diff --git a/examples/rag/gemini-rag.py b/examples/rag/rag-gemini.py similarity index 100% rename from examples/rag/gemini-rag.py rename to examples/rag/rag-gemini.py diff --git a/examples/rag/mem0.py b/examples/rag/rag-mem0.py similarity index 100% rename from examples/rag/mem0.py rename to examples/rag/rag-mem0.py diff --git a/examples/realtime/aws-nova-sonic.py b/examples/realtime/realtime-aws-nova-sonic.py similarity index 100% rename from examples/realtime/aws-nova-sonic.py rename to examples/realtime/realtime-aws-nova-sonic.py diff --git a/examples/realtime/azure.py b/examples/realtime/realtime-azure.py similarity index 100% rename from examples/realtime/azure.py rename to examples/realtime/realtime-azure.py diff --git a/examples/realtime/gemini-live-files-api.py b/examples/realtime/realtime-gemini-live-files-api.py similarity index 100% rename from examples/realtime/gemini-live-files-api.py rename to examples/realtime/realtime-gemini-live-files-api.py diff --git a/examples/realtime/gemini-live-function-calling.py b/examples/realtime/realtime-gemini-live-function-calling.py similarity index 100% rename from examples/realtime/gemini-live-function-calling.py rename to examples/realtime/realtime-gemini-live-function-calling.py diff --git a/examples/realtime/gemini-live-google-search.py b/examples/realtime/realtime-gemini-live-google-search.py similarity index 100% rename from examples/realtime/gemini-live-google-search.py rename to examples/realtime/realtime-gemini-live-google-search.py diff --git a/examples/realtime/gemini-live-graceful-end.py b/examples/realtime/realtime-gemini-live-graceful-end.py similarity index 100% rename from examples/realtime/gemini-live-graceful-end.py rename to examples/realtime/realtime-gemini-live-graceful-end.py diff --git a/examples/realtime/gemini-live-grounding-metadata.py b/examples/realtime/realtime-gemini-live-grounding-metadata.py similarity index 100% rename from examples/realtime/gemini-live-grounding-metadata.py rename to examples/realtime/realtime-gemini-live-grounding-metadata.py diff --git a/examples/realtime/gemini-live-local-vad.py b/examples/realtime/realtime-gemini-live-local-vad.py similarity index 100% rename from examples/realtime/gemini-live-local-vad.py rename to examples/realtime/realtime-gemini-live-local-vad.py diff --git a/examples/realtime/gemini-live-vertex-function-calling.py b/examples/realtime/realtime-gemini-live-vertex-function-calling.py similarity index 100% rename from examples/realtime/gemini-live-vertex-function-calling.py rename to examples/realtime/realtime-gemini-live-vertex-function-calling.py diff --git a/examples/realtime/gemini-live-video.py b/examples/realtime/realtime-gemini-live-video.py similarity index 100% rename from examples/realtime/gemini-live-video.py rename to examples/realtime/realtime-gemini-live-video.py diff --git a/examples/realtime/gemini-live.py b/examples/realtime/realtime-gemini-live.py similarity index 100% rename from examples/realtime/gemini-live.py rename to examples/realtime/realtime-gemini-live.py diff --git a/examples/realtime/grok.py b/examples/realtime/realtime-grok.py similarity index 100% rename from examples/realtime/grok.py rename to examples/realtime/realtime-grok.py diff --git a/examples/realtime/openai-live-video.py b/examples/realtime/realtime-openai-live-video.py similarity index 100% rename from examples/realtime/openai-live-video.py rename to examples/realtime/realtime-openai-live-video.py diff --git a/examples/realtime/openai-text.py b/examples/realtime/realtime-openai-text.py similarity index 100% rename from examples/realtime/openai-text.py rename to examples/realtime/realtime-openai-text.py diff --git a/examples/realtime/openai.py b/examples/realtime/realtime-openai.py similarity index 100% rename from examples/realtime/openai.py rename to examples/realtime/realtime-openai.py diff --git a/examples/realtime/ultravox-text.py b/examples/realtime/realtime-ultravox-text.py similarity index 100% rename from examples/realtime/ultravox-text.py rename to examples/realtime/realtime-ultravox-text.py diff --git a/examples/realtime/ultravox.py b/examples/realtime/realtime-ultravox.py similarity index 100% rename from examples/realtime/ultravox.py rename to examples/realtime/realtime-ultravox.py diff --git a/examples/thinking-and-mcp/thinking-anthropic.py b/examples/thinking/thinking-anthropic.py similarity index 100% rename from examples/thinking-and-mcp/thinking-anthropic.py rename to examples/thinking/thinking-anthropic.py diff --git a/examples/thinking-and-mcp/thinking-functions-anthropic.py b/examples/thinking/thinking-functions-anthropic.py similarity index 100% rename from examples/thinking-and-mcp/thinking-functions-anthropic.py rename to examples/thinking/thinking-functions-anthropic.py diff --git a/examples/thinking-and-mcp/thinking-functions-google.py b/examples/thinking/thinking-functions-google.py similarity index 100% rename from examples/thinking-and-mcp/thinking-functions-google.py rename to examples/thinking/thinking-functions-google.py diff --git a/examples/thinking-and-mcp/thinking-google.py b/examples/thinking/thinking-google.py similarity index 100% rename from examples/thinking-and-mcp/thinking-google.py rename to examples/thinking/thinking-google.py diff --git a/examples/transcription/assemblyai.py b/examples/transcription/transcription-assemblyai.py similarity index 100% rename from examples/transcription/assemblyai.py rename to examples/transcription/transcription-assemblyai.py diff --git a/examples/transcription/azure.py b/examples/transcription/transcription-azure.py similarity index 100% rename from examples/transcription/azure.py rename to examples/transcription/transcription-azure.py diff --git a/examples/transcription/cartesia.py b/examples/transcription/transcription-cartesia.py similarity index 100% rename from examples/transcription/cartesia.py rename to examples/transcription/transcription-cartesia.py diff --git a/examples/transcription/deepgram.py b/examples/transcription/transcription-deepgram.py similarity index 100% rename from examples/transcription/deepgram.py rename to examples/transcription/transcription-deepgram.py diff --git a/examples/transcription/elevenlabs.py b/examples/transcription/transcription-elevenlabs.py similarity index 100% rename from examples/transcription/elevenlabs.py rename to examples/transcription/transcription-elevenlabs.py diff --git a/examples/transcription/gladia-translation.py b/examples/transcription/transcription-gladia-translation.py similarity index 100% rename from examples/transcription/gladia-translation.py rename to examples/transcription/transcription-gladia-translation.py diff --git a/examples/transcription/gladia-transcription.py b/examples/transcription/transcription-gladia.py similarity index 100% rename from examples/transcription/gladia-transcription.py rename to examples/transcription/transcription-gladia.py diff --git a/examples/transcription/google-llm.py b/examples/transcription/transcription-google-llm.py similarity index 100% rename from examples/transcription/google-llm.py rename to examples/transcription/transcription-google-llm.py diff --git a/examples/transcription/gradium.py b/examples/transcription/transcription-gradium.py similarity index 100% rename from examples/transcription/gradium.py rename to examples/transcription/transcription-gradium.py diff --git a/examples/transcription/openai.py b/examples/transcription/transcription-openai.py similarity index 100% rename from examples/transcription/openai.py rename to examples/transcription/transcription-openai.py diff --git a/examples/transcription/soniox.py b/examples/transcription/transcription-soniox.py similarity index 100% rename from examples/transcription/soniox.py rename to examples/transcription/transcription-soniox.py diff --git a/examples/transcription/speechmatics.py b/examples/transcription/transcription-speechmatics.py similarity index 100% rename from examples/transcription/speechmatics.py rename to examples/transcription/transcription-speechmatics.py diff --git a/examples/transcription/whisper-local.py b/examples/transcription/transcription-whisper-local.py similarity index 100% rename from examples/transcription/whisper-local.py rename to examples/transcription/transcription-whisper-local.py diff --git a/examples/transcription/whisper-mlx.py b/examples/transcription/transcription-whisper-mlx.py similarity index 100% rename from examples/transcription/whisper-mlx.py rename to examples/transcription/transcription-whisper-mlx.py diff --git a/examples/transcription/whisper.py b/examples/transcription/transcription-whisper.py similarity index 100% rename from examples/transcription/whisper.py rename to examples/transcription/transcription-whisper.py diff --git a/examples/transports/daily.py b/examples/transports/transports-daily.py similarity index 100% rename from examples/transports/daily.py rename to examples/transports/transports-daily.py diff --git a/examples/transports/livekit.py b/examples/transports/transports-livekit.py similarity index 100% rename from examples/transports/livekit.py rename to examples/transports/transports-livekit.py diff --git a/examples/transports/small-webrtc.py b/examples/transports/transports-small-webrtc.py similarity index 100% rename from examples/transports/small-webrtc.py rename to examples/transports/transports-small-webrtc.py diff --git a/examples/turn-management/detect-user-idle.py b/examples/turn-management/turn-management-detect-user-idle.py similarity index 100% rename from examples/turn-management/detect-user-idle.py rename to examples/turn-management/turn-management-detect-user-idle.py diff --git a/examples/turn-management/filter-incomplete-turns.py b/examples/turn-management/turn-management-filter-incomplete-turns.py similarity index 100% rename from examples/turn-management/filter-incomplete-turns.py rename to examples/turn-management/turn-management-filter-incomplete-turns.py diff --git a/examples/turn-management/interruption-config.py b/examples/turn-management/turn-management-interruption-config.py similarity index 100% rename from examples/turn-management/interruption-config.py rename to examples/turn-management/turn-management-interruption-config.py diff --git a/examples/turn-management/smart-turn-local-coreml.py b/examples/turn-management/turn-management-smart-turn-local-coreml.py similarity index 100% rename from examples/turn-management/smart-turn-local-coreml.py rename to examples/turn-management/turn-management-smart-turn-local-coreml.py diff --git a/examples/turn-management/smart-turn-local.py b/examples/turn-management/turn-management-smart-turn-local.py similarity index 100% rename from examples/turn-management/smart-turn-local.py rename to examples/turn-management/turn-management-smart-turn-local.py diff --git a/examples/turn-management/turn-tracking-observer.py b/examples/turn-management/turn-management-turn-tracking-observer.py similarity index 100% rename from examples/turn-management/turn-tracking-observer.py rename to examples/turn-management/turn-management-turn-tracking-observer.py diff --git a/examples/turn-management/user-assistant-turns.py b/examples/turn-management/turn-management-user-assistant-turns.py similarity index 100% rename from examples/turn-management/user-assistant-turns.py rename to examples/turn-management/turn-management-user-assistant-turns.py diff --git a/examples/turn-management/user-mute-strategy.py b/examples/turn-management/turn-management-user-mute-strategy.py similarity index 100% rename from examples/turn-management/user-mute-strategy.py rename to examples/turn-management/turn-management-user-mute-strategy.py diff --git a/examples/update-settings/llm/anthropic.py b/examples/update-settings/llm/llm-anthropic.py similarity index 100% rename from examples/update-settings/llm/anthropic.py rename to examples/update-settings/llm/llm-anthropic.py diff --git a/examples/update-settings/llm/aws-bedrock.py b/examples/update-settings/llm/llm-aws-bedrock.py similarity index 100% rename from examples/update-settings/llm/aws-bedrock.py rename to examples/update-settings/llm/llm-aws-bedrock.py diff --git a/examples/update-settings/llm/aws-nova-sonic.py b/examples/update-settings/llm/llm-aws-nova-sonic.py similarity index 100% rename from examples/update-settings/llm/aws-nova-sonic.py rename to examples/update-settings/llm/llm-aws-nova-sonic.py diff --git a/examples/update-settings/llm/azure-realtime.py b/examples/update-settings/llm/llm-azure-realtime.py similarity index 100% rename from examples/update-settings/llm/azure-realtime.py rename to examples/update-settings/llm/llm-azure-realtime.py diff --git a/examples/update-settings/llm/azure.py b/examples/update-settings/llm/llm-azure.py similarity index 100% rename from examples/update-settings/llm/azure.py rename to examples/update-settings/llm/llm-azure.py diff --git a/examples/update-settings/llm/cerebras.py b/examples/update-settings/llm/llm-cerebras.py similarity index 100% rename from examples/update-settings/llm/cerebras.py rename to examples/update-settings/llm/llm-cerebras.py diff --git a/examples/update-settings/llm/deepseek.py b/examples/update-settings/llm/llm-deepseek.py similarity index 100% rename from examples/update-settings/llm/deepseek.py rename to examples/update-settings/llm/llm-deepseek.py diff --git a/examples/update-settings/llm/fireworks.py b/examples/update-settings/llm/llm-fireworks.py similarity index 100% rename from examples/update-settings/llm/fireworks.py rename to examples/update-settings/llm/llm-fireworks.py diff --git a/examples/update-settings/llm/gemini-live-vertex.py b/examples/update-settings/llm/llm-gemini-live-vertex.py similarity index 100% rename from examples/update-settings/llm/gemini-live-vertex.py rename to examples/update-settings/llm/llm-gemini-live-vertex.py diff --git a/examples/update-settings/llm/gemini-live.py b/examples/update-settings/llm/llm-gemini-live.py similarity index 100% rename from examples/update-settings/llm/gemini-live.py rename to examples/update-settings/llm/llm-gemini-live.py diff --git a/examples/update-settings/llm/google-vertex.py b/examples/update-settings/llm/llm-google-vertex.py similarity index 100% rename from examples/update-settings/llm/google-vertex.py rename to examples/update-settings/llm/llm-google-vertex.py diff --git a/examples/update-settings/llm/google.py b/examples/update-settings/llm/llm-google.py similarity index 100% rename from examples/update-settings/llm/google.py rename to examples/update-settings/llm/llm-google.py diff --git a/examples/update-settings/llm/grok-realtime.py b/examples/update-settings/llm/llm-grok-realtime.py similarity index 100% rename from examples/update-settings/llm/grok-realtime.py rename to examples/update-settings/llm/llm-grok-realtime.py diff --git a/examples/update-settings/llm/grok.py b/examples/update-settings/llm/llm-grok.py similarity index 100% rename from examples/update-settings/llm/grok.py rename to examples/update-settings/llm/llm-grok.py diff --git a/examples/update-settings/llm/groq.py b/examples/update-settings/llm/llm-groq.py similarity index 100% rename from examples/update-settings/llm/groq.py rename to examples/update-settings/llm/llm-groq.py diff --git a/examples/update-settings/llm/mistral.py b/examples/update-settings/llm/llm-mistral.py similarity index 100% rename from examples/update-settings/llm/mistral.py rename to examples/update-settings/llm/llm-mistral.py diff --git a/examples/update-settings/llm/nvidia.py b/examples/update-settings/llm/llm-nvidia.py similarity index 100% rename from examples/update-settings/llm/nvidia.py rename to examples/update-settings/llm/llm-nvidia.py diff --git a/examples/update-settings/llm/ollama.py b/examples/update-settings/llm/llm-ollama.py similarity index 100% rename from examples/update-settings/llm/ollama.py rename to examples/update-settings/llm/llm-ollama.py diff --git a/examples/update-settings/llm/openai-realtime.py b/examples/update-settings/llm/llm-openai-realtime.py similarity index 100% rename from examples/update-settings/llm/openai-realtime.py rename to examples/update-settings/llm/llm-openai-realtime.py diff --git a/examples/update-settings/llm/openai-responses-http.py b/examples/update-settings/llm/llm-openai-responses-http.py similarity index 100% rename from examples/update-settings/llm/openai-responses-http.py rename to examples/update-settings/llm/llm-openai-responses-http.py diff --git a/examples/update-settings/llm/openai-responses.py b/examples/update-settings/llm/llm-openai-responses.py similarity index 100% rename from examples/update-settings/llm/openai-responses.py rename to examples/update-settings/llm/llm-openai-responses.py diff --git a/examples/update-settings/llm/openai.py b/examples/update-settings/llm/llm-openai.py similarity index 100% rename from examples/update-settings/llm/openai.py rename to examples/update-settings/llm/llm-openai.py diff --git a/examples/update-settings/llm/openrouter.py b/examples/update-settings/llm/llm-openrouter.py similarity index 100% rename from examples/update-settings/llm/openrouter.py rename to examples/update-settings/llm/llm-openrouter.py diff --git a/examples/update-settings/llm/perplexity.py b/examples/update-settings/llm/llm-perplexity.py similarity index 100% rename from examples/update-settings/llm/perplexity.py rename to examples/update-settings/llm/llm-perplexity.py diff --git a/examples/update-settings/llm/qwen.py b/examples/update-settings/llm/llm-qwen.py similarity index 100% rename from examples/update-settings/llm/qwen.py rename to examples/update-settings/llm/llm-qwen.py diff --git a/examples/update-settings/llm/sambanova.py b/examples/update-settings/llm/llm-sambanova.py similarity index 100% rename from examples/update-settings/llm/sambanova.py rename to examples/update-settings/llm/llm-sambanova.py diff --git a/examples/update-settings/llm/sarvam.py b/examples/update-settings/llm/llm-sarvam.py similarity index 100% rename from examples/update-settings/llm/sarvam.py rename to examples/update-settings/llm/llm-sarvam.py diff --git a/examples/update-settings/llm/together.py b/examples/update-settings/llm/llm-together.py similarity index 100% rename from examples/update-settings/llm/together.py rename to examples/update-settings/llm/llm-together.py diff --git a/examples/update-settings/llm/ultravox-realtime.py b/examples/update-settings/llm/llm-ultravox-realtime.py similarity index 100% rename from examples/update-settings/llm/ultravox-realtime.py rename to examples/update-settings/llm/llm-ultravox-realtime.py diff --git a/examples/update-settings/stt/assemblyai.py b/examples/update-settings/stt/stt-assemblyai.py similarity index 100% rename from examples/update-settings/stt/assemblyai.py rename to examples/update-settings/stt/stt-assemblyai.py diff --git a/examples/update-settings/stt/aws-transcribe.py b/examples/update-settings/stt/stt-aws-transcribe.py similarity index 100% rename from examples/update-settings/stt/aws-transcribe.py rename to examples/update-settings/stt/stt-aws-transcribe.py diff --git a/examples/update-settings/stt/azure.py b/examples/update-settings/stt/stt-azure.py similarity index 100% rename from examples/update-settings/stt/azure.py rename to examples/update-settings/stt/stt-azure.py diff --git a/examples/update-settings/stt/cartesia.py b/examples/update-settings/stt/stt-cartesia.py similarity index 100% rename from examples/update-settings/stt/cartesia.py rename to examples/update-settings/stt/stt-cartesia.py diff --git a/examples/update-settings/stt/deepgram-flux.py b/examples/update-settings/stt/stt-deepgram-flux.py similarity index 100% rename from examples/update-settings/stt/deepgram-flux.py rename to examples/update-settings/stt/stt-deepgram-flux.py diff --git a/examples/update-settings/stt/deepgram-sagemaker.py b/examples/update-settings/stt/stt-deepgram-sagemaker.py similarity index 100% rename from examples/update-settings/stt/deepgram-sagemaker.py rename to examples/update-settings/stt/stt-deepgram-sagemaker.py diff --git a/examples/update-settings/stt/deepgram.py b/examples/update-settings/stt/stt-deepgram.py similarity index 100% rename from examples/update-settings/stt/deepgram.py rename to examples/update-settings/stt/stt-deepgram.py diff --git a/examples/update-settings/stt/elevenlabs-realtime.py b/examples/update-settings/stt/stt-elevenlabs-realtime.py similarity index 100% rename from examples/update-settings/stt/elevenlabs-realtime.py rename to examples/update-settings/stt/stt-elevenlabs-realtime.py diff --git a/examples/update-settings/stt/elevenlabs.py b/examples/update-settings/stt/stt-elevenlabs.py similarity index 100% rename from examples/update-settings/stt/elevenlabs.py rename to examples/update-settings/stt/stt-elevenlabs.py diff --git a/examples/update-settings/stt/fal.py b/examples/update-settings/stt/stt-fal.py similarity index 100% rename from examples/update-settings/stt/fal.py rename to examples/update-settings/stt/stt-fal.py diff --git a/examples/update-settings/stt/gladia.py b/examples/update-settings/stt/stt-gladia.py similarity index 100% rename from examples/update-settings/stt/gladia.py rename to examples/update-settings/stt/stt-gladia.py diff --git a/examples/update-settings/stt/google.py b/examples/update-settings/stt/stt-google.py similarity index 100% rename from examples/update-settings/stt/google.py rename to examples/update-settings/stt/stt-google.py diff --git a/examples/update-settings/stt/gradium.py b/examples/update-settings/stt/stt-gradium.py similarity index 100% rename from examples/update-settings/stt/gradium.py rename to examples/update-settings/stt/stt-gradium.py diff --git a/examples/update-settings/stt/groq.py b/examples/update-settings/stt/stt-groq.py similarity index 100% rename from examples/update-settings/stt/groq.py rename to examples/update-settings/stt/stt-groq.py diff --git a/examples/update-settings/stt/nvidia-segmented.py b/examples/update-settings/stt/stt-nvidia-segmented.py similarity index 100% rename from examples/update-settings/stt/nvidia-segmented.py rename to examples/update-settings/stt/stt-nvidia-segmented.py diff --git a/examples/update-settings/stt/nvidia.py b/examples/update-settings/stt/stt-nvidia.py similarity index 100% rename from examples/update-settings/stt/nvidia.py rename to examples/update-settings/stt/stt-nvidia.py diff --git a/examples/update-settings/stt/openai-realtime.py b/examples/update-settings/stt/stt-openai-realtime.py similarity index 100% rename from examples/update-settings/stt/openai-realtime.py rename to examples/update-settings/stt/stt-openai-realtime.py diff --git a/examples/update-settings/stt/sarvam.py b/examples/update-settings/stt/stt-sarvam.py similarity index 100% rename from examples/update-settings/stt/sarvam.py rename to examples/update-settings/stt/stt-sarvam.py diff --git a/examples/update-settings/stt/soniox.py b/examples/update-settings/stt/stt-soniox.py similarity index 100% rename from examples/update-settings/stt/soniox.py rename to examples/update-settings/stt/stt-soniox.py diff --git a/examples/update-settings/stt/speechmatics.py b/examples/update-settings/stt/stt-speechmatics.py similarity index 100% rename from examples/update-settings/stt/speechmatics.py rename to examples/update-settings/stt/stt-speechmatics.py diff --git a/examples/update-settings/stt/whisper-api.py b/examples/update-settings/stt/stt-whisper-api.py similarity index 100% rename from examples/update-settings/stt/whisper-api.py rename to examples/update-settings/stt/stt-whisper-api.py diff --git a/examples/update-settings/stt/whisper-mlx.py b/examples/update-settings/stt/stt-whisper-mlx.py similarity index 100% rename from examples/update-settings/stt/whisper-mlx.py rename to examples/update-settings/stt/stt-whisper-mlx.py diff --git a/examples/update-settings/stt/whisper.py b/examples/update-settings/stt/stt-whisper.py similarity index 100% rename from examples/update-settings/stt/whisper.py rename to examples/update-settings/stt/stt-whisper.py diff --git a/examples/update-settings/tts/asyncai-http.py b/examples/update-settings/tts/tts-asyncai-http.py similarity index 100% rename from examples/update-settings/tts/asyncai-http.py rename to examples/update-settings/tts/tts-asyncai-http.py diff --git a/examples/update-settings/tts/asyncai.py b/examples/update-settings/tts/tts-asyncai.py similarity index 100% rename from examples/update-settings/tts/asyncai.py rename to examples/update-settings/tts/tts-asyncai.py diff --git a/examples/update-settings/tts/aws-polly.py b/examples/update-settings/tts/tts-aws-polly.py similarity index 100% rename from examples/update-settings/tts/aws-polly.py rename to examples/update-settings/tts/tts-aws-polly.py diff --git a/examples/update-settings/tts/azure-http.py b/examples/update-settings/tts/tts-azure-http.py similarity index 100% rename from examples/update-settings/tts/azure-http.py rename to examples/update-settings/tts/tts-azure-http.py diff --git a/examples/update-settings/tts/azure.py b/examples/update-settings/tts/tts-azure.py similarity index 100% rename from examples/update-settings/tts/azure.py rename to examples/update-settings/tts/tts-azure.py diff --git a/examples/update-settings/tts/camb.py b/examples/update-settings/tts/tts-camb.py similarity index 100% rename from examples/update-settings/tts/camb.py rename to examples/update-settings/tts/tts-camb.py diff --git a/examples/update-settings/tts/cartesia-http.py b/examples/update-settings/tts/tts-cartesia-http.py similarity index 100% rename from examples/update-settings/tts/cartesia-http.py rename to examples/update-settings/tts/tts-cartesia-http.py diff --git a/examples/update-settings/tts/cartesia.py b/examples/update-settings/tts/tts-cartesia.py similarity index 100% rename from examples/update-settings/tts/cartesia.py rename to examples/update-settings/tts/tts-cartesia.py diff --git a/examples/update-settings/tts/deepgram-http.py b/examples/update-settings/tts/tts-deepgram-http.py similarity index 100% rename from examples/update-settings/tts/deepgram-http.py rename to examples/update-settings/tts/tts-deepgram-http.py diff --git a/examples/update-settings/tts/deepgram-sagemaker.py b/examples/update-settings/tts/tts-deepgram-sagemaker.py similarity index 100% rename from examples/update-settings/tts/deepgram-sagemaker.py rename to examples/update-settings/tts/tts-deepgram-sagemaker.py diff --git a/examples/update-settings/tts/deepgram.py b/examples/update-settings/tts/tts-deepgram.py similarity index 100% rename from examples/update-settings/tts/deepgram.py rename to examples/update-settings/tts/tts-deepgram.py diff --git a/examples/update-settings/tts/elevenlabs-http.py b/examples/update-settings/tts/tts-elevenlabs-http.py similarity index 100% rename from examples/update-settings/tts/elevenlabs-http.py rename to examples/update-settings/tts/tts-elevenlabs-http.py diff --git a/examples/update-settings/tts/elevenlabs.py b/examples/update-settings/tts/tts-elevenlabs.py similarity index 100% rename from examples/update-settings/tts/elevenlabs.py rename to examples/update-settings/tts/tts-elevenlabs.py diff --git a/examples/update-settings/tts/fish.py b/examples/update-settings/tts/tts-fish.py similarity index 100% rename from examples/update-settings/tts/fish.py rename to examples/update-settings/tts/tts-fish.py diff --git a/examples/update-settings/tts/gemini.py b/examples/update-settings/tts/tts-gemini.py similarity index 100% rename from examples/update-settings/tts/gemini.py rename to examples/update-settings/tts/tts-gemini.py diff --git a/examples/update-settings/tts/google-http.py b/examples/update-settings/tts/tts-google-http.py similarity index 100% rename from examples/update-settings/tts/google-http.py rename to examples/update-settings/tts/tts-google-http.py diff --git a/examples/update-settings/tts/google-stream.py b/examples/update-settings/tts/tts-google-stream.py similarity index 100% rename from examples/update-settings/tts/google-stream.py rename to examples/update-settings/tts/tts-google-stream.py diff --git a/examples/update-settings/tts/gradium.py b/examples/update-settings/tts/tts-gradium.py similarity index 100% rename from examples/update-settings/tts/gradium.py rename to examples/update-settings/tts/tts-gradium.py diff --git a/examples/update-settings/tts/groq.py b/examples/update-settings/tts/tts-groq.py similarity index 100% rename from examples/update-settings/tts/groq.py rename to examples/update-settings/tts/tts-groq.py diff --git a/examples/update-settings/tts/hume.py b/examples/update-settings/tts/tts-hume.py similarity index 100% rename from examples/update-settings/tts/hume.py rename to examples/update-settings/tts/tts-hume.py diff --git a/examples/update-settings/tts/inworld-http.py b/examples/update-settings/tts/tts-inworld-http.py similarity index 100% rename from examples/update-settings/tts/inworld-http.py rename to examples/update-settings/tts/tts-inworld-http.py diff --git a/examples/update-settings/tts/inworld.py b/examples/update-settings/tts/tts-inworld.py similarity index 100% rename from examples/update-settings/tts/inworld.py rename to examples/update-settings/tts/tts-inworld.py diff --git a/examples/update-settings/tts/kokoro.py b/examples/update-settings/tts/tts-kokoro.py similarity index 100% rename from examples/update-settings/tts/kokoro.py rename to examples/update-settings/tts/tts-kokoro.py diff --git a/examples/update-settings/tts/lmnt.py b/examples/update-settings/tts/tts-lmnt.py similarity index 100% rename from examples/update-settings/tts/lmnt.py rename to examples/update-settings/tts/tts-lmnt.py diff --git a/examples/update-settings/tts/minimax.py b/examples/update-settings/tts/tts-minimax.py similarity index 100% rename from examples/update-settings/tts/minimax.py rename to examples/update-settings/tts/tts-minimax.py diff --git a/examples/update-settings/tts/neuphonic-http.py b/examples/update-settings/tts/tts-neuphonic-http.py similarity index 100% rename from examples/update-settings/tts/neuphonic-http.py rename to examples/update-settings/tts/tts-neuphonic-http.py diff --git a/examples/update-settings/tts/neuphonic.py b/examples/update-settings/tts/tts-neuphonic.py similarity index 100% rename from examples/update-settings/tts/neuphonic.py rename to examples/update-settings/tts/tts-neuphonic.py diff --git a/examples/update-settings/tts/nvidia.py b/examples/update-settings/tts/tts-nvidia.py similarity index 100% rename from examples/update-settings/tts/nvidia.py rename to examples/update-settings/tts/tts-nvidia.py diff --git a/examples/update-settings/tts/openai.py b/examples/update-settings/tts/tts-openai.py similarity index 100% rename from examples/update-settings/tts/openai.py rename to examples/update-settings/tts/tts-openai.py diff --git a/examples/update-settings/tts/piper-http.py b/examples/update-settings/tts/tts-piper-http.py similarity index 100% rename from examples/update-settings/tts/piper-http.py rename to examples/update-settings/tts/tts-piper-http.py diff --git a/examples/update-settings/tts/piper.py b/examples/update-settings/tts/tts-piper.py similarity index 100% rename from examples/update-settings/tts/piper.py rename to examples/update-settings/tts/tts-piper.py diff --git a/examples/update-settings/tts/resembleai.py b/examples/update-settings/tts/tts-resembleai.py similarity index 100% rename from examples/update-settings/tts/resembleai.py rename to examples/update-settings/tts/tts-resembleai.py diff --git a/examples/update-settings/tts/rime-http.py b/examples/update-settings/tts/tts-rime-http.py similarity index 100% rename from examples/update-settings/tts/rime-http.py rename to examples/update-settings/tts/tts-rime-http.py diff --git a/examples/update-settings/tts/rime.py b/examples/update-settings/tts/tts-rime.py similarity index 100% rename from examples/update-settings/tts/rime.py rename to examples/update-settings/tts/tts-rime.py diff --git a/examples/update-settings/tts/sarvam-http.py b/examples/update-settings/tts/tts-sarvam-http.py similarity index 100% rename from examples/update-settings/tts/sarvam-http.py rename to examples/update-settings/tts/tts-sarvam-http.py diff --git a/examples/update-settings/tts/sarvam.py b/examples/update-settings/tts/tts-sarvam.py similarity index 100% rename from examples/update-settings/tts/sarvam.py rename to examples/update-settings/tts/tts-sarvam.py diff --git a/examples/update-settings/tts/speechmatics.py b/examples/update-settings/tts/tts-speechmatics.py similarity index 100% rename from examples/update-settings/tts/speechmatics.py rename to examples/update-settings/tts/tts-speechmatics.py diff --git a/examples/update-settings/tts/xtts.py b/examples/update-settings/tts/tts-xtts.py similarity index 100% rename from examples/update-settings/tts/xtts.py rename to examples/update-settings/tts/tts-xtts.py diff --git a/examples/video-avatar/heygen-transport.py b/examples/video-avatar/video-avatar-heygen-transport.py similarity index 100% rename from examples/video-avatar/heygen-transport.py rename to examples/video-avatar/video-avatar-heygen-transport.py diff --git a/examples/video-avatar/heygen-video-service.py b/examples/video-avatar/video-avatar-heygen-video-service.py similarity index 100% rename from examples/video-avatar/heygen-video-service.py rename to examples/video-avatar/video-avatar-heygen-video-service.py diff --git a/examples/video-avatar/lemonslice-transport.py b/examples/video-avatar/video-avatar-lemonslice-transport.py similarity index 100% rename from examples/video-avatar/lemonslice-transport.py rename to examples/video-avatar/video-avatar-lemonslice-transport.py diff --git a/examples/video-avatar/simli-layer.py b/examples/video-avatar/video-avatar-simli-layer.py similarity index 100% rename from examples/video-avatar/simli-layer.py rename to examples/video-avatar/video-avatar-simli-layer.py diff --git a/examples/video-avatar/tavus-transport.py b/examples/video-avatar/video-avatar-tavus-transport.py similarity index 100% rename from examples/video-avatar/tavus-transport.py rename to examples/video-avatar/video-avatar-tavus-transport.py diff --git a/examples/video-avatar/tavus-video-service.py b/examples/video-avatar/video-avatar-tavus-video-service.py similarity index 100% rename from examples/video-avatar/tavus-video-service.py rename to examples/video-avatar/video-avatar-tavus-video-service.py diff --git a/examples/video-processing/custom-video-track.py b/examples/video-processing/video-processing-custom-video-track.py similarity index 100% rename from examples/video-processing/custom-video-track.py rename to examples/video-processing/video-processing-custom-video-track.py diff --git a/examples/video-processing/gstreamer-filesrc.py b/examples/video-processing/video-processing-gstreamer-filesrc.py similarity index 100% rename from examples/video-processing/gstreamer-filesrc.py rename to examples/video-processing/video-processing-gstreamer-filesrc.py diff --git a/examples/video-processing/gstreamer-videotestsrc.py b/examples/video-processing/video-processing-gstreamer-videotestsrc.py similarity index 100% rename from examples/video-processing/gstreamer-videotestsrc.py rename to examples/video-processing/video-processing-gstreamer-videotestsrc.py diff --git a/examples/video-processing/local-mirror.py b/examples/video-processing/video-processing-local-mirror.py similarity index 100% rename from examples/video-processing/local-mirror.py rename to examples/video-processing/video-processing-local-mirror.py diff --git a/examples/video-processing/mirror.py b/examples/video-processing/video-processing-mirror.py similarity index 100% rename from examples/video-processing/mirror.py rename to examples/video-processing/video-processing-mirror.py diff --git a/examples/vision/anthropic.py b/examples/vision/vision-anthropic.py similarity index 100% rename from examples/vision/anthropic.py rename to examples/vision/vision-anthropic.py diff --git a/examples/vision/aws.py b/examples/vision/vision-aws.py similarity index 100% rename from examples/vision/aws.py rename to examples/vision/vision-aws.py diff --git a/examples/vision/gemini-flash.py b/examples/vision/vision-gemini-flash.py similarity index 100% rename from examples/vision/gemini-flash.py rename to examples/vision/vision-gemini-flash.py diff --git a/examples/vision/moondream.py b/examples/vision/vision-moondream.py similarity index 100% rename from examples/vision/moondream.py rename to examples/vision/vision-moondream.py diff --git a/examples/vision/openai-responses-http.py b/examples/vision/vision-openai-responses-http.py similarity index 100% rename from examples/vision/openai-responses-http.py rename to examples/vision/vision-openai-responses-http.py diff --git a/examples/vision/openai-responses.py b/examples/vision/vision-openai-responses.py similarity index 100% rename from examples/vision/openai-responses.py rename to examples/vision/vision-openai-responses.py diff --git a/examples/vision/openai.py b/examples/vision/vision-openai.py similarity index 100% rename from examples/vision/openai.py rename to examples/vision/vision-openai.py diff --git a/examples/voice/aicoustics.py b/examples/voice/voice-aicoustics.py similarity index 100% rename from examples/voice/aicoustics.py rename to examples/voice/voice-aicoustics.py diff --git a/examples/voice/assemblyai-turn-detection.py b/examples/voice/voice-assemblyai-turn-detection.py similarity index 100% rename from examples/voice/assemblyai-turn-detection.py rename to examples/voice/voice-assemblyai-turn-detection.py diff --git a/examples/voice/assemblyai.py b/examples/voice/voice-assemblyai.py similarity index 100% rename from examples/voice/assemblyai.py rename to examples/voice/voice-assemblyai.py diff --git a/examples/voice/asyncai-http.py b/examples/voice/voice-asyncai-http.py similarity index 100% rename from examples/voice/asyncai-http.py rename to examples/voice/voice-asyncai-http.py diff --git a/examples/voice/asyncai.py b/examples/voice/voice-asyncai.py similarity index 100% rename from examples/voice/asyncai.py rename to examples/voice/voice-asyncai.py diff --git a/examples/voice/aws-strands.py b/examples/voice/voice-aws-strands.py similarity index 100% rename from examples/voice/aws-strands.py rename to examples/voice/voice-aws-strands.py diff --git a/examples/voice/aws.py b/examples/voice/voice-aws.py similarity index 100% rename from examples/voice/aws.py rename to examples/voice/voice-aws.py diff --git a/examples/voice/azure-http.py b/examples/voice/voice-azure-http.py similarity index 100% rename from examples/voice/azure-http.py rename to examples/voice/voice-azure-http.py diff --git a/examples/voice/azure.py b/examples/voice/voice-azure.py similarity index 100% rename from examples/voice/azure.py rename to examples/voice/voice-azure.py diff --git a/examples/voice/camb.py b/examples/voice/voice-camb.py similarity index 100% rename from examples/voice/camb.py rename to examples/voice/voice-camb.py diff --git a/examples/voice/cartesia-http.py b/examples/voice/voice-cartesia-http.py similarity index 100% rename from examples/voice/cartesia-http.py rename to examples/voice/voice-cartesia-http.py diff --git a/examples/voice/cartesia.py b/examples/voice/voice-cartesia.py similarity index 100% rename from examples/voice/cartesia.py rename to examples/voice/voice-cartesia.py diff --git a/examples/voice/deepgram-flux-sagemaker.py b/examples/voice/voice-deepgram-flux-sagemaker.py similarity index 100% rename from examples/voice/deepgram-flux-sagemaker.py rename to examples/voice/voice-deepgram-flux-sagemaker.py diff --git a/examples/voice/deepgram-flux.py b/examples/voice/voice-deepgram-flux.py similarity index 100% rename from examples/voice/deepgram-flux.py rename to examples/voice/voice-deepgram-flux.py diff --git a/examples/voice/deepgram-http.py b/examples/voice/voice-deepgram-http.py similarity index 100% rename from examples/voice/deepgram-http.py rename to examples/voice/voice-deepgram-http.py diff --git a/examples/voice/deepgram-sagemaker.py b/examples/voice/voice-deepgram-sagemaker.py similarity index 100% rename from examples/voice/deepgram-sagemaker.py rename to examples/voice/voice-deepgram-sagemaker.py diff --git a/examples/voice/deepgram-vad.py b/examples/voice/voice-deepgram-vad.py similarity index 100% rename from examples/voice/deepgram-vad.py rename to examples/voice/voice-deepgram-vad.py diff --git a/examples/voice/deepgram.py b/examples/voice/voice-deepgram.py similarity index 100% rename from examples/voice/deepgram.py rename to examples/voice/voice-deepgram.py diff --git a/examples/voice/elevenlabs-http.py b/examples/voice/voice-elevenlabs-http.py similarity index 100% rename from examples/voice/elevenlabs-http.py rename to examples/voice/voice-elevenlabs-http.py diff --git a/examples/voice/elevenlabs.py b/examples/voice/voice-elevenlabs.py similarity index 100% rename from examples/voice/elevenlabs.py rename to examples/voice/voice-elevenlabs.py diff --git a/examples/voice/fal.py b/examples/voice/voice-fal.py similarity index 100% rename from examples/voice/fal.py rename to examples/voice/voice-fal.py diff --git a/examples/voice/fish.py b/examples/voice/voice-fish.py similarity index 100% rename from examples/voice/fish.py rename to examples/voice/voice-fish.py diff --git a/examples/voice/gladia-vad.py b/examples/voice/voice-gladia-vad.py similarity index 100% rename from examples/voice/gladia-vad.py rename to examples/voice/voice-gladia-vad.py diff --git a/examples/voice/gladia.py b/examples/voice/voice-gladia.py similarity index 100% rename from examples/voice/gladia.py rename to examples/voice/voice-gladia.py diff --git a/examples/voice/google-audio-in.py b/examples/voice/voice-google-audio-in.py similarity index 100% rename from examples/voice/google-audio-in.py rename to examples/voice/voice-google-audio-in.py diff --git a/examples/voice/google-gemini-tts.py b/examples/voice/voice-google-gemini-tts.py similarity index 100% rename from examples/voice/google-gemini-tts.py rename to examples/voice/voice-google-gemini-tts.py diff --git a/examples/voice/google-http.py b/examples/voice/voice-google-http.py similarity index 100% rename from examples/voice/google-http.py rename to examples/voice/voice-google-http.py diff --git a/examples/voice/google-image.py b/examples/voice/voice-google-image.py similarity index 100% rename from examples/voice/google-image.py rename to examples/voice/voice-google-image.py diff --git a/examples/voice/google.py b/examples/voice/voice-google.py similarity index 100% rename from examples/voice/google.py rename to examples/voice/voice-google.py diff --git a/examples/voice/gradium.py b/examples/voice/voice-gradium.py similarity index 100% rename from examples/voice/gradium.py rename to examples/voice/voice-gradium.py diff --git a/examples/voice/groq.py b/examples/voice/voice-groq.py similarity index 100% rename from examples/voice/groq.py rename to examples/voice/voice-groq.py diff --git a/examples/voice/hume.py b/examples/voice/voice-hume.py similarity index 100% rename from examples/voice/hume.py rename to examples/voice/voice-hume.py diff --git a/examples/voice/inworld-http.py b/examples/voice/voice-inworld-http.py similarity index 100% rename from examples/voice/inworld-http.py rename to examples/voice/voice-inworld-http.py diff --git a/examples/voice/inworld.py b/examples/voice/voice-inworld.py similarity index 100% rename from examples/voice/inworld.py rename to examples/voice/voice-inworld.py diff --git a/examples/voice/kokoro.py b/examples/voice/voice-kokoro.py similarity index 100% rename from examples/voice/kokoro.py rename to examples/voice/voice-kokoro.py diff --git a/examples/voice/krisp-viva.py b/examples/voice/voice-krisp-viva.py similarity index 100% rename from examples/voice/krisp-viva.py rename to examples/voice/voice-krisp-viva.py diff --git a/examples/voice/langchain.py b/examples/voice/voice-langchain.py similarity index 100% rename from examples/voice/langchain.py rename to examples/voice/voice-langchain.py diff --git a/examples/voice/lmnt.py b/examples/voice/voice-lmnt.py similarity index 100% rename from examples/voice/lmnt.py rename to examples/voice/voice-lmnt.py diff --git a/examples/voice/minimax.py b/examples/voice/voice-minimax.py similarity index 100% rename from examples/voice/minimax.py rename to examples/voice/voice-minimax.py diff --git a/examples/voice/neuphonic-http.py b/examples/voice/voice-neuphonic-http.py similarity index 100% rename from examples/voice/neuphonic-http.py rename to examples/voice/voice-neuphonic-http.py diff --git a/examples/voice/neuphonic.py b/examples/voice/voice-neuphonic.py similarity index 100% rename from examples/voice/neuphonic.py rename to examples/voice/voice-neuphonic.py diff --git a/examples/voice/nvidia.py b/examples/voice/voice-nvidia.py similarity index 100% rename from examples/voice/nvidia.py rename to examples/voice/voice-nvidia.py diff --git a/examples/voice/openai-http.py b/examples/voice/voice-openai-http.py similarity index 100% rename from examples/voice/openai-http.py rename to examples/voice/voice-openai-http.py diff --git a/examples/voice/openai-responses-http.py b/examples/voice/voice-openai-responses-http.py similarity index 100% rename from examples/voice/openai-responses-http.py rename to examples/voice/voice-openai-responses-http.py diff --git a/examples/voice/openai-responses.py b/examples/voice/voice-openai-responses.py similarity index 100% rename from examples/voice/openai-responses.py rename to examples/voice/voice-openai-responses.py diff --git a/examples/voice/openai.py b/examples/voice/voice-openai.py similarity index 100% rename from examples/voice/openai.py rename to examples/voice/voice-openai.py diff --git a/examples/voice/piper.py b/examples/voice/voice-piper.py similarity index 100% rename from examples/voice/piper.py rename to examples/voice/voice-piper.py diff --git a/examples/voice/resemble.py b/examples/voice/voice-resemble.py similarity index 100% rename from examples/voice/resemble.py rename to examples/voice/voice-resemble.py diff --git a/examples/voice/rime-http.py b/examples/voice/voice-rime-http.py similarity index 100% rename from examples/voice/rime-http.py rename to examples/voice/voice-rime-http.py diff --git a/examples/voice/rime.py b/examples/voice/voice-rime.py similarity index 100% rename from examples/voice/rime.py rename to examples/voice/voice-rime.py diff --git a/examples/voice/sarvam-http.py b/examples/voice/voice-sarvam-http.py similarity index 100% rename from examples/voice/sarvam-http.py rename to examples/voice/voice-sarvam-http.py diff --git a/examples/voice/sarvam.py b/examples/voice/voice-sarvam.py similarity index 100% rename from examples/voice/sarvam.py rename to examples/voice/voice-sarvam.py diff --git a/examples/voice/smallest.py b/examples/voice/voice-smallest.py similarity index 100% rename from examples/voice/smallest.py rename to examples/voice/voice-smallest.py diff --git a/examples/voice/soniox.py b/examples/voice/voice-soniox.py similarity index 100% rename from examples/voice/soniox.py rename to examples/voice/voice-soniox.py diff --git a/examples/voice/speechmatics-vad.py b/examples/voice/voice-speechmatics-vad.py similarity index 100% rename from examples/voice/speechmatics-vad.py rename to examples/voice/voice-speechmatics-vad.py diff --git a/examples/voice/speechmatics.py b/examples/voice/voice-speechmatics.py similarity index 100% rename from examples/voice/speechmatics.py rename to examples/voice/voice-speechmatics.py diff --git a/examples/voice/xai.py b/examples/voice/voice-xai.py similarity index 100% rename from examples/voice/xai.py rename to examples/voice/voice-xai.py diff --git a/examples/voice/xtts.py b/examples/voice/voice-xtts.py similarity index 100% rename from examples/voice/xtts.py rename to examples/voice/voice-xtts.py From 080da8b94cb9ce5c96d444240068274ed14893ad Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 31 Mar 2026 22:09:42 -0400 Subject: [PATCH 07/11] Update eval script paths to match renamed example files --- scripts/evals/run-release-evals.py | 256 ++++++++++++++--------------- 1 file changed, 128 insertions(+), 128 deletions(-) diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index be3bd1295..3a6055772 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -97,70 +97,70 @@ EVAL_COMPLETE_TURN = EvalConfig( TESTS_VOICE = [ - ("voice/cartesia.py", EVAL_SIMPLE_MATH), - ("voice/cartesia-http.py", EVAL_SIMPLE_MATH), - ("voice/speechmatics.py", EVAL_SIMPLE_MATH), - ("voice/speechmatics-vad.py", EVAL_SIMPLE_MATH), - ("voice/langchain.py", EVAL_SIMPLE_MATH), - ("voice/deepgram.py", EVAL_SIMPLE_MATH), - ("voice/deepgram-flux.py", EVAL_SIMPLE_MATH), - ("voice/deepgram-http.py", EVAL_SIMPLE_MATH), - ("voice/elevenlabs.py", EVAL_SIMPLE_MATH), - ("voice/elevenlabs-http.py", EVAL_SIMPLE_MATH), - ("voice/xai.py", EVAL_SIMPLE_MATH), - ("voice/azure.py", EVAL_SIMPLE_MATH), - ("voice/azure-http.py", EVAL_SIMPLE_MATH), - ("voice/openai.py", EVAL_SIMPLE_MATH), - ("voice/openai-http.py", EVAL_SIMPLE_MATH), - ("voice/gladia.py", EVAL_SIMPLE_MATH), - ("voice/gladia-vad.py", EVAL_SIMPLE_MATH), - ("voice/lmnt.py", EVAL_SIMPLE_MATH), - ("voice/groq.py", EVAL_SIMPLE_MATH), - ("voice/aws.py", EVAL_SIMPLE_MATH), - ("voice/aws-strands.py", EVAL_WEATHER), - ("voice/google-gemini-tts.py", EVAL_SIMPLE_MATH), - ("voice/google.py", EVAL_SIMPLE_MATH), - ("voice/google-http.py", EVAL_SIMPLE_MATH), - ("voice/assemblyai.py", EVAL_SIMPLE_MATH), - ("voice/krisp-viva.py", EVAL_SIMPLE_MATH), - ("voice/rime.py", EVAL_SIMPLE_MATH), - ("voice/rime-http.py", EVAL_SIMPLE_MATH), - ("voice/nvidia.py", EVAL_SIMPLE_MATH), - ("voice/google-audio-in.py", EVAL_SIMPLE_MATH), - ("voice/fish.py", EVAL_SIMPLE_MATH), - ("voice/neuphonic.py", EVAL_SIMPLE_MATH), - ("voice/neuphonic-http.py", EVAL_SIMPLE_MATH), - ("voice/fal.py", EVAL_SIMPLE_MATH), - ("voice/minimax.py", EVAL_SIMPLE_MATH), - ("voice/sarvam.py", EVAL_SIMPLE_MATH), - ("voice/sarvam-http.py", EVAL_SIMPLE_MATH), - ("voice/soniox.py", EVAL_SIMPLE_MATH), - ("voice/inworld.py", EVAL_SIMPLE_MATH), - ("voice/inworld-http.py", EVAL_SIMPLE_MATH), - ("voice/asyncai.py", EVAL_SIMPLE_MATH), - ("voice/asyncai-http.py", EVAL_SIMPLE_MATH), - ("voice/aicoustics.py", EVAL_SIMPLE_MATH), - ("voice/hume.py", EVAL_SIMPLE_MATH), - ("voice/gradium.py", EVAL_SIMPLE_MATH), - ("voice/camb.py", EVAL_SIMPLE_MATH), - ("voice/piper.py", EVAL_SIMPLE_MATH), - ("voice/kokoro.py", EVAL_SIMPLE_MATH), - ("voice/resemble.py", EVAL_SIMPLE_MATH), - ("voice/smallest.py", EVAL_SIMPLE_MATH), - ("voice/openai-responses.py", EVAL_SIMPLE_MATH), - ("voice/openai-responses-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-cartesia.py", EVAL_SIMPLE_MATH), + ("voice/voice-cartesia-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-speechmatics.py", EVAL_SIMPLE_MATH), + ("voice/voice-speechmatics-vad.py", EVAL_SIMPLE_MATH), + ("voice/voice-langchain.py", EVAL_SIMPLE_MATH), + ("voice/voice-deepgram.py", EVAL_SIMPLE_MATH), + ("voice/voice-deepgram-flux.py", EVAL_SIMPLE_MATH), + ("voice/voice-deepgram-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-elevenlabs.py", EVAL_SIMPLE_MATH), + ("voice/voice-elevenlabs-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-xai.py", EVAL_SIMPLE_MATH), + ("voice/voice-azure.py", EVAL_SIMPLE_MATH), + ("voice/voice-azure-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-openai.py", EVAL_SIMPLE_MATH), + ("voice/voice-openai-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-gladia.py", EVAL_SIMPLE_MATH), + ("voice/voice-gladia-vad.py", EVAL_SIMPLE_MATH), + ("voice/voice-lmnt.py", EVAL_SIMPLE_MATH), + ("voice/voice-groq.py", EVAL_SIMPLE_MATH), + ("voice/voice-aws.py", EVAL_SIMPLE_MATH), + ("voice/voice-aws-strands.py", EVAL_WEATHER), + ("voice/voice-google-gemini-tts.py", EVAL_SIMPLE_MATH), + ("voice/voice-google.py", EVAL_SIMPLE_MATH), + ("voice/voice-google-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-assemblyai.py", EVAL_SIMPLE_MATH), + ("voice/voice-krisp-viva.py", EVAL_SIMPLE_MATH), + ("voice/voice-rime.py", EVAL_SIMPLE_MATH), + ("voice/voice-rime-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-nvidia.py", EVAL_SIMPLE_MATH), + ("voice/voice-google-audio-in.py", EVAL_SIMPLE_MATH), + ("voice/voice-fish.py", EVAL_SIMPLE_MATH), + ("voice/voice-neuphonic.py", EVAL_SIMPLE_MATH), + ("voice/voice-neuphonic-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-fal.py", EVAL_SIMPLE_MATH), + ("voice/voice-minimax.py", EVAL_SIMPLE_MATH), + ("voice/voice-sarvam.py", EVAL_SIMPLE_MATH), + ("voice/voice-sarvam-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-soniox.py", EVAL_SIMPLE_MATH), + ("voice/voice-inworld.py", EVAL_SIMPLE_MATH), + ("voice/voice-inworld-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-asyncai.py", EVAL_SIMPLE_MATH), + ("voice/voice-asyncai-http.py", EVAL_SIMPLE_MATH), + ("voice/voice-aicoustics.py", EVAL_SIMPLE_MATH), + ("voice/voice-hume.py", EVAL_SIMPLE_MATH), + ("voice/voice-gradium.py", EVAL_SIMPLE_MATH), + ("voice/voice-camb.py", EVAL_SIMPLE_MATH), + ("voice/voice-piper.py", EVAL_SIMPLE_MATH), + ("voice/voice-kokoro.py", EVAL_SIMPLE_MATH), + ("voice/voice-resemble.py", EVAL_SIMPLE_MATH), + ("voice/voice-smallest.py", EVAL_SIMPLE_MATH), + ("voice/voice-openai-responses.py", EVAL_SIMPLE_MATH), + ("voice/voice-openai-responses-http.py", EVAL_SIMPLE_MATH), # Needs a local XTTS docker instance running. - # ("voice/xtts.py", EVAL_SIMPLE_MATH), + # ("voice/voice-xtts.py", EVAL_SIMPLE_MATH), ] TESTS_VISION = [ - ("vision/openai.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), - ("vision/openai-responses.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), - ("vision/openai-responses-http.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), - ("vision/anthropic.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), - ("vision/aws.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), - ("vision/gemini-flash.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), - ("vision/moondream.py", EVAL_VISION_IMAGE()), + ("vision/vision-openai.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), + ("vision/vision-openai-responses.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), + ("vision/vision-openai-responses-http.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), + ("vision/vision-anthropic.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), + ("vision/vision-aws.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), + ("vision/vision-gemini-flash.py", EVAL_VISION_IMAGE(eval_speaks_first=True)), + ("vision/vision-moondream.py", EVAL_VISION_IMAGE()), ] # For a few major services, we also test parallel function calling. @@ -169,91 +169,91 @@ TESTS_VISION = [ TESTS_FUNCTION_CALLING = [ ("getting-started/07-function-calling.py", EVAL_WEATHER), ("getting-started/07-function-calling.py", EVAL_WEATHER_AND_RESTAURANT), - ("function-calling/openai-responses.py", EVAL_WEATHER), - ("function-calling/openai-responses.py", EVAL_WEATHER_AND_RESTAURANT), - ("function-calling/openai-responses-http.py", EVAL_WEATHER), - ("function-calling/openai-responses-http.py", EVAL_WEATHER_AND_RESTAURANT), - ("function-calling/anthropic.py", EVAL_WEATHER), - ("function-calling/anthropic.py", EVAL_WEATHER_AND_RESTAURANT), - ("function-calling/openai.py", EVAL_WEATHER), - ("function-calling/google.py", EVAL_WEATHER), - ("function-calling/google.py", EVAL_WEATHER_AND_RESTAURANT), - ("function-calling/groq.py", EVAL_WEATHER), - ("function-calling/grok.py", EVAL_WEATHER), - ("function-calling/azure.py", EVAL_WEATHER), - ("function-calling/fireworks.py", EVAL_WEATHER), - ("function-calling/nvidia.py", EVAL_WEATHER), - ("function-calling/cerebras.py", EVAL_WEATHER), - ("function-calling/openrouter.py", EVAL_WEATHER), - ("function-calling/perplexity.py", EVAL_WEATHER), - ("function-calling/google-vertex.py", EVAL_WEATHER), - ("function-calling/qwen.py", EVAL_WEATHER), - ("function-calling/aws.py", EVAL_WEATHER), - ("function-calling/sambanova.py", EVAL_WEATHER), - ("function-calling/aws.py", EVAL_WEATHER_AND_RESTAURANT), - ("function-calling/nebius.py", EVAL_WEATHER), - ("function-calling/mistral.py", EVAL_WEATHER), - ("function-calling/sarvam.py", EVAL_WEATHER), - ("function-calling/novita.py", EVAL_WEATHER), + ("function-calling/function-calling-openai-responses.py", EVAL_WEATHER), + ("function-calling/function-calling-openai-responses.py", EVAL_WEATHER_AND_RESTAURANT), + ("function-calling/function-calling-openai-responses-http.py", EVAL_WEATHER), + ("function-calling/function-calling-openai-responses-http.py", EVAL_WEATHER_AND_RESTAURANT), + ("function-calling/function-calling-anthropic.py", EVAL_WEATHER), + ("function-calling/function-calling-anthropic.py", EVAL_WEATHER_AND_RESTAURANT), + ("function-calling/function-calling-openai.py", EVAL_WEATHER), + ("function-calling/function-calling-google.py", EVAL_WEATHER), + ("function-calling/function-calling-google.py", EVAL_WEATHER_AND_RESTAURANT), + ("function-calling/function-calling-groq.py", EVAL_WEATHER), + ("function-calling/function-calling-grok.py", EVAL_WEATHER), + ("function-calling/function-calling-azure.py", EVAL_WEATHER), + ("function-calling/function-calling-fireworks.py", EVAL_WEATHER), + ("function-calling/function-calling-nvidia.py", EVAL_WEATHER), + ("function-calling/function-calling-cerebras.py", EVAL_WEATHER), + ("function-calling/function-calling-openrouter.py", EVAL_WEATHER), + ("function-calling/function-calling-perplexity.py", EVAL_WEATHER), + ("function-calling/function-calling-google-vertex.py", EVAL_WEATHER), + ("function-calling/function-calling-qwen.py", EVAL_WEATHER), + ("function-calling/function-calling-aws.py", EVAL_WEATHER), + ("function-calling/function-calling-sambanova.py", EVAL_WEATHER), + ("function-calling/function-calling-aws.py", EVAL_WEATHER_AND_RESTAURANT), + ("function-calling/function-calling-nebius.py", EVAL_WEATHER), + ("function-calling/function-calling-mistral.py", EVAL_WEATHER), + ("function-calling/function-calling-sarvam.py", EVAL_WEATHER), + ("function-calling/function-calling-novita.py", EVAL_WEATHER), # Video - ("function-calling/anthropic-video.py", EVAL_VISION_CAMERA), - ("function-calling/aws-video.py", EVAL_VISION_CAMERA), - ("function-calling/google-video.py", EVAL_VISION_CAMERA), - ("function-calling/moondream-video.py", EVAL_VISION_CAMERA), - ("function-calling/openai-video.py", EVAL_VISION_CAMERA), - ("function-calling/openai-responses-video.py", EVAL_VISION_CAMERA), - ("function-calling/openai-responses-video-http.py", EVAL_VISION_CAMERA), + ("function-calling/function-calling-anthropic-video.py", EVAL_VISION_CAMERA), + ("function-calling/function-calling-aws-video.py", EVAL_VISION_CAMERA), + ("function-calling/function-calling-google-video.py", EVAL_VISION_CAMERA), + ("function-calling/function-calling-moondream-video.py", EVAL_VISION_CAMERA), + ("function-calling/function-calling-openai-video.py", EVAL_VISION_CAMERA), + ("function-calling/function-calling-openai-responses-video.py", EVAL_VISION_CAMERA), + ("function-calling/function-calling-openai-responses-video-http.py", EVAL_VISION_CAMERA), # Currently not working. - # ("function-calling/together.py", EVAL_WEATHER), - # ("function-calling/deepseek.py", EVAL_WEATHER), - # ("function-calling/gemini-openai-format.py", EVAL_WEATHER), + # ("function-calling/function-calling-together.py", EVAL_WEATHER), + # ("function-calling/function-calling-deepseek.py", EVAL_WEATHER), + # ("function-calling/function-calling-gemini-openai-format.py", EVAL_WEATHER), ] TESTS_FEATURES = [ - ("features/switch-languages.py", EVAL_SWITCH_LANGUAGE), - ("features/voicemail-detection.py", EVAL_VOICEMAIL), - ("features/voicemail-detection.py", EVAL_CONVERSATION), - ("features/concurrent-llm-evaluation.py", EVAL_SIMPLE_MATH), + ("features/features-switch-languages.py", EVAL_SWITCH_LANGUAGE), + ("features/features-voicemail-detection.py", EVAL_VOICEMAIL), + ("features/features-voicemail-detection.py", EVAL_CONVERSATION), + ("features/features-concurrent-llm-evaluation.py", EVAL_SIMPLE_MATH), ] TESTS_REALTIME = [ - ("realtime/openai.py", EVAL_WEATHER), - ("realtime/openai-beta.py", EVAL_WEATHER), + ("realtime/realtime-openai.py", EVAL_WEATHER), + ("realtime/realtime-openai-beta.py", EVAL_WEATHER), # OpenAI Realtime not released on Azure yet - # ("realtime/azure.py", EVAL_WEATHER), - ("realtime/azure-beta.py", EVAL_WEATHER), - ("realtime/openai-text.py", EVAL_WEATHER), - ("realtime/openai-beta-text.py", EVAL_WEATHER), - ("realtime/openai-live-video.py", EVAL_VISION_CAMERA), - ("realtime/gemini-live.py", EVAL_SIMPLE_MATH), - ("realtime/gemini-live-local-vad.py", EVAL_SIMPLE_MATH), - ("realtime/gemini-live-function-calling.py", EVAL_WEATHER), - ("realtime/gemini-live-video.py", EVAL_VISION_CAMERA), - ("realtime/gemini-live-google-search.py", EVAL_ONLINE_SEARCH), - ("realtime/gemini-live-vertex-function-calling.py", EVAL_WEATHER), + # ("realtime/realtime-azure.py", EVAL_WEATHER), + ("realtime/realtime-azure-beta.py", EVAL_WEATHER), + ("realtime/realtime-openai-text.py", EVAL_WEATHER), + ("realtime/realtime-openai-beta-text.py", EVAL_WEATHER), + ("realtime/realtime-openai-live-video.py", EVAL_VISION_CAMERA), + ("realtime/realtime-gemini-live.py", EVAL_SIMPLE_MATH), + ("realtime/realtime-gemini-live-local-vad.py", EVAL_SIMPLE_MATH), + ("realtime/realtime-gemini-live-function-calling.py", EVAL_WEATHER), + ("realtime/realtime-gemini-live-video.py", EVAL_VISION_CAMERA), + ("realtime/realtime-gemini-live-google-search.py", EVAL_ONLINE_SEARCH), + ("realtime/realtime-gemini-live-vertex-function-calling.py", EVAL_WEATHER), # Currently not working. - # ("realtime/gemini-live-text.py", EVAL_SIMPLE_MATH), - ("realtime/aws-nova-sonic.py", EVAL_SIMPLE_MATH), - ("realtime/ultravox.py", EVAL_ORDER), - ("realtime/grok.py", EVAL_WEATHER), + # ("realtime/realtime-gemini-live-text.py", EVAL_SIMPLE_MATH), + ("realtime/realtime-aws-nova-sonic.py", EVAL_SIMPLE_MATH), + ("realtime/realtime-ultravox.py", EVAL_ORDER), + ("realtime/realtime-grok.py", EVAL_WEATHER), ] TESTS_VIDEO_AVATAR = [ - ("video-avatar/tavus-video-service.py", EVAL_SIMPLE_MATH), - ("video-avatar/heygen-video-service.py", EVAL_SIMPLE_MATH), - ("video-avatar/simli-layer.py", EVAL_SIMPLE_MATH), - ("video-avatar/lemonslice-transport.py", EVAL_SIMPLE_MATH), + ("video-avatar/video-avatar-tavus-video-service.py", EVAL_SIMPLE_MATH), + ("video-avatar/video-avatar-heygen-video-service.py", EVAL_SIMPLE_MATH), + ("video-avatar/video-avatar-simli-layer.py", EVAL_SIMPLE_MATH), + ("video-avatar/video-avatar-lemonslice-transport.py", EVAL_SIMPLE_MATH), ] TESTS_TURN_MANAGEMENT = [ - ("turn-management/filter-incomplete-turns.py", EVAL_COMPLETE_TURN), + ("turn-management/turn-management-filter-incomplete-turns.py", EVAL_COMPLETE_TURN), ] -TESTS_THINKING_AND_MCP = [ - ("thinking-and-mcp/thinking-anthropic.py", EVAL_SIMPLE_MATH), - ("thinking-and-mcp/thinking-google.py", EVAL_SIMPLE_MATH), - ("thinking-and-mcp/thinking-functions-anthropic.py", EVAL_FLIGHT_STATUS), - ("thinking-and-mcp/thinking-functions-google.py", EVAL_FLIGHT_STATUS), +TESTS_THINKING = [ + ("thinking/thinking-anthropic.py", EVAL_SIMPLE_MATH), + ("thinking/thinking-google.py", EVAL_SIMPLE_MATH), + ("thinking/thinking-functions-anthropic.py", EVAL_FLIGHT_STATUS), + ("thinking/thinking-functions-google.py", EVAL_FLIGHT_STATUS), ] TESTS = [ @@ -264,7 +264,7 @@ TESTS = [ *TESTS_REALTIME, *TESTS_VIDEO_AVATAR, *TESTS_TURN_MANAGEMENT, - *TESTS_THINKING_AND_MCP, + *TESTS_THINKING, ] From 6a84d021564262a1221a7f652780329d0d6757ea Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 31 Mar 2026 22:13:52 -0400 Subject: [PATCH 08/11] Update evals - Removed evals for removed services - Added eval for function-calling-deepseek.py --- scripts/evals/run-release-evals.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index 3a6055772..aae89650f 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -195,6 +195,7 @@ TESTS_FUNCTION_CALLING = [ ("function-calling/function-calling-mistral.py", EVAL_WEATHER), ("function-calling/function-calling-sarvam.py", EVAL_WEATHER), ("function-calling/function-calling-novita.py", EVAL_WEATHER), + ("function-calling/function-calling-deepseek.py", EVAL_WEATHER), # Video ("function-calling/function-calling-anthropic-video.py", EVAL_VISION_CAMERA), ("function-calling/function-calling-aws-video.py", EVAL_VISION_CAMERA), @@ -205,8 +206,6 @@ TESTS_FUNCTION_CALLING = [ ("function-calling/function-calling-openai-responses-video-http.py", EVAL_VISION_CAMERA), # Currently not working. # ("function-calling/function-calling-together.py", EVAL_WEATHER), - # ("function-calling/function-calling-deepseek.py", EVAL_WEATHER), - # ("function-calling/function-calling-gemini-openai-format.py", EVAL_WEATHER), ] TESTS_FEATURES = [ @@ -218,12 +217,9 @@ TESTS_FEATURES = [ TESTS_REALTIME = [ ("realtime/realtime-openai.py", EVAL_WEATHER), - ("realtime/realtime-openai-beta.py", EVAL_WEATHER), # OpenAI Realtime not released on Azure yet # ("realtime/realtime-azure.py", EVAL_WEATHER), - ("realtime/realtime-azure-beta.py", EVAL_WEATHER), ("realtime/realtime-openai-text.py", EVAL_WEATHER), - ("realtime/realtime-openai-beta-text.py", EVAL_WEATHER), ("realtime/realtime-openai-live-video.py", EVAL_VISION_CAMERA), ("realtime/realtime-gemini-live.py", EVAL_SIMPLE_MATH), ("realtime/realtime-gemini-live-local-vad.py", EVAL_SIMPLE_MATH), @@ -231,8 +227,6 @@ TESTS_REALTIME = [ ("realtime/realtime-gemini-live-video.py", EVAL_VISION_CAMERA), ("realtime/realtime-gemini-live-google-search.py", EVAL_ONLINE_SEARCH), ("realtime/realtime-gemini-live-vertex-function-calling.py", EVAL_WEATHER), - # Currently not working. - # ("realtime/realtime-gemini-live-text.py", EVAL_SIMPLE_MATH), ("realtime/realtime-aws-nova-sonic.py", EVAL_SIMPLE_MATH), ("realtime/realtime-ultravox.py", EVAL_ORDER), ("realtime/realtime-grok.py", EVAL_WEATHER), From 3ca656cae5b6babd152f68a4408abc7e87a9c4c0 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Tue, 31 Mar 2026 22:20:31 -0400 Subject: [PATCH 09/11] Update simli name to match others --- ...vatar-simli-layer.py => video-avatar-simli-video-service.py} | 0 scripts/evals/run-release-evals.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename examples/video-avatar/{video-avatar-simli-layer.py => video-avatar-simli-video-service.py} (100%) diff --git a/examples/video-avatar/video-avatar-simli-layer.py b/examples/video-avatar/video-avatar-simli-video-service.py similarity index 100% rename from examples/video-avatar/video-avatar-simli-layer.py rename to examples/video-avatar/video-avatar-simli-video-service.py diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index aae89650f..4924828c7 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -235,7 +235,7 @@ TESTS_REALTIME = [ TESTS_VIDEO_AVATAR = [ ("video-avatar/video-avatar-tavus-video-service.py", EVAL_SIMPLE_MATH), ("video-avatar/video-avatar-heygen-video-service.py", EVAL_SIMPLE_MATH), - ("video-avatar/video-avatar-simli-layer.py", EVAL_SIMPLE_MATH), + ("video-avatar/video-avatar-simli-video-service.py", EVAL_SIMPLE_MATH), ("video-avatar/video-avatar-lemonslice-transport.py", EVAL_SIMPLE_MATH), ] From bd6cbd7fe7e9013cc10289fc1936af00530139c8 Mon Sep 17 00:00:00 2001 From: Harshita Jain <77115160+harshitajain165@users.noreply.github.com> Date: Wed, 1 Apr 2026 23:14:04 +0530 Subject: [PATCH 10/11] feat: add Smallest AI STT service integration (#4162) Add SmallestSTTService using the Pulse WebSocket API for real-time transcription. Includes SmallestSTTSettings dataclass, 32-language support with resolve_language fallback, VAD-driven finalize signal, and SMALLEST_TTFS_P99 latency constant. Also adds X-Source and X-Pipecat-Version headers to Smallest STT and TTS WebSocket connections. --- examples/voice/voice-smallest.py | 10 +- src/pipecat/services/smallest/stt.py | 412 +++++++++++++++++++++++++++ src/pipecat/services/smallest/tts.py | 7 +- src/pipecat/services/stt_latency.py | 3 + 4 files changed, 428 insertions(+), 4 deletions(-) create mode 100644 src/pipecat/services/smallest/stt.py diff --git a/examples/voice/voice-smallest.py b/examples/voice/voice-smallest.py index 8ffa48245..f1b3dcf43 100644 --- a/examples/voice/voice-smallest.py +++ b/examples/voice/voice-smallest.py @@ -21,9 +21,10 @@ from pipecat.processors.aggregators.llm_response_universal import ( ) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport -from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.smallest.stt import SmallestSTTService from pipecat.services.smallest.tts import SmallestTTSService +from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -50,8 +51,11 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - stt = DeepgramSTTService( - api_key=os.getenv("DEEPGRAM_API_KEY"), + stt = SmallestSTTService( + api_key=os.getenv("SMALLEST_API_KEY"), + settings=SmallestSTTService.Settings( + language=Language.EN, + ), ) tts = SmallestTTSService( diff --git a/src/pipecat/services/smallest/stt.py b/src/pipecat/services/smallest/stt.py new file mode 100644 index 000000000..78fa0e44d --- /dev/null +++ b/src/pipecat/services/smallest/stt.py @@ -0,0 +1,412 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Smallest AI speech-to-text service implementation. + +This module provides a STT service using Smallest AI's Waves API: + +- ``SmallestSTTService``: WebSocket-based real-time STT. Streams audio + continuously and receives interim/final transcripts with low latency. +""" + +import asyncio +import json +from dataclasses import dataclass, field +from enum import Enum +from typing import Any, AsyncGenerator, Optional +from urllib.parse import urlencode + +from loguru import logger + +from pipecat import version as pipecat_version +from pipecat.frames.frames import ( + CancelFrame, + EndFrame, + ErrorFrame, + Frame, + InterimTranscriptionFrame, + StartFrame, + TranscriptionFrame, + VADUserStartedSpeakingFrame, + VADUserStoppedSpeakingFrame, +) +from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven +from pipecat.services.stt_latency import SMALLEST_TTFS_P99 +from pipecat.services.stt_service import WebsocketSTTService +from pipecat.transcriptions.language import Language, resolve_language +from pipecat.utils.time import time_now_iso8601 +from pipecat.utils.tracing.service_decorators import traced_stt + +try: + from websockets.asyncio.client import connect as websocket_connect + from websockets.protocol import State +except ModuleNotFoundError as e: + logger.error(f"Exception: {e}") + logger.error("In order to use Smallest, you need to `pip install pipecat-ai[smallest]`.") + raise Exception(f"Missing module: {e}") + + +def language_to_smallest_stt_language(language: Language) -> str: + """Convert a Language enum to Smallest STT language code. + + Args: + language: The Language enum value to convert. + + Returns: + The Smallest language code string. + """ + LANGUAGE_MAP = { + Language.BG: "bg", + Language.BN: "bn", + Language.CS: "cs", + Language.DA: "da", + Language.DE: "de", + Language.EN: "en", + Language.ES: "es", + Language.ET: "et", + Language.FI: "fi", + Language.FR: "fr", + Language.GU: "gu", + Language.HI: "hi", + Language.HU: "hu", + Language.IT: "it", + Language.KN: "kn", + Language.LT: "lt", + Language.LV: "lv", + Language.ML: "ml", + Language.MR: "mr", + Language.MT: "mt", + Language.NL: "nl", + Language.OR: "or", + Language.PA: "pa", + Language.PL: "pl", + Language.PT: "pt", + Language.RO: "ro", + Language.RU: "ru", + Language.SK: "sk", + Language.SV: "sv", + Language.TA: "ta", + Language.TE: "te", + Language.UK: "uk", + } + + return resolve_language(language, LANGUAGE_MAP) + + +class SmallestSTTModel(str, Enum): + """Available Smallest AI STT models.""" + + PULSE = "pulse" + + +@dataclass +class SmallestSTTSettings(STTSettings): + """Settings for SmallestSTTService. + + Parameters: + word_timestamps: Include word-level timestamps. + full_transcript: Include cumulative transcript. + sentence_timestamps: Include sentence-level timestamps. + redact_pii: Redact personally identifiable information. + redact_pci: Redact payment card information. + numerals: Convert spoken numerals to digits. + diarize: Enable speaker diarization. + """ + + word_timestamps: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + full_transcript: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + sentence_timestamps: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + redact_pii: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + redact_pci: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + numerals: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + diarize: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + +class SmallestSTTService(WebsocketSTTService): + """Smallest AI real-time speech-to-text service using the Pulse WebSocket API. + + Streams audio continuously over a WebSocket connection and receives + interim and final transcription results with low latency. Best suited + for real-time voice applications where immediate feedback is needed. + + Uses Pipecat's VAD to detect when the user stops speaking and sends + a finalize message to flush the final transcript. + + Example:: + + stt = SmallestSTTService( + api_key="your-api-key", + settings=SmallestSTTService.Settings( + language=Language.EN, + word_timestamps=True, + ), + ) + """ + + Settings = SmallestSTTSettings + _settings: Settings + + def __init__( + self, + *, + api_key: str, + base_url: str = "wss://api.smallest.ai", + encoding: str = "linear16", + sample_rate: Optional[int] = None, + settings: Optional[Settings] = None, + ttfs_p99_latency: Optional[float] = SMALLEST_TTFS_P99, + **kwargs, + ): + """Initialize the Smallest AI STT service. + + Args: + api_key: Smallest AI API key for authentication. + base_url: Base WebSocket URL for the Smallest API. + encoding: Audio encoding format. Defaults to "linear16". + sample_rate: Audio sample rate in Hz. If None, uses the pipeline's rate. + settings: Runtime-updatable settings for the STT service. + ttfs_p99_latency: P99 latency from speech end to final transcript in seconds. + **kwargs: Additional arguments passed to WebsocketSTTService. + """ + default_settings = self.Settings( + model=SmallestSTTModel.PULSE.value, + language=Language.EN, + word_timestamps=False, + full_transcript=False, + sentence_timestamps=False, + redact_pii=False, + redact_pci=False, + numerals="auto", + diarize=False, + ) + + if settings is not None: + default_settings.apply_update(settings) + + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + keepalive_timeout=10, + keepalive_interval=5, + settings=default_settings, + **kwargs, + ) + + self._api_key = api_key + self._base_url = base_url.rstrip("/") + self._encoding = encoding + self._receive_task = None + self._connected_event = asyncio.Event() + self._connected_event.set() + + def can_generate_metrics(self) -> bool: + """Check if this service can generate processing metrics.""" + return True + + def language_to_service_language(self, language: Language) -> Optional[str]: + """Convert a Language enum to Smallest service language format. + + Args: + language: The language to convert. + + Returns: + The Smallest-specific language code, or None if not supported. + """ + return language_to_smallest_stt_language(language) + + async def start(self, frame: StartFrame): + """Start the service and connect to the WebSocket.""" + await super().start(frame) + await self._connect() + + async def stop(self, frame: EndFrame): + """Stop the service and disconnect from the WebSocket.""" + await super().stop(frame) + await self._disconnect() + + async def cancel(self, frame: CancelFrame): + """Cancel the service and disconnect from the WebSocket.""" + await super().cancel(frame) + await self._disconnect() + + async def process_frame(self, frame: Frame, direction: FrameDirection): + """Process frames, handling VAD events for finalization.""" + await super().process_frame(frame, direction) + + if isinstance(frame, VADUserStartedSpeakingFrame): + await self.start_processing_metrics() + elif isinstance(frame, VADUserStoppedSpeakingFrame): + if self._websocket and self._websocket.state is State.OPEN: + try: + await self._websocket.send(json.dumps({"type": "finalize"})) + except Exception as e: + logger.warning(f"{self} failed to send finalize: {e}") + + async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]: + """Send audio to the Smallest Pulse WebSocket for transcription. + + Args: + audio: Raw PCM audio bytes. + + Yields: + None -- transcription results arrive via WebSocket messages. + """ + await self._connected_event.wait() + + if not self._websocket or self._websocket.state is State.CLOSED: + await self._connect() + + if self._websocket and self._websocket.state is State.OPEN: + try: + await self._websocket.send(audio) + except Exception as e: + yield ErrorFrame(error=f"Smallest STT error: {e}") + return + + yield None + + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if anything changed.""" + changed = await super()._update_settings(delta) + + if changed: + await self._disconnect() + await self._connect() + + return changed + + async def _connect(self): + self._connected_event.clear() + try: + await self._connect_websocket() + await super()._connect() + + if self._websocket and not self._receive_task: + self._receive_task = self.create_task( + self._receive_task_handler(self._report_error) + ) + finally: + self._connected_event.set() + + async def _disconnect(self): + await super()._disconnect() + + if self._receive_task: + await self.cancel_task(self._receive_task) + self._receive_task = None + + await self._disconnect_websocket() + + async def _connect_websocket(self): + """Establish WebSocket connection to the Smallest Pulse STT API.""" + try: + if self._websocket and self._websocket.state is State.OPEN: + return + + logger.debug("Connecting to Smallest STT") + + query_params = { + "language": self._settings.language, + "encoding": self._encoding, + "sample_rate": str(self.sample_rate), + "word_timestamps": str(self._settings.word_timestamps).lower(), + "full_transcript": str(self._settings.full_transcript).lower(), + "sentence_timestamps": str(self._settings.sentence_timestamps).lower(), + "redact_pii": str(self._settings.redact_pii).lower(), + "redact_pci": str(self._settings.redact_pci).lower(), + "numerals": self._settings.numerals, + "diarize": str(self._settings.diarize).lower(), + } + + ws_url = f"{self._base_url}/waves/v1/pulse/get_text?{urlencode(query_params)}" + + self._websocket = await websocket_connect( + ws_url, + additional_headers={ + "Authorization": f"Bearer {self._api_key}", + "X-Source": "pipecat", + "X-Pipecat-Version": pipecat_version(), + }, + ) + await self._call_event_handler("on_connected") + logger.debug("Connected to Smallest STT") + except Exception as e: + await self.push_error(error_msg=f"Smallest STT connection error: {e}", exception=e) + self._websocket = None + await self._call_event_handler("on_connection_error", f"{e}") + + async def _disconnect_websocket(self): + """Close the WebSocket connection.""" + try: + if self._websocket and self._websocket.state is State.OPEN: + logger.debug("Disconnecting from Smallest STT") + await self._websocket.close() + except Exception as e: + logger.error(f"{self} error closing websocket: {e}") + finally: + self._websocket = None + await self._call_event_handler("on_disconnected") + + def _get_websocket(self): + if self._websocket: + return self._websocket + raise Exception("Websocket not connected") + + async def _receive_messages(self): + """Receive and process messages from the Smallest Pulse WebSocket.""" + async for message in self._get_websocket(): + try: + data = json.loads(message) + await self._process_response(data) + except json.JSONDecodeError: + logger.warning(f"{self} received non-JSON message: {message}") + except Exception as e: + logger.error(f"{self} error processing message: {e}") + + async def _process_response(self, data: dict): + """Process a transcription response from the Pulse API. + + Args: + data: Parsed JSON response containing transcript data. + """ + is_final = data.get("is_final", False) + text = data.get("transcript", "").strip() + + if not text: + return + + if is_final: + await self.stop_processing_metrics() + logger.debug(f"Smallest final transcript: [{text}]") + await self._handle_transcription(text, True, data.get("language")) + await self.push_frame( + TranscriptionFrame( + text, + self._user_id, + time_now_iso8601(), + data.get("language"), + result=data, + ) + ) + else: + logger.trace(f"Smallest interim transcript: [{text}]") + await self.push_frame( + InterimTranscriptionFrame( + text, + self._user_id, + time_now_iso8601(), + data.get("language"), + result=data, + ) + ) + + @traced_stt + async def _handle_transcription( + self, transcript: str, is_final: bool, language: Optional[str] = None + ): + """Handle a transcription result with tracing.""" + pass diff --git a/src/pipecat/services/smallest/tts.py b/src/pipecat/services/smallest/tts.py index 15d107ffd..5ef4aaa49 100644 --- a/src/pipecat/services/smallest/tts.py +++ b/src/pipecat/services/smallest/tts.py @@ -19,6 +19,7 @@ from typing import Any, AsyncGenerator, Optional from loguru import logger +from pipecat import version as pipecat_version from pipecat.frames.frames import ( CancelFrame, EndFrame, @@ -298,7 +299,11 @@ class SmallestTTSService(InterruptibleTTSService): self._websocket = await websocket_connect( self._build_websocket_url(), - additional_headers={"Authorization": f"Bearer {self._api_key}"}, + additional_headers={ + "Authorization": f"Bearer {self._api_key}", + "X-Source": "pipecat", + "X-Pipecat-Version": pipecat_version(), + }, ) await self._call_event_handler("on_connected") diff --git a/src/pipecat/services/stt_latency.py b/src/pipecat/services/stt_latency.py index 21c33dfb0..403902379 100644 --- a/src/pipecat/services/stt_latency.py +++ b/src/pipecat/services/stt_latency.py @@ -53,3 +53,6 @@ SPEECHMATICS_TTFS_P99: float = 0.74 # These services run locally and should be replaced with measured values NVIDIA_TTFS_P99: float = DEFAULT_TTFS_P99 WHISPER_TTFS_P99: float = DEFAULT_TTFS_P99 + +# No benchmark available yet; using conservative default +SMALLEST_TTFS_P99: float = DEFAULT_TTFS_P99 From df68665ec11a8d1ba11f038cfe6487af168c8be2 Mon Sep 17 00:00:00 2001 From: Paul Kompfner Date: Wed, 1 Apr 2026 14:03:08 -0400 Subject: [PATCH 11/11] Add changelog entries for OpenAILLMContext removal --- changelog/4215.changed.md | 1 + changelog/4215.removed.2.md | 22 ++++++++++++++++++++++ changelog/4215.removed.3.md | 1 + changelog/4215.removed.4.md | 1 + changelog/4215.removed.5.md | 1 + changelog/4215.removed.6.md | 1 + changelog/4215.removed.md | 18 ++++++++++++++++++ 7 files changed, 45 insertions(+) create mode 100644 changelog/4215.changed.md create mode 100644 changelog/4215.removed.2.md create mode 100644 changelog/4215.removed.3.md create mode 100644 changelog/4215.removed.4.md create mode 100644 changelog/4215.removed.5.md create mode 100644 changelog/4215.removed.6.md create mode 100644 changelog/4215.removed.md diff --git a/changelog/4215.changed.md b/changelog/4215.changed.md new file mode 100644 index 000000000..b84a343dd --- /dev/null +++ b/changelog/4215.changed.md @@ -0,0 +1 @@ +- ⚠️ `BaseOpenAILLMService.get_chat_completions()` now accepts an `LLMContext` instead of `OpenAILLMInvocationParams`. If you override this method, update your signature accordingly. diff --git a/changelog/4215.removed.2.md b/changelog/4215.removed.2.md new file mode 100644 index 000000000..4d5f6c630 --- /dev/null +++ b/changelog/4215.removed.2.md @@ -0,0 +1,22 @@ +- ⚠️ Removed deprecated service-specific context and aggregator machinery, which was superseded by the universal `LLMContext` system. + + Service-specific classes removed: `AnthropicLLMContext`, `AnthropicContextAggregatorPair`, `AWSBedrockLLMContext`, `AWSBedrockContextAggregatorPair`, `OpenAIContextAggregatorPair`, and their user/assistant aggregators. Also removed `create_context_aggregator()` from `LLMService`, `OpenAILLMService`, `AnthropicLLMService`, and `AWSBedrockLLMService`. + + Base aggregator classes removed (from `pipecat.processors.aggregators.llm_response`): `BaseLLMResponseAggregator`, `LLMContextResponseAggregator`, `LLMUserContextAggregator`, `LLMAssistantContextAggregator`, `LLMUserResponseAggregator`, `LLMAssistantResponseAggregator`. + + From the developer's point of view, migrating will usually be a matter of going from this: + + ```python + context = OpenAILLMContext(messages, tools) + context_aggregator = llm.create_context_aggregator(context) + ``` + + To this: + + ```python + from pipecat.processors.aggregators.llm_context import LLMContext + from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair + + context = LLMContext(messages, tools) + context_aggregator = LLMContextAggregatorPair(context) + ``` diff --git a/changelog/4215.removed.3.md b/changelog/4215.removed.3.md new file mode 100644 index 000000000..4ef02fa89 --- /dev/null +++ b/changelog/4215.removed.3.md @@ -0,0 +1 @@ +- ⚠️ Removed deprecated frame types `LLMMessagesFrame` and `OpenAILLMContextAssistantTimestampFrame` from `pipecat.frames.frames`. Instead of `LLMMessagesFrame`, use `LLMContextFrame` with the new messages, or `LLMMessagesUpdateFrame` with `run_llm=True`. diff --git a/changelog/4215.removed.4.md b/changelog/4215.removed.4.md new file mode 100644 index 000000000..20bde1ce9 --- /dev/null +++ b/changelog/4215.removed.4.md @@ -0,0 +1 @@ +- ⚠️ Removed `GatedOpenAILLMContextAggregator` (from `pipecat.processors.aggregators.gated_open_ai_llm_context`). Use `GatedLLMContextAggregator` (from `pipecat.processors.aggregators.gated_llm_context`) instead. diff --git a/changelog/4215.removed.5.md b/changelog/4215.removed.5.md new file mode 100644 index 000000000..cbd84018d --- /dev/null +++ b/changelog/4215.removed.5.md @@ -0,0 +1 @@ +- ⚠️ Removed `VisionImageFrameAggregator` (from `pipecat.processors.aggregators.vision_image_frame`). Vision/image handling is now built into `LLMContext` (from `pipecat.processors.aggregators.llm_context`). See the `12*` examples for the recommended replacement pattern. diff --git a/changelog/4215.removed.6.md b/changelog/4215.removed.6.md new file mode 100644 index 000000000..28062359a --- /dev/null +++ b/changelog/4215.removed.6.md @@ -0,0 +1 @@ +- ⚠️ Removed deprecated compatibility modules: `pipecat.services.openai_realtime_beta` (use `pipecat.services.openai.realtime`), `pipecat.services.openai_realtime.context`, `pipecat.services.openai_realtime.frames`, `pipecat.services.openai.realtime.context`, `pipecat.services.openai.realtime.frames`, `pipecat.services.gemini_multimodal_live` (use `pipecat.services.google.gemini_live`), `pipecat.services.aws_nova_sonic.context` (use `pipecat.services.aws.nova_sonic`), `pipecat.services.google.openai` and `pipecat.services.google.llm_openai` (use `pipecat.services.google.llm`). diff --git a/changelog/4215.removed.md b/changelog/4215.removed.md new file mode 100644 index 000000000..96cb03c71 --- /dev/null +++ b/changelog/4215.removed.md @@ -0,0 +1,18 @@ +- ⚠️ Removed `OpenAILLMContext`, `OpenAILLMContextFrame`, and `OpenAILLMContext.from_messages()`. Use `LLMContext` (from `pipecat.processors.aggregators.llm_context`) and `LLMContextFrame` (from `pipecat.frames.frames`) instead. All services now exclusively use the universal `LLMContext`. + + From the developer's point of view, migrating will usually be a matter of going from this: + + ```python + context = OpenAILLMContext(messages, tools) + context_aggregator = llm.create_context_aggregator(context) + ``` + + To this: + + ```python + from pipecat.processors.aggregators.llm_context import LLMContext + from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair + + context = LLMContext(messages, tools) + context_aggregator = LLMContextAggregatorPair(context) + ```