#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
# Provenance: examples/foundational/15a-switch-languages.py, recovered from
# patch 90d11398e6b1 ("examples: add 15a-switch-languages", 2024-06-06).
#

"""Foundational example 15a: a voice bot that switches spoken language.

The LLM is given a ``switch_language`` tool. When it calls the tool, the
module-level ``current_language`` is updated, and two filter predicates use
that value to choose which text-to-speech branch of the pipeline is active.
"""

import asyncio
import aiohttp
import os
import sys

from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
    LLMAssistantContextAggregator,
    LLMUserContextAggregator
)
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.filters.function_filter import FunctionFilter
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.services.whisper import Model, WhisperSTTService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.vad.silero import SileroVADAnalyzer

from openai.types.chat import ChatCompletionToolParam

from runner import configure

from loguru import logger

from dotenv import load_dotenv
load_dotenv(override=True)

logger.remove(0)
logger.add(sys.stderr, level="DEBUG")

# Languages that have a matching TTS branch in the pipeline built by main().
SUPPORTED_LANGUAGES = ("English", "Spanish")

# Language the bot is currently speaking; read by the filter predicates below
# and rewritten by switch_language().
current_language = "English"


def _normalize_language(requested):
    """Return the canonical entry of SUPPORTED_LANGUAGES matching *requested*.

    Matching ignores surrounding whitespace and letter case. Returns ``None``
    when *requested* is not a string or names no supported language.
    """
    if not isinstance(requested, str):
        return None
    wanted = requested.strip().casefold()
    for language in SUPPORTED_LANGUAGES:
        if language.casefold() == wanted:
            return language
    return None


async def switch_language(llm, args):
    """Handle the LLM's ``switch_language`` tool call.

    Args:
        llm: The LLM service invoking the function (unused here).
        args: Tool-call arguments; expected to carry a ``"language"`` entry.

    Returns:
        A dict with a ``"voice"`` instruction for the LLM. On success it asks
        for answers in the new language; for an unsupported or missing
        language it explains which languages are available and the current
        language is left untouched.
    """
    global current_language

    language = _normalize_language(args.get("language"))
    if language is None:
        # Storing an unknown value would leave both filter predicates
        # returning False, so no TTS branch would be selected.
        logger.warning(f"Ignoring unsupported language request: {args!r}")
        supported = " and ".join(SUPPORTED_LANGUAGES)
        return {
            "voice": f"You can only speak {supported}. Keep answering in {current_language}."
        }

    current_language = language
    return {"voice": f"Your answers from now on should be in {current_language}."}


async def english_filter(frame) -> bool:
    """Return True while the selected language is English (frame is unused)."""
    return current_language == "English"


async def spanish_filter(frame) -> bool:
    """Return True while the selected language is Spanish (frame is unused)."""
    return current_language == "Spanish"


async def main(room_url: str, token):
    """Build the bilingual voice pipeline and run it until the task ends.

    Args:
        room_url: URL of the Daily room to join.
        token: Token passed to the Daily transport alongside the room URL.
    """
    async with aiohttp.ClientSession() as session:
        transport = DailyTransport(
            room_url,
            token,
            "Pipecat",
            DailyParams(
                audio_in_enabled=True,
                audio_out_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
                vad_audio_passthrough=True
            )
        )

        stt = WhisperSTTService(model=Model.LARGE)

        english_tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            voice_id="pNInz6obpgDQGcFmaJgB",
        )

        spanish_tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            model="eleven_multilingual_v2",
            voice_id="9F4C8ztpNUmXkdDDbz3J",
        )

        llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_API_KEY"),
            model="gpt-4o")
        llm.register_function("switch_language", switch_language)

        tools = [
            ChatCompletionToolParam(
                type="function",
                function={
                    "name": "switch_language",
                    "description": "Switch to another language when the user asks you to",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "language": {
                                "type": "string",
                                # Steer the model towards values that
                                # switch_language() accepts.
                                "enum": list(SUPPORTED_LANGUAGES),
                                "description": "The language the user wants you to speak",
                            },
                        },
                        "required": ["language"],
                    },
                })]
        messages = [
            {
                "role": "system",
                "content": (
                    "You are a helpful LLM in a WebRTC call. "
                    "Your goal is to demonstrate your capabilities. "
                    "Respond to what the user said in a creative and helpful way. "
                    "Your output should not include non-alphanumeric characters. "
                    "You can speak the following languages: 'English' and 'Spanish'."
                ),
            },
        ]

        context = OpenAILLMContext(messages, tools)
        tma_in = LLMUserContextAggregator(context)
        tma_out = LLMAssistantContextAggregator(context)

        pipeline = Pipeline([
            transport.input(),   # Transport user input
            stt,                 # STT
            tma_in,              # User responses
            llm,                 # LLM
            ParallelPipeline(    # TTS (bot will speak the chosen language)
                [FunctionFilter(english_filter), english_tts],  # English
                [FunctionFilter(spanish_filter), spanish_tts],  # Spanish
            ),
            transport.output(),  # Transport bot output
            tma_out              # Assistant spoken responses
        ])

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            messages.append(
                {
                    "role": "system",
                    "content": (
                        "Please introduce yourself to the user and let them know the languages you speak. "
                        f"Your initial responses should be in {current_language}."
                    ),
                })
            await task.queue_frames([LLMMessagesFrame(messages)])

        runner = PipelineRunner()

        await runner.run(task)


if __name__ == "__main__":
    (url, token) = configure()
    asyncio.run(main(url, token))