From 9db37439011ec0feee8bf80bc1d89b78c387bcbd Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Wed, 4 Dec 2024 12:35:21 -0500 Subject: [PATCH] Update pyproject.toml with a nim optional dep --- .../foundational/14j-function-calling-nim.py | 140 ++++++++++++++++++ pyproject.toml | 1 + 2 files changed, 141 insertions(+) create mode 100644 examples/foundational/14j-function-calling-nim.py diff --git a/examples/foundational/14j-function-calling-nim.py b/examples/foundational/14j-function-calling-nim.py new file mode 100644 index 000000000..68bb3b52e --- /dev/null +++ b/examples/foundational/14j-function-calling-nim.py @@ -0,0 +1,140 @@ +# +# Copyright (c) 2024, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os +import sys + +import aiohttp +from dotenv import load_dotenv +from loguru import logger +from openai.types.chat import ChatCompletionToolParam +from runner import configure + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.services.cartesia import CartesiaTTSService +from pipecat.services.nim import NimLLMService +from pipecat.services.openai import OpenAILLMContext +from pipecat.transports.services.daily import DailyParams, DailyTransport + +load_dotenv(override=True) + +logger.remove(0) +logger.add(sys.stderr, level="DEBUG") + + +async def start_fetch_weather(function_name, llm, context): + # note: we can't push a frame to the LLM here. the bot + # can interrupt itself and/or cause audio overlapping glitches. + # possible question for Aleix and Chad about what the right way + # to trigger speech is, now, with the new queues/async/sync refactors. + # await llm.push_frame(TextFrame("Let me check on that.")) + logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}") + + +async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback): + await result_callback({"conditions": "nice", "temperature": "75"}) + + +async def main(): + async with aiohttp.ClientSession() as session: + (room_url, token) = await configure(session) + + transport = DailyTransport( + room_url, + token, + "Respond bot", + DailyParams( + audio_out_enabled=True, + transcription_enabled=True, + vad_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady + # text_filter=MarkdownTextFilter(), + ) + + llm = NimLLMService( + api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct" + ) + # Register a function_name of None to get all functions + # sent to the same callback with an additional function_name parameter. + llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather) + + tools = [ + ChatCompletionToolParam( + type="function", + function={ + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + }, + "format": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The temperature unit to use. Infer this from the users location.", + }, + }, + "required": ["location", "format"], + }, + }, + ) + ] + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = OpenAILLMContext(messages, tools) + context_aggregator = llm.create_context_aggregator(context) + + pipeline = Pipeline( + [ + transport.input(), + context_aggregator.user(), + llm, + tts, + transport.output(), + context_aggregator.assistant(), + ] + ) + + task = PipelineTask( + pipeline, + PipelineParams( + allow_interruptions=True, + enable_metrics=True, + enable_usage_metrics=True, + ), + ) + + @transport.event_handler("on_first_participant_joined") + async def on_first_participant_joined(transport, participant): + await transport.capture_participant_transcription(participant["id"]) + # Kick off the conversation. + await task.queue_frames([context_aggregator.user().get_context_frame()]) + + runner = PipelineRunner() + + await runner.run(task) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index 321d3a9bb..d487a7dd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ livekit = [ "livekit~=0.17.5", "livekit-api~=0.7.1", "tenacity~=8.5.0" ] lmnt = [ "lmnt~=1.1.4" ] local = [ "pyaudio~=0.2.14" ] moondream = [ "einops~=0.8.0", "timm~=1.0.8", "transformers~=4.44.0" ] +nim = [ "openai~=1.50.2" ] noisereduce = [ "noisereduce~=3.0.3" ] openai = [ "openai~=1.50.2", "websockets~=13.1", "python-deepcompare~=1.0.1" ] openpipe = [ "openpipe~=4.24.0" ]