diff --git a/CHANGELOG.md b/CHANGELOG.md index 11d1bf922..c9322dd94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 base class `BaseWhisperSTTService` to handle common Whisper API functionality. +- Added `PerplexityLLMService` for Perplexity NIM API integration, with an + OpenAI-compatible interface. Also, added foundational example + `14n-function-calling-perplexity.py`. + ### Changed - Updated foundation example `14f-function-calling-groq.py` to use diff --git a/README.md b/README.md index 91541e616..8e796b8d8 100644 --- a/README.md +++ b/README.md @@ -55,17 +55,17 @@ pip install "pipecat-ai[option,...]" ### Available services -| Category | Services | Install Command Example | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | -| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` | -| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` | -| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` | -| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[openai]"` | -| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` | -| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` | -| Vision & Image | [Moondream](https://docs.pipecat.ai/server/services/vision/moondream), [fal](https://docs.pipecat.ai/server/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` | -| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` | -| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` | +| Category | Services | Install Command Example | +| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | +| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` | +| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` | +| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` | +| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` | +| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` | +| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` | +| Vision & Image | [Moondream](https://docs.pipecat.ai/server/services/vision/moondream), [fal](https://docs.pipecat.ai/server/services/image-generation/fal) | `pip install "pipecat-ai[moondream]"` | +| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` | +| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` | 📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services) diff --git a/examples/foundational/14n-function-calling-perplexity.py b/examples/foundational/14n-function-calling-perplexity.py new file mode 100644 index 000000000..ba4d252d4 --- /dev/null +++ b/examples/foundational/14n-function-calling-perplexity.py @@ -0,0 +1,106 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""This example demonstrates using the Perplexity API as a drop-in replacement for OpenAI. + +Note that while this file is in the function-calling examples, Perplexity's API does not +currently support function calling. The example shows basic chat completion functionality +using Perplexity's API while maintaining compatibility with the OpenAI interface. +""" + +import asyncio +import os +import sys + +import aiohttp +from dotenv import load_dotenv +from loguru import logger +from openai.types.chat import ChatCompletionToolParam +from runner import configure + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import TTSSpeakFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.services.cartesia import CartesiaTTSService +from pipecat.services.openai import OpenAILLMContext, OpenAILLMService +from pipecat.services.perplexity import PerplexityLLMService +from pipecat.transports.services.daily import DailyParams, DailyTransport + +load_dotenv(override=True) + +logger.remove(0) +logger.add(sys.stderr, level="DEBUG") + + +async def main(): + async with aiohttp.ClientSession() as session: + (room_url, token) = await configure(session) + + transport = DailyTransport( + room_url, + token, + "Respond bot", + DailyParams( + audio_out_enabled=True, + transcription_enabled=True, + vad_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22", # British Lady + ) + + llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY"), model="sonar") + + messages = [ + { + "role": "user", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = OpenAILLMContext(messages) + context_aggregator = llm.create_context_aggregator(context) + + pipeline = Pipeline( + [ + transport.input(), + context_aggregator.user(), + llm, + tts, + transport.output(), + context_aggregator.assistant(), + ] + ) + + task = PipelineTask( + pipeline, + PipelineParams( + allow_interruptions=True, + enable_metrics=True, + enable_usage_metrics=True, + report_only_initial_ttfb=True, + ), + ) + + @transport.event_handler("on_first_participant_joined") + async def on_first_participant_joined(transport, participant): + await transport.capture_participant_transcription(participant["id"]) + # Kick off the conversation. + await task.queue_frames([context_aggregator.user().get_context_frame()]) + + runner = PipelineRunner() + + await runner.run(task) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/pyproject.toml b/pyproject.toml index baac7f9e9..46b64c6ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ nim = [ "openai~=1.59.6" ] noisereduce = [ "noisereduce~=3.0.3" ] openai = [ "openai~=1.59.6", "websockets~=13.1", "python-deepcompare~=2.1.0" ] openpipe = [ "openpipe~=4.45.0" ] +perplexity = [ "openai~=1.59.6" ] playht = [ "pyht~=0.1.6", "websockets~=13.1" ] riva = [ "nvidia-riva-client~=2.18.0" ] sentry = [ "sentry-sdk~=2.20.0" ] diff --git a/src/pipecat/services/perplexity.py b/src/pipecat/services/perplexity.py new file mode 100644 index 000000000..8152461c3 --- /dev/null +++ b/src/pipecat/services/perplexity.py @@ -0,0 +1,141 @@ +# +# Copyright (c) 2024–2025, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +from typing import List + +from loguru import logger + +from pipecat.metrics.metrics import LLMTokenUsage +from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext +from pipecat.services.openai import OpenAILLMService + +try: + from openai import ( + NOT_GIVEN, + AsyncStream, + ) + from openai.types.chat import ChatCompletionChunk, ChatCompletionMessageParam +except ModuleNotFoundError as e: + logger.error(f"Exception: {e}") + logger.error( + "In order to use Perplexity, you need to `pip install pipecat-ai[perplexity]`. Also, set `PERPLEXITY_API_KEY` environment variable." + ) + raise Exception(f"Missing module: {e}") + + +class PerplexityLLMService(OpenAILLMService): + """A service for interacting with Perplexity's API. + + This service extends OpenAILLMService to work with Perplexity's API while maintaining + compatibility with the OpenAI-style interface. It specifically handles the difference + in token usage reporting between Perplexity (incremental) and OpenAI (final summary). + + Args: + api_key (str): The API key for accessing Perplexity's API + base_url (str, optional): The base URL for Perplexity's API. Defaults to "https://api.perplexity.ai" + model (str, optional): The model identifier to use. Defaults to "sonar" + **kwargs: Additional keyword arguments passed to OpenAILLMService + """ + + def __init__( + self, + *, + api_key: str, + base_url: str = "https://api.perplexity.ai", + model: str = "sonar", + **kwargs, + ): + super().__init__(api_key=api_key, base_url=base_url, model=model, **kwargs) + # Counters for accumulating token usage metrics + self._prompt_tokens = 0 + self._completion_tokens = 0 + self._total_tokens = 0 + self._has_reported_prompt_tokens = False + self._is_processing = False + + async def get_chat_completions( + self, context: OpenAILLMContext, messages: List[ChatCompletionMessageParam] + ) -> AsyncStream[ChatCompletionChunk]: + """Get chat completions from Perplexity API using OpenAI-compatible parameters. + + Args: + context: The context containing conversation history and settings + messages: The messages to send to the API + + Returns: + A stream of chat completion chunks + """ + params = { + "model": self.model_name, + "stream": True, + "messages": messages, + } + + # Add OpenAI-compatible parameters if they're set + if self._settings["frequency_penalty"] is not NOT_GIVEN: + params["frequency_penalty"] = self._settings["frequency_penalty"] + if self._settings["presence_penalty"] is not NOT_GIVEN: + params["presence_penalty"] = self._settings["presence_penalty"] + if self._settings["temperature"] is not NOT_GIVEN: + params["temperature"] = self._settings["temperature"] + if self._settings["top_p"] is not NOT_GIVEN: + params["top_p"] = self._settings["top_p"] + if self._settings["max_tokens"] is not NOT_GIVEN: + params["max_tokens"] = self._settings["max_tokens"] + + chunks = await self._client.chat.completions.create(**params) + return chunks + + async def _process_context(self, context: OpenAILLMContext): + """Process a context through the LLM and accumulate token usage metrics. + + This method overrides the parent class implementation to handle + Perplexity's incremental token reporting style, accumulating the counts + and reporting them once at the end of processing. + + Args: + context (OpenAILLMContext): The context to process, containing messages + and other information needed for the LLM interaction. + """ + # Reset all counters and flags at the start of processing + self._prompt_tokens = 0 + self._completion_tokens = 0 + self._total_tokens = 0 + self._has_reported_prompt_tokens = False + self._is_processing = True + + try: + await super()._process_context(context) + finally: + self._is_processing = False + # Report final accumulated token usage at the end of processing + if self._prompt_tokens > 0 or self._completion_tokens > 0: + self._total_tokens = self._prompt_tokens + self._completion_tokens + tokens = LLMTokenUsage( + prompt_tokens=self._prompt_tokens, + completion_tokens=self._completion_tokens, + total_tokens=self._total_tokens, + ) + await super().start_llm_usage_metrics(tokens) + + async def start_llm_usage_metrics(self, tokens: LLMTokenUsage): + """Accumulate token usage metrics during processing. + + Perplexity reports token usage incrementally during streaming, + unlike OpenAI which provides a final summary. We accumulate the + counts and report the total at the end of processing. + """ + if not self._is_processing: + return + + # Record prompt tokens the first time we see them + if not self._has_reported_prompt_tokens and tokens.prompt_tokens > 0: + self._prompt_tokens = tokens.prompt_tokens + self._has_reported_prompt_tokens = True + + # Update completion tokens count if it has increased + if tokens.completion_tokens > self._completion_tokens: + self._completion_tokens = tokens.completion_tokens