Compare commits
34 Commits
v0.0.78
...
mb/update-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
708ef71c96 | ||
|
|
241ab19228 | ||
|
|
c08e8ec8fb | ||
|
|
eb9bc9644e | ||
|
|
3a306dae90 | ||
|
|
c42cc8254f | ||
|
|
a8e21f7d5d | ||
|
|
c6ef8de578 | ||
|
|
fc571fba42 | ||
|
|
0502ee2b5a | ||
|
|
9ec047094b | ||
|
|
d991c106c8 | ||
|
|
312fb23c89 | ||
|
|
4d7f21d44e | ||
|
|
ec25d0a7c9 | ||
|
|
2b8218deaa | ||
|
|
11119430cd | ||
|
|
9ca79232c1 | ||
|
|
9ea06c33f7 | ||
|
|
30a1dd202e | ||
|
|
809ab0b7b6 | ||
|
|
2b5db9c562 | ||
|
|
b4a886b59f | ||
|
|
07eb00722b | ||
|
|
96652b8fba | ||
|
|
df1fcf0c68 | ||
|
|
711f740d9e | ||
|
|
a0bda98c20 | ||
|
|
1c1bae35ab | ||
|
|
56c52c2cf2 | ||
|
|
740aee1a1a | ||
|
|
f0391c3280 | ||
|
|
64e48e4660 | ||
|
|
b8147bdbbd |
70
.github/workflows/python-compatibility.yaml
vendored
70
.github/workflows/python-compatibility.yaml
vendored
@@ -9,14 +9,14 @@ on:
|
||||
paths: ['pyproject.toml']
|
||||
|
||||
jobs:
|
||||
test-compatibility:
|
||||
test-dev-environment:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ['3.10.18', '3.11.13', '3.12.11', '3.13.5']
|
||||
|
||||
name: Python ${{ matrix.python-version }}
|
||||
name: Dev Environment - Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
@@ -55,7 +55,69 @@ jobs:
|
||||
--no-extra moondream \
|
||||
--no-extra mlx-whisper
|
||||
|
||||
- name: Verify installation
|
||||
- name: Verify dev installation
|
||||
run: |
|
||||
uv run python --version
|
||||
uv run python -c "import pipecat; print('✅ Pipecat imports successfully')"
|
||||
uv run python -c "import pipecat; print('✅ Dev environment - Pipecat imports successfully')"
|
||||
|
||||
test-user-experience:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ['3.10.18', '3.11.13', '3.12.11', '3.13.5']
|
||||
|
||||
name: User Experience - Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y \
|
||||
portaudio19-dev \
|
||||
libcairo2-dev \
|
||||
libgirepository1.0-dev \
|
||||
pkg-config
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
with:
|
||||
version: 'latest'
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
run: |
|
||||
uv python install ${{ matrix.python-version }}
|
||||
|
||||
- name: Build local package
|
||||
run: |
|
||||
uv build
|
||||
|
||||
- name: Create test project
|
||||
run: |
|
||||
mkdir test-project
|
||||
cd test-project
|
||||
uv init --python ${{ matrix.python-version }}
|
||||
|
||||
- name: Test comprehensive extras with uv add (Python 3.10-3.12)
|
||||
if: "!startsWith(matrix.python-version, '3.13.')"
|
||||
run: |
|
||||
cd test-project
|
||||
# Use uv add with built wheel to leverage dependency management
|
||||
uv add "../dist/pipecat_ai-"*".whl[anthropic,assemblyai,asyncai,aws,aws-nova-sonic,azure,cartesia,cerebras,deepseek,daily,deepgram,elevenlabs,fal,fireworks,fish,gladia,google,grok,groq,gstreamer,heygen,inworld,koala,langchain,livekit,lmnt,local,mcp,mem0,mlx-whisper,moondream,nim,neuphonic,noisereduce,openai,openpipe,openrouter,perplexity,playht,qwen,rime,riva,runner,sambanova,sentry,local-smart-turn,remote-smart-turn,silero,simli,soniox,soundfile,speechmatics,tavus,together,tracing,ultravox,webrtc,websocket,whisper]"
|
||||
|
||||
- name: Test Python 3.13 compatible extras with uv add
|
||||
if: startsWith(matrix.python-version, '3.13.')
|
||||
run: |
|
||||
cd test-project
|
||||
# Use uv add with built wheel and Python 3.13 compatible extras
|
||||
uv add "../dist/pipecat_ai-"*".whl[anthropic,assemblyai,asyncai,aws,aws-nova-sonic,azure,cartesia,cerebras,deepseek,daily,deepgram,elevenlabs,fal,fireworks,fish,gladia,google,grok,groq,gstreamer,heygen,inworld,koala,langchain,livekit,lmnt,local,mcp,mem0,nim,neuphonic,noisereduce,openai,openpipe,openrouter,perplexity,playht,qwen,rime,riva,runner,sambanova,sentry,remote-smart-turn,silero,simli,soniox,soundfile,speechmatics,tavus,together,tracing,webrtc,websocket,whisper]"
|
||||
|
||||
- name: Verify user installation
|
||||
run: |
|
||||
cd test-project
|
||||
uv run python --version
|
||||
uv run python -c "import pipecat; print('✅ User experience - Pipecat imports successfully')"
|
||||
# Test that basic functionality works
|
||||
uv run python -c "from pipecat.pipeline.pipeline import Pipeline; print('✅ Pipeline import works')"
|
||||
|
||||
39
CHANGELOG.md
39
CHANGELOG.md
@@ -5,6 +5,42 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `pyproject.toml` to once again pin `numba` to `>=0.61.2` in order to
|
||||
resolve package versioning issues.
|
||||
|
||||
### Other
|
||||
|
||||
- Updated `15-switch-voices.py` and `15a-switch-languages.py` examples to show
|
||||
how to enclose complex logic (e.g. `ParallelPipeline`) into a single processor
|
||||
so the main pipeline becomes simpler.
|
||||
|
||||
## [0.0.79] - 2025-08-07
|
||||
|
||||
### Changed
|
||||
|
||||
- Changed `pipecat-ai`'s `openai` dependency to `>=1.74.0,<=1.99.1` due to a
|
||||
breaking change in `openai` 1.99.2 ([commit](https://github.com/openai/openai-python/commit/657f551dbe583ffb259d987dafae12c6211fba06))
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `TTSService.say()` is deprecated, push a `TTSSpeakFrame` instead. Calling
|
||||
functions directly is a discouraged pattern in Pipecat because, for example,
|
||||
it might cause issues with frame ordering.
|
||||
|
||||
- `LLMMessagesFrame` is deprecated, in favor of either:
|
||||
|
||||
- `LLMMessagesUpdateFrame` with `run_llm=True`
|
||||
- `OpenAILLMContextFrame` with desired messages in a new context
|
||||
|
||||
- `LLMUserResponseAggregator` and `LLMAssistantResponseAggregator` are
|
||||
deprecated, as they depended on the now-deprecated `LLMMessagesFrame`. Use
|
||||
`LLMUserContextAggregator` and `LLMAssistantResponseAggregator` (or
|
||||
LLM-specific subclasses thereof) instead.
|
||||
|
||||
## [0.0.78] - 2025-08-07
|
||||
|
||||
### Added
|
||||
@@ -53,7 +89,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Added Chinese, Japanese, Korean word timestamp support to
|
||||
`CartesiaTTSService`.
|
||||
|
||||
- Added `region` parameter to `GladiaSTTService`. Accepted values: eu-west (default), us-west.
|
||||
- Added `region` parameter to `GladiaSTTService`. Accepted values: eu-west
|
||||
(default), us-west.
|
||||
|
||||
### Changed
|
||||
|
||||
|
||||
23
README.md
23
README.md
@@ -128,7 +128,7 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
2. Install development and testing dependencies:
|
||||
|
||||
```bash
|
||||
uv sync --group dev --all-extras --no-extra krisp
|
||||
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local
|
||||
```
|
||||
|
||||
3. Install the git pre-commit hooks:
|
||||
@@ -137,18 +137,25 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
uv run pre-commit install
|
||||
```
|
||||
|
||||
### Python 3.13+ Note
|
||||
### Python 3.13+ Compatibility
|
||||
|
||||
Some features require PyTorch (not yet available on Python 3.13+):
|
||||
|
||||
- `ultravox`, `local-smart-turn`, `moondream`, `mlx-whisper`
|
||||
|
||||
**For full compatibility:** Use Python 3.12
|
||||
Some features require PyTorch, which doesn't yet support Python 3.13+. Install using:
|
||||
|
||||
```bash
|
||||
uv python pin 3.12 && uv sync --group dev --all-extras --no-extra krisp
|
||||
uv sync --group dev --all-extras \
|
||||
--no-extra gstreamer \
|
||||
--no-extra krisp \
|
||||
--no-extra local \
|
||||
--no-extra local-smart-turn \
|
||||
--no-extra mlx-whisper \
|
||||
--no-extra moondream \
|
||||
--no-extra ultravox
|
||||
```
|
||||
|
||||
> **Tip:** For full compatibility, use Python 3.12: `uv python pin 3.12`
|
||||
|
||||
> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
|
||||
|
||||
### Running tests
|
||||
|
||||
To run all tests, from the root directory:
|
||||
|
||||
@@ -29,6 +29,9 @@ CARTESIA_API_KEY=...
|
||||
DAILY_API_KEY=...
|
||||
DAILY_SAMPLE_ROOM_URL=https://...
|
||||
|
||||
# Deepgram
|
||||
DEEPGRAM_API_KEY=...
|
||||
|
||||
# ElevenLabs
|
||||
ELEVENLABS_API_KEY=...
|
||||
ELEVENLABS_VOICE_ID=...
|
||||
|
||||
@@ -9,10 +9,14 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
@@ -59,7 +63,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([LLMMessagesFrame(messages), EndFrame()])
|
||||
await task.queue_frames([OpenAILLMContextFrame(OpenAILLMContext(messages)), EndFrame()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
|
||||
@@ -15,13 +15,16 @@ from pipecat.frames.frames import (
|
||||
DataFrame,
|
||||
Frame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
@@ -153,7 +156,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
}
|
||||
]
|
||||
frames.append(MonthFrame(month=month))
|
||||
frames.append(LLMMessagesFrame(messages))
|
||||
frames.append(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
|
||||
@@ -15,7 +15,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMMessagesFrame,
|
||||
OutputAudioRawFrame,
|
||||
TextFrame,
|
||||
TTSAudioRawFrame,
|
||||
@@ -25,6 +24,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||
@@ -137,7 +140,7 @@ async def main():
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(LLMMessagesFrame(messages))
|
||||
await task.queue_frame(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
await task.stop_when_done()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
@@ -16,13 +16,16 @@ from langchain_openai import ChatOpenAI
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.frames.frames import LLMMessagesUpdateFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
)
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
@@ -97,8 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
lc = LangchainProcessor(history_chain)
|
||||
|
||||
tma_in = LLMUserResponseAggregator()
|
||||
tma_out = LLMAssistantResponseAggregator()
|
||||
context = OpenAILLMContext()
|
||||
tma_in = LLMUserContextAggregator(context=context)
|
||||
tma_out = LLMAssistantContextAggregator(context=context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -125,11 +129,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
# the `LLMMessagesFrame` will be picked up by the LangchainProcessor using
|
||||
# An `OpenAILLMContextFrame` will be picked up by the LangchainProcessor using
|
||||
# only the content of the last message to inject it in the prompt defined
|
||||
# above. So no role is required here.
|
||||
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([LLMMessagesUpdateFrame(messages, run_llm=True)])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -6,9 +6,13 @@ from typing import Tuple
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.processors.aggregators import SentenceAggregator
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.runner.daily import configure
|
||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
@@ -79,7 +83,7 @@ async def main():
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
|
||||
|
||||
await source_queue.put(LLMMessagesFrame(messages))
|
||||
await source_queue.put(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
await source_queue.put(EndFrame())
|
||||
await pipeline.run_pipeline()
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
@@ -119,7 +119,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -123,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -123,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -123,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -12,6 +12,7 @@ from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,29 +32,54 @@ from pipecat.transports.services.daily import DailyParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
current_voice = "News Lady"
|
||||
class SwitchVoices(ParallelPipeline):
|
||||
def __init__(self):
|
||||
self._current_voice = "News Lady"
|
||||
|
||||
news_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="bf991597-6c13-47e4-8411-91ec2de5c466", # Newslady
|
||||
)
|
||||
|
||||
async def switch_voice(params: FunctionCallParams):
|
||||
global current_voice
|
||||
current_voice = params.arguments["voice"]
|
||||
await params.result_callback(
|
||||
{
|
||||
"voice": f"You are now using your {current_voice} voice. Your responses should now be as if you were a {current_voice}."
|
||||
}
|
||||
)
|
||||
british_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
barbershop_man = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
)
|
||||
|
||||
async def news_lady_filter(frame) -> bool:
|
||||
return current_voice == "News Lady"
|
||||
super().__init__(
|
||||
# News Lady voice
|
||||
[FunctionFilter(self.news_lady_filter), news_lady],
|
||||
# British Reading Lady voice
|
||||
[FunctionFilter(self.british_lady_filter), british_lady],
|
||||
# Barbershop Man voice
|
||||
[FunctionFilter(self.barbershop_man_filter), barbershop_man],
|
||||
)
|
||||
|
||||
@property
|
||||
def current_voice(self):
|
||||
return self._current_voice
|
||||
|
||||
async def british_lady_filter(frame) -> bool:
|
||||
return current_voice == "British Lady"
|
||||
async def switch_voice(self, params: FunctionCallParams):
|
||||
self._current_voice = params.arguments["voice"]
|
||||
await params.result_callback(
|
||||
{
|
||||
"voice": f"You are now using your {self.current_voice} voice. Your responses should now be as if you were a {self.current_voice}."
|
||||
}
|
||||
)
|
||||
|
||||
async def news_lady_filter(self, _: Frame) -> bool:
|
||||
return self.current_voice == "News Lady"
|
||||
|
||||
async def barbershop_man_filter(frame) -> bool:
|
||||
return current_voice == "Barbershop Man"
|
||||
async def british_lady_filter(self, _: Frame) -> bool:
|
||||
return self.current_voice == "British Lady"
|
||||
|
||||
async def barbershop_man_filter(self, _: Frame) -> bool:
|
||||
return self.current_voice == "Barbershop Man"
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
@@ -83,23 +109,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
news_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="bf991597-6c13-47e4-8411-91ec2de5c466", # Newslady
|
||||
)
|
||||
|
||||
british_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
barbershop_man = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
)
|
||||
tts = SwitchVoices()
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
llm.register_function("switch_voice", switch_voice)
|
||||
llm.register_function("switch_voice", tts.switch_voice)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
@@ -136,14 +149,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (one of the following vocies)
|
||||
[FunctionFilter(news_lady_filter), news_lady], # News Lady voice
|
||||
[
|
||||
FunctionFilter(british_lady_filter),
|
||||
british_lady,
|
||||
], # British Reading Lady voice
|
||||
[FunctionFilter(barbershop_man_filter), barbershop_man], # Barbershop Man voice
|
||||
),
|
||||
tts, # TTS with switch voice functionality
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
@@ -165,7 +171,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the voices you can do. Your initial responses should be as if you were a {current_voice}.",
|
||||
"content": f"Please introduce yourself to the user and let them know the voices you can do. Your initial responses should be as if you were a {tts.current_voice}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@@ -13,6 +13,7 @@ from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -32,23 +33,42 @@ from pipecat.transports.services.daily import DailyParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
current_language = "English"
|
||||
class SwitchLanguage(ParallelPipeline):
|
||||
def __init__(self):
|
||||
self._current_language = "English"
|
||||
|
||||
english_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
async def switch_language(params: FunctionCallParams):
|
||||
global current_language
|
||||
current_language = params.arguments["language"]
|
||||
await params.result_callback(
|
||||
{"voice": f"Your answers from now on should be in {current_language}."}
|
||||
)
|
||||
spanish_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="d4db5fb9-f44b-4bd1-85fa-192e0f0d75f9", # Spanish-speaking Lady
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
# English
|
||||
[FunctionFilter(self.english_filter), english_tts],
|
||||
# Spanish
|
||||
[FunctionFilter(self.spanish_filter), spanish_tts],
|
||||
)
|
||||
|
||||
async def english_filter(frame) -> bool:
|
||||
return current_language == "English"
|
||||
@property
|
||||
def current_language(self):
|
||||
return self._current_language
|
||||
|
||||
async def switch_language(self, params: FunctionCallParams):
|
||||
self._current_language = params.arguments["language"]
|
||||
await params.result_callback(
|
||||
{"voice": f"Your answers from now on should be in {self.current_language}."}
|
||||
)
|
||||
|
||||
async def spanish_filter(frame) -> bool:
|
||||
return current_language == "Spanish"
|
||||
async def english_filter(self, _: Frame) -> bool:
|
||||
return self.current_language == "English"
|
||||
|
||||
async def spanish_filter(self, _: Frame) -> bool:
|
||||
return self.current_language == "Spanish"
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
@@ -80,18 +100,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"), live_options=LiveOptions(language="multi")
|
||||
)
|
||||
|
||||
english_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
spanish_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="d4db5fb9-f44b-4bd1-85fa-192e0f0d75f9", # Spanish-speaking Lady
|
||||
)
|
||||
tts = SwitchLanguage()
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
llm.register_function("switch_language", switch_language)
|
||||
llm.register_function("switch_language", tts.switch_language)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
@@ -128,10 +140,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (bot will speak the chosen language)
|
||||
[FunctionFilter(english_filter), english_tts], # English
|
||||
[FunctionFilter(spanish_filter), spanish_tts], # Spanish
|
||||
),
|
||||
tts, # TTS (bot will speak the chosen language)
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
@@ -153,7 +162,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the languages you speak. Your initial responses should be in {current_language}.",
|
||||
"content": f"Please introduce yourself to the user and let them know the languages you speak. Your initial responses should be in {tts.current_language}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame, TTSSpeakFrame
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesAppendFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -75,23 +75,19 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def handle_user_idle(user_idle: UserIdleProcessor, retry_count: int) -> bool:
|
||||
if retry_count == 1:
|
||||
# First attempt: Add a gentle prompt to the conversation
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "The user has been quiet. Politely and briefly ask if they're still there.",
|
||||
}
|
||||
)
|
||||
await user_idle.push_frame(LLMMessagesFrame(messages))
|
||||
message = {
|
||||
"role": "system",
|
||||
"content": "The user has been quiet. Politely and briefly ask if they're still there.",
|
||||
}
|
||||
await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
return True
|
||||
elif retry_count == 2:
|
||||
# Second attempt: More direct prompt
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "The user is still inactive. Ask if they'd like to continue our conversation.",
|
||||
}
|
||||
)
|
||||
await user_idle.push_frame(LLMMessagesFrame(messages))
|
||||
message = {
|
||||
"role": "system",
|
||||
"content": "The user is still inactive. Ask if they'd like to continue our conversation.",
|
||||
}
|
||||
await user_idle.push_frame(LLMMessagesAppendFrame([message], run_llm=True))
|
||||
return True
|
||||
else:
|
||||
# Third attempt: End the conversation
|
||||
|
||||
@@ -14,6 +14,7 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -34,7 +35,6 @@ load_dotenv(override=True)
|
||||
|
||||
|
||||
BASE_FILENAME = "/tmp/pipecat_conversation_"
|
||||
tts = None
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
|
||||
@@ -87,7 +87,7 @@ async def load_conversation(params: FunctionCallParams):
|
||||
logger.debug(
|
||||
f"loaded conversation from {filename}\n{json.dumps(params.context.messages, indent=4)}"
|
||||
)
|
||||
await tts.say("Ok, I've loaded that conversation.")
|
||||
await params.llm.queue_frame(TTSSpeakFrame("Ok, I've loaded that conversation."))
|
||||
except Exception as e:
|
||||
await params.result_callback({"success": False, "error": str(e)})
|
||||
|
||||
@@ -190,7 +190,7 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
global tts
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
|
||||
@@ -14,6 +14,7 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -88,7 +89,7 @@ async def load_conversation(params: FunctionCallParams):
|
||||
logger.debug(
|
||||
f"loaded conversation from {filename}\n{json.dumps(params.context.messages, indent=4)}"
|
||||
)
|
||||
await tts.say("Ok, I've loaded that conversation.")
|
||||
await params.llm.queue_frame(TTSSpeakFrame("Ok, I've loaded that conversation."))
|
||||
except Exception as e:
|
||||
await params.result_callback({"success": False, "error": str(e)})
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
FunctionCallInProgressFrame,
|
||||
FunctionCallResultFrame,
|
||||
LLMMessagesFrame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
StopInterruptionFrame,
|
||||
@@ -60,10 +59,6 @@ classifier_statement = "Determine if the user's statement ends with a complete t
|
||||
|
||||
|
||||
class StatementJudgeContextFilter(FrameProcessor):
|
||||
def __init__(self, notifier: BaseNotifier, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self._notifier = notifier
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
# We must not block system frames.
|
||||
@@ -71,13 +66,8 @@ class StatementJudgeContextFilter(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
return
|
||||
|
||||
# Just treat an LLMMessagesFrame as complete, no matter what.
|
||||
if isinstance(frame, LLMMessagesFrame):
|
||||
await self._notifier.notify()
|
||||
return
|
||||
|
||||
# Otherwise, we only want to handle OpenAILLMContextFrames, and only want to push a simple
|
||||
# messages frame that contains a system prompt and the most recent user messages,
|
||||
# We only want to handle OpenAILLMContextFrames, and only want to push through a simplified
|
||||
# context frame that contains a system prompt and the most recent user messages,
|
||||
# concatenated.
|
||||
if isinstance(frame, OpenAILLMContextFrame):
|
||||
logger.debug(f"Context Frame: {frame}")
|
||||
@@ -96,7 +86,7 @@ class StatementJudgeContextFilter(FrameProcessor):
|
||||
for content in message["content"]:
|
||||
if content["type"] == "text":
|
||||
user_text_messages.insert(0, content["text"])
|
||||
# If we have any user text content, push an LLMMessagesFrame
|
||||
# If we have any user text content, push a context frame with the simplified context.
|
||||
if user_text_messages:
|
||||
logger.debug(f"User text messages: {user_text_messages}")
|
||||
user_message = " ".join(reversed(user_text_messages))
|
||||
@@ -110,7 +100,7 @@ class StatementJudgeContextFilter(FrameProcessor):
|
||||
if last_assistant_message:
|
||||
messages.append(last_assistant_message)
|
||||
messages.append({"role": "user", "content": user_message})
|
||||
await self.push_frame(LLMMessagesFrame(messages))
|
||||
await self.push_frame(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
|
||||
|
||||
class CompletenessCheck(FrameProcessor):
|
||||
@@ -296,7 +286,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
# This turns the LLM context into an inference request to classify the user's speech
|
||||
# as complete or incomplete.
|
||||
statement_judge_context_filter = StatementJudgeContextFilter(notifier=notifier)
|
||||
statement_judge_context_filter = StatementJudgeContextFilter()
|
||||
|
||||
# This sends a UserStoppedSpeakingFrame and triggers the notifier event
|
||||
completeness_check = CompletenessCheck(notifier=notifier)
|
||||
@@ -316,7 +306,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def pass_only_llm_trigger_frames(frame):
|
||||
return (
|
||||
isinstance(frame, OpenAILLMContextFrame)
|
||||
or isinstance(frame, LLMMessagesFrame)
|
||||
or isinstance(frame, StartInterruptionFrame)
|
||||
or isinstance(frame, StopInterruptionFrame)
|
||||
or isinstance(frame, FunctionCallInProgressFrame)
|
||||
@@ -331,14 +320,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
ParallelPipeline(
|
||||
[
|
||||
# Ignore everything except an OpenAILLMContextFrame. Pass a specially constructed
|
||||
# LLMMessagesFrame to the statement classifier LLM. The only frame this
|
||||
# simplified context frame to the statement classifier LLM. The only frame this
|
||||
# sub-pipeline will output is a UserStoppedSpeakingFrame.
|
||||
statement_judge_context_filter,
|
||||
statement_llm,
|
||||
completeness_check,
|
||||
],
|
||||
[
|
||||
# Block everything except OpenAILLMContextFrame and LLMMessagesFrame
|
||||
# Block everything except frames that trigger LLM inference.
|
||||
FunctionFilter(filter=pass_only_llm_trigger_frames),
|
||||
llm,
|
||||
bot_output_gate, # Buffer all llm/tts output until notified.
|
||||
|
||||
@@ -19,7 +19,6 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
FunctionCallInProgressFrame,
|
||||
FunctionCallResultFrame,
|
||||
LLMMessagesFrame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
StopInterruptionFrame,
|
||||
@@ -266,10 +265,6 @@ Please be very concise in your responses. Unless you are explicitly asked to do
|
||||
|
||||
|
||||
class StatementJudgeContextFilter(FrameProcessor):
|
||||
def __init__(self, notifier: BaseNotifier, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self._notifier = notifier
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
# We must not block system frames.
|
||||
@@ -277,14 +272,8 @@ class StatementJudgeContextFilter(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
return
|
||||
|
||||
# Just treat an LLMMessagesFrame as complete, no matter what.
|
||||
if isinstance(frame, LLMMessagesFrame):
|
||||
await self._notifier.notify()
|
||||
return
|
||||
|
||||
# Otherwise, we only want to handle OpenAILLMContextFrames, and only want to push a simple
|
||||
# messages frame that contains a system prompt and the most recent user messages,
|
||||
# concatenated.
|
||||
# We only want to handle OpenAILLMContextFrames, and only want to push through a simplified
|
||||
# context frame that contains a system prompt and the most recent user messages,
|
||||
if isinstance(frame, OpenAILLMContextFrame):
|
||||
# Take text content from the most recent user messages.
|
||||
messages = frame.context.messages
|
||||
@@ -301,7 +290,7 @@ class StatementJudgeContextFilter(FrameProcessor):
|
||||
for content in message["content"]:
|
||||
if content["type"] == "text":
|
||||
user_text_messages.insert(0, content["text"])
|
||||
# If we have any user text content, push an LLMMessagesFrame
|
||||
# If we have any user text content, push a context frame with the simplified context.
|
||||
if user_text_messages:
|
||||
user_message = " ".join(reversed(user_text_messages))
|
||||
logger.debug(f"!!! {user_message}")
|
||||
@@ -314,7 +303,7 @@ class StatementJudgeContextFilter(FrameProcessor):
|
||||
if last_assistant_message:
|
||||
messages.append(last_assistant_message)
|
||||
messages.append({"role": "user", "content": user_message})
|
||||
await self.push_frame(LLMMessagesFrame(messages))
|
||||
await self.push_frame(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
|
||||
|
||||
class CompletenessCheck(FrameProcessor):
|
||||
@@ -499,7 +488,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
# This turns the LLM context into an inference request to classify the user's speech
|
||||
# as complete or incomplete.
|
||||
statement_judge_context_filter = StatementJudgeContextFilter(notifier=notifier)
|
||||
statement_judge_context_filter = StatementJudgeContextFilter()
|
||||
|
||||
# This sends a UserStoppedSpeakingFrame and triggers the notifier event
|
||||
completeness_check = CompletenessCheck(notifier=notifier)
|
||||
@@ -522,7 +511,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def pass_only_llm_trigger_frames(frame):
|
||||
return (
|
||||
isinstance(frame, OpenAILLMContextFrame)
|
||||
or isinstance(frame, LLMMessagesFrame)
|
||||
or isinstance(frame, StartInterruptionFrame)
|
||||
or isinstance(frame, StopInterruptionFrame)
|
||||
or isinstance(frame, FunctionCallInProgressFrame)
|
||||
@@ -542,14 +530,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
],
|
||||
[
|
||||
# Ignore everything except an OpenAILLMContextFrame. Pass a specially constructed
|
||||
# LLMMessagesFrame to the statement classifier LLM. The only frame this
|
||||
# simplified context frame to the statement classifier LLM. The only frame this
|
||||
# sub-pipeline will output is a UserStoppedSpeakingFrame.
|
||||
statement_judge_context_filter,
|
||||
statement_llm,
|
||||
completeness_check,
|
||||
],
|
||||
[
|
||||
# Block everything except OpenAILLMContextFrame and LLMMessagesFrame
|
||||
# Block everything except frames that trigger LLM inference.
|
||||
FunctionFilter(filter=pass_only_llm_trigger_frames),
|
||||
llm,
|
||||
bot_output_gate, # Buffer all llm/tts output until notified.
|
||||
|
||||
@@ -33,7 +33,9 @@ dependencies = [
|
||||
"pyloudnorm~=0.1.1",
|
||||
"resampy~=0.4.3",
|
||||
"soxr~=0.5.0",
|
||||
"openai>=1.74.0,<2",
|
||||
"openai>=1.74.0,<=1.99.1",
|
||||
# Pinning numba (resampy dep) to resolve a package dependency
|
||||
"numba==0.61.2",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
||||
@@ -93,7 +93,7 @@ TESTS_14 = [
|
||||
("14p-function-calling-gemini-vertex-ai.py", PROMPT_WEATHER, EVAL_WEATHER),
|
||||
("14q-function-calling-qwen.py", PROMPT_WEATHER, EVAL_WEATHER),
|
||||
("14r-function-calling-aws.py", PROMPT_WEATHER, EVAL_WEATHER),
|
||||
("14v-function-calling-openai.py.py", PROMPT_WEATHER, EVAL_WEATHER),
|
||||
("14v-function-calling-openai.py", PROMPT_WEATHER, EVAL_WEATHER),
|
||||
# Currently not working.
|
||||
# ("14c-function-calling-together.py", PROMPT_WEATHER, EVAL_WEATHER),
|
||||
# ("14k-function-calling-cerebras.py", PROMPT_WEATHER, EVAL_WEATHER),
|
||||
|
||||
@@ -478,6 +478,12 @@ class TranscriptionUpdateFrame(DataFrame):
|
||||
class LLMMessagesFrame(DataFrame):
|
||||
"""Frame containing LLM messages for chat completion.
|
||||
|
||||
.. deprecated:: 0.0.79
|
||||
This class is deprecated and will be removed in a future version.
|
||||
Instead, use either:
|
||||
- `LLMMessagesUpdateFrame` with `run_llm=True`
|
||||
- `OpenAILLMContextFrame` with desired messages in a new context
|
||||
|
||||
A frame containing a list of LLM messages. Used to signal that an LLM
|
||||
service should run a chat completion and emit an LLMFullResponseStartFrame,
|
||||
TextFrames and an LLMFullResponseEndFrame. Note that the `messages`
|
||||
@@ -490,6 +496,20 @@ class LLMMessagesFrame(DataFrame):
|
||||
|
||||
messages: List[dict]
|
||||
|
||||
def __post_init__(self):
|
||||
super().__post_init__()
|
||||
import warnings
|
||||
|
||||
warnings.simplefilter("always")
|
||||
warnings.warn(
|
||||
"LLMMessagesFrame is deprecated and will be removed in a future version. "
|
||||
"Instead, use either "
|
||||
"`LLMMessagesUpdateFrame` with `run_llm=True`, or "
|
||||
"`OpenAILLMContextFrame` with desired messages in a new context",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class LLMMessagesAppendFrame(DataFrame):
|
||||
|
||||
@@ -12,6 +12,7 @@ LLM processing, and text-to-speech components in conversational AI pipelines.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import warnings
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List, Literal, Optional, Set
|
||||
@@ -994,6 +995,10 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
|
||||
class LLMUserResponseAggregator(LLMUserContextAggregator):
|
||||
"""User response aggregator that outputs LLMMessagesFrame instead of context frames.
|
||||
|
||||
.. deprecated:: 0.0.79
|
||||
This class is deprecated and will be removed in a future version.
|
||||
Use `LLMUserContextAggregator` or another LLM-specific subclass instead.
|
||||
|
||||
This aggregator extends LLMUserContextAggregator but pushes LLMMessagesFrame
|
||||
objects downstream instead of OpenAILLMContextFrame objects. This is useful
|
||||
when you need message-based output rather than context-based output.
|
||||
@@ -1013,6 +1018,12 @@ class LLMUserResponseAggregator(LLMUserContextAggregator):
|
||||
params: Configuration parameters for aggregation behavior.
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
warnings.warn(
|
||||
"LLMUserResponseAggregator is deprecated and will be removed in a future version. "
|
||||
"Use LLMUserContextAggregator or another LLM-specific subclass instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
|
||||
|
||||
async def _process_aggregation(self):
|
||||
@@ -1027,6 +1038,10 @@ class LLMUserResponseAggregator(LLMUserContextAggregator):
|
||||
class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
|
||||
"""Assistant response aggregator that outputs LLMMessagesFrame instead of context frames.
|
||||
|
||||
.. deprecated:: 0.0.79
|
||||
This class is deprecated and will be removed in a future version.
|
||||
Use `LLMAssistantContextAggregator` or another LLM-specific subclass instead.
|
||||
|
||||
This aggregator extends LLMAssistantContextAggregator but pushes LLMMessagesFrame
|
||||
objects downstream instead of OpenAILLMContextFrame objects. This is useful
|
||||
when you need message-based output rather than context-based output.
|
||||
@@ -1046,6 +1061,12 @@ class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
|
||||
params: Configuration parameters for aggregation behavior.
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
warnings.warn(
|
||||
"LLMAssistantResponseAggregator is deprecated and will be removed in a future version. "
|
||||
"Use LLMAssistantContextAggregator or another LLM-specific subclass instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
|
||||
|
||||
async def push_aggregation(self):
|
||||
|
||||
@@ -12,13 +12,14 @@ in conversational pipelines.
|
||||
"""
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.processors.aggregators.llm_response import LLMUserResponseAggregator
|
||||
from pipecat.processors.aggregators.llm_response import LLMUserContextAggregator
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
|
||||
|
||||
class UserResponseAggregator(LLMUserResponseAggregator):
|
||||
class UserResponseAggregator(LLMUserContextAggregator):
|
||||
"""Aggregates user responses into TextFrame objects.
|
||||
|
||||
This aggregator extends LLMUserResponseAggregator to specifically handle
|
||||
This aggregator extends LLMUserContextAggregator to specifically handle
|
||||
user input by collecting text responses and outputting them as TextFrame
|
||||
objects when the aggregation is complete.
|
||||
"""
|
||||
@@ -27,9 +28,9 @@ class UserResponseAggregator(LLMUserResponseAggregator):
|
||||
"""Initialize the user response aggregator.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional arguments passed to parent LLMUserResponseAggregator.
|
||||
**kwargs: Additional arguments passed to parent LLMUserContextAggregator.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
super().__init__(context=OpenAILLMContext(), **kwargs)
|
||||
|
||||
async def push_aggregation(self):
|
||||
"""Push the aggregated user response as a TextFrame.
|
||||
|
||||
@@ -14,9 +14,9 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
try:
|
||||
@@ -64,11 +64,11 @@ class LangchainProcessor(FrameProcessor):
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, LLMMessagesFrame):
|
||||
if isinstance(frame, OpenAILLMContextFrame):
|
||||
# Messages are accumulated on the context as a list of messages.
|
||||
# The last one by the human is the one we want to send to the LLM.
|
||||
logger.debug(f"Got transcription frame {frame}")
|
||||
text: str = frame.messages[-1]["content"]
|
||||
text: str = frame.context.messages[-1]["content"]
|
||||
|
||||
await self._ainvoke(text.strip())
|
||||
else:
|
||||
|
||||
@@ -448,14 +448,16 @@ class AnthropicLLMContext(OpenAILLMContext):
|
||||
system: System message content.
|
||||
"""
|
||||
super().__init__(messages=messages, tools=tools, tool_choice=tool_choice)
|
||||
self.__setup_local()
|
||||
self.system = system
|
||||
|
||||
def __setup_local(self):
|
||||
# For beta prompt caching. This is a counter that tracks the number of turns
|
||||
# we've seen above the cache threshold. We reset this when we reset the
|
||||
# messages list. We only care about this number being 0, 1, or 2. But
|
||||
# it's easiest just to treat it as a counter.
|
||||
self.turns_above_cache_threshold = 0
|
||||
|
||||
self.system = system
|
||||
return
|
||||
|
||||
@staticmethod
|
||||
def upgrade_to_anthropic(obj: OpenAILLMContext) -> "AnthropicLLMContext":
|
||||
@@ -472,6 +474,7 @@ class AnthropicLLMContext(OpenAILLMContext):
|
||||
logger.debug(f"Upgrading to Anthropic: {obj}")
|
||||
if isinstance(obj, OpenAILLMContext) and not isinstance(obj, AnthropicLLMContext):
|
||||
obj.__class__ = AnthropicLLMContext
|
||||
obj.__setup_local()
|
||||
obj._restructure_from_openai_messages()
|
||||
return obj
|
||||
|
||||
|
||||
@@ -269,9 +269,20 @@ class TTSService(AIService):
|
||||
async def say(self, text: str):
|
||||
"""Immediately speak the provided text.
|
||||
|
||||
.. deprecated:: 0.0.79
|
||||
Push a `TTSSpeakFrame` instead to ensure frame ordering is maintained.
|
||||
|
||||
Args:
|
||||
text: The text to speak.
|
||||
"""
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"`TTSService.say()` is deprecated. Push a `TTSSpeakFrame` instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
await self.queue_frame(TTSSpeakFrame(text))
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
|
||||
@@ -12,7 +12,7 @@ from langchain_core.language_models import FakeStreamingListLLM
|
||||
from pipecat.frames.frames import (
|
||||
LLMFullResponseEndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
OpenAILLMContextAssistantTimestampFrame,
|
||||
TextFrame,
|
||||
TranscriptionFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
@@ -21,8 +21,12 @@ from pipecat.frames.frames import (
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantAggregatorParams,
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameProcessor
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
@@ -63,9 +67,10 @@ class TestLangchain(unittest.IsolatedAsyncioTestCase):
|
||||
proc = LangchainProcessor(chain=chain)
|
||||
self.mock_proc = self.MockProcessor("token_collector")
|
||||
|
||||
tma_in = LLMUserResponseAggregator(messages)
|
||||
tma_out = LLMAssistantResponseAggregator(
|
||||
messages, params=LLMAssistantAggregatorParams(expect_stripped_words=False)
|
||||
context = OpenAILLMContext()
|
||||
tma_in = LLMUserContextAggregator(context)
|
||||
tma_out = LLMAssistantContextAggregator(
|
||||
context, params=LLMAssistantAggregatorParams(expect_stripped_words=False)
|
||||
)
|
||||
|
||||
pipeline = Pipeline([tma_in, proc, self.mock_proc, tma_out])
|
||||
@@ -79,7 +84,8 @@ class TestLangchain(unittest.IsolatedAsyncioTestCase):
|
||||
expected_down_frames = [
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
LLMMessagesFrame,
|
||||
OpenAILLMContextFrame,
|
||||
OpenAILLMContextAssistantTimestampFrame,
|
||||
]
|
||||
await run_test(
|
||||
pipeline,
|
||||
|
||||
Reference in New Issue
Block a user