Merge branch 'main' into cutting_initial_words

This commit is contained in:
Filipi Fuchter
2025-01-29 15:36:52 -03:00
180 changed files with 6107 additions and 1692 deletions

View File

@@ -1,4 +1,4 @@
name: test
name: tests
on:
workflow_dispatch:
@@ -49,4 +49,4 @@ jobs:
- name: Test with pytest
run: |
source .venv/bin/activate
pytest --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
pytest

7
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,7 @@
repos:
- repo: local
hooks:
- id: ruff-format-hook
name: Check ruff formatting
entry: sh scripts/pre-commit.sh
language: system

View File

@@ -9,9 +9,134 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Added a `metadata` field to `Frame` which makes it possible to pass custom
data to all frames.
- Added `test/utils.py` inside of pipecat package.
### Fixed
- Fixed an issue where `ElevenLabsTTSService` messages would return a 1009
websocket error by increasing the max message size limit to 16MB.
- Fixed a `DailyTransport` issue that would cause events to be triggered before
join finished.
- Fixed a `PipelineTask` issue that was preventing processors from being cleaned
up after cancelling the task.
- Fixed an issue where queuing a `CancelFrame` to a pipeline task would not
cause the task to finish. However, using `PipelineTask.cancel()` is still the
recommended way to cancel a task.
### Other
- Updated all examples to use `task.cancel()` instead of pushing an `EndFrame`
when a participant leaves/disconnects. If you push an `EndFrame` this will
cause the bot to run through everything that is internally queued (which could
take seconds). Instead, if a participant disconnects there is nothing else to
be sent and therefore we should stop immediately.
## [0.0.54] - 2025-01-27
### Added
- In order to create tasks in Pipecat frame processors it is now recommended to
use `FrameProcessor.create_task()` (which uses the new
`utils.asyncio.create_task()`). It takes care of uncaught exceptions, task
cancellation handling and task management. To cancel or wait for a task there
is `FrameProcessor.cancel_task()` and `FrameProcessor.wait_for_task()`. All of
Pipecat processors have been updated accordingly. Also, when a pipeline runner
finishes, a warning about dangling tasks might appear, which indicates that one
or more of the created tasks were never cancelled or awaited (using these new
functions).
- It is now possible to specify the period of the `PipelineTask` heartbeat
frames with `heartbeats_period_secs`.
- Added `DailyMeetingTokenProperties` and `DailyMeetingTokenParams` Pydantic models
for meeting token creation in `get_token` method of `DailyRESTHelper`.
- Added `enable_recording` and `geo` parameters to `DailyRoomProperties`.
- Added `RecordingsBucketConfig` to `DailyRoomProperties` to upload recordings to a custom AWS bucket.
### Changed
- Enhanced `UserIdleProcessor` with retry functionality and control over idle
monitoring via new callback signature `(processor, retry_count) -> bool`.
Updated the `17-detect-user-idle.py` to show how to use the `retry_count`.
- Added defensive error handling for `OpenAIRealtimeBetaLLMService`'s audio
truncation. Audio truncation errors during interruptions now log a warning
and allow the session to continue instead of throwing an exception.
- Modified `TranscriptProcessor` to use TTS text frames for more accurate assistant
transcripts. Assistant messages are now aggregated based on bot speaking boundaries
rather than LLM context, providing better handling of interruptions and partial
utterances.
- Updated foundational examples `28a-transcription-processor-openai.py`,
`28b-transcript-processor-anthropic.py`, and
`28c-transcription-processor-gemini.py` to use the updated
`TranscriptProcessor`.
### Fixed
- Fixed a `GeminiMultimodalLiveLLMService` issue that was preventing the user
from pushing initial LLM assistant messages (using `LLMMessagesAppendFrame`).
- Added missing `FrameProcessor.cleanup()` calls to `Pipeline`,
`ParallelPipeline` and `UserIdleProcessor`.
- Fixed a type error when using `voice_settings` in `ElevenLabsHttpTTSService`.
- Fixed an issue where `OpenAIRealtimeBetaLLMService` function calling resulted
in an error.
- Fixed an issue in `AudioBufferProcessor` where the last audio buffer was not
being processed, in cases where the `_user_audio_buffer` was smaller than the
buffer size.
### Performance
- Replaced audio resampling library `resampy` with `soxr`. Resampling a 2:21s
audio file from 24KHz to 16KHz took 1.41s with `resampy` and 0.031s with
`soxr` with similar audio quality.
### Other
- Added initial unit test infrastructure.
## [0.0.53] - 2025-01-18
### Added
- Added `ElevenLabsHttpTTSService` which uses ElevenLabs' HTTP API instead of the
websocket one.
- Introduced pipeline frame observers. Observers can view all the frames that go
through the pipeline without the need to inject processors in the
pipeline. This can be useful, for example, to implement frame loggers or
debuggers among other things. The example
`examples/foundational/30-observer.py` shows how to add an observer to a
pipeline for debugging.
- Introduced heartbeat frames. The pipeline task can now push periodic
heartbeats down the pipeline when `enable_heartbeats=True`. Heartbeats are
system frames that are supposed to make it all the way to the end of the
pipeline. When a heartbeat frame is received the traversing time (i.e. the
time it took to go through the whole pipeline) will be displayed (with TRACE
logging) otherwise a warning will be shown. The example
`examples/foundational/31-heartbeats.py` shows how to enable heartbeats and
forces warnings to be displayed.
- Added `LLMTextFrame` and `TTSTextFrame` which should be pushed by LLM and TTS
services respectively instead of `TextFrame`s.
- Added `OpenRouter` for OpenRouter integration with an OpenAI-compatible
interface. Added foundational example `14m-function-calling-openrouter.py`.
- Added a new `WebsocketService` based class for TTS services, containing
base functions and retry logic.
@@ -48,6 +173,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Modified `UserIdleProcessor` to start monitoring only after first
conversation activity (`UserStartedSpeakingFrame` or
`BotStartedSpeakingFrame`) instead of immediately.
- Modified `OpenAIAssistantContextAggregator` to support controlled completions
and to emit context update callbacks via `FunctionCallResultProperties`.
@@ -71,6 +200,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Fixed an issue where `DeepgramSTTService` was not generating metrics using
pipeline's VAD.
- Fixed `UserIdleProcessor` not properly propagating `EndFrame`s through the
pipeline.
- Fixed an issue where websocket based TTS services could incorrectly terminate
their connection due to a retry counter not resetting.
@@ -87,6 +222,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed an issue where setting the voice and model for `RimeHttpTTSService`
wasn't working.
- Fixed an issue where `IdleFrameProcessor` and `UserIdleProcessor` were getting
initialized before the start of the pipeline.
## [0.0.52] - 2024-12-24
### Added

View File

@@ -2,7 +2,7 @@
 <img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
</div></h1>
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) ![Tests](https://github.com/pipecat-ai/pipecat/actions/workflows/tests.yaml/badge.svg) [![Docs](https://img.shields.io/badge/Documentation-blue)](https://docs.pipecat.ai) [![Discord](https://img.shields.io/discord/1239284677165056021)](https://discord.gg/pipecat) <a href="https://app.commanddash.io/agent/github_pipecat-ai_pipecat"><img src="https://img.shields.io/badge/AI-Code%20Agent-EB9FDA"></a>
Pipecat is an open source Python framework for building voice and multimodal conversational agents. It handles the complex orchestration of AI services, network transport, audio processing, and multimodal interactions, letting you focus on creating engaging experiences.
@@ -53,7 +53,7 @@ To keep things lightweight, only the core framework is included by default. If y
pip install "pipecat-ai[option,...]"
```
Available options include:
### Available services
| Category | Services | Install Command Example |
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
@@ -81,7 +81,7 @@ Here is a very basic Pipecat bot that greets a user when they join a real-time s
```python
import asyncio
from pipecat.frames.frames import EndFrame, TextFrame
from pipecat.frames.frames import TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineTask
from pipecat.pipeline.runner import PipelineRunner
@@ -122,7 +122,7 @@ async def main():
# Register an event handler to exit the application when the user leaves.
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
# Run the pipeline task
await runner.run(task)
@@ -160,15 +160,24 @@ From the root of this repo, run the following:
```shell
pip install -r dev-requirements.txt
python -m build
```
This builds the package. To use the package locally (e.g. to run sample files), run
This will install the necessary development dependencies. Also, make sure you install the git pre-commit hooks:
```shell
pre-commit install
```
The hooks will just save you time when you submit a PR by making sure your code follows the project rules.
To use the package locally (e.g. to run sample files), run:
```shell
pip install --editable ".[option,...]"
```
The `--editable` option makes sure you don't have to run `pip install` again and you can just edit the project files locally.
If you want to use this package from another directory, you can run:
```shell
@@ -180,7 +189,7 @@ pip install "path_to_this_repo[option,...]"
From the root directory, run:
```shell
pytest --doctest-modules --ignore-glob="*to_be_updated*" --ignore-glob=*pipeline_source* src tests
pytest
```
## Setting up your editor

View File

@@ -1,9 +1,11 @@
build~=1.2.2
grpcio-tools~=1.68.1
grpcio-tools~=1.69.0
pip-tools~=7.4.1
pyright~=1.1.390
pre-commit~=4.0.1
pyright~=1.1.392
pytest~=8.3.4
ruff~=0.8.3
setuptools~=75.6.0
pytest-asyncio~=0.25.2
ruff~=0.9.1
setuptools~=75.8.0
setuptools_scm~=8.1.0
python-dotenv~=1.0.1

View File

@@ -39,10 +39,10 @@ Next, follow the steps in the README for each demo.
| [Translation Chatbot](translation-chatbot) | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI |
| [Moondream Chatbot](moondream-chatbot) | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU** | Deepgram, ElevenLabs, OpenAI, Moondream, Daily, Daily Prebuilt UI |
| [Patient intake](patient-intake) | A chatbot that can call functions in response to user input. | Deepgram, ElevenLabs, OpenAI, Daily, Daily Prebuilt UI |
| [Dialin Chatbot](dialin-chatbot) | A chatbot that connects to an incoming phone call from Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [Phone Chatbot](phone-chatbot) | A chatbot that connects to PSTN/SIP phone calls, powered by Daily or Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [Twilio Chatbot](twilio-chatbot) | A chatbot that connects to an incoming phone call from Twilio. | Deepgram, ElevenLabs, OpenAI, Daily, Twilio |
| [studypal](studypal) | A chatbot to have a conversation about any article on the web | |
| [WebSocket Chatbot Server](websocket-server) | A real-time websocket server that handles audio streaming and bot interactions with speech-to-text and text-to-speech capabilities | `python-websockets`, `openai`, `deepgram`, `silero-tts`, `numpy` |
| [WebSocket Chatbot Server](websocket-server) | A real-time websocket server that handles audio streaming and bot interactions with speech-to-text and text-to-speech capabilities. | Cartesia, Deepgram, OpenAI, Websockets |
> [!IMPORTANT]
> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.

View File

@@ -97,7 +97,7 @@ async def main():
call completion, CanonicalMetrics will send the audio buffer to Canonical for
analysis. Visit https://voice.canonical.chat to learn more.
"""
audio_buffer_processor = AudioBufferProcessor()
audio_buffer_processor = AudioBufferProcessor(num_channels=2)
canonical = CanonicalMetricsService(
audio_buffer_processor=audio_buffer_processor,
aiohttp_session=session,
@@ -105,6 +105,7 @@ async def main():
call_id=str(uuid.uuid4()),
assistant="pipecat-chatbot",
assistant_speaks_first=True,
context=context,
)
pipeline = Pipeline(
[
@@ -129,11 +130,13 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.queue_frame(EndFrame())
await task.cancel()
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
if state == "left":
# Here we don't want to cancel, we just want to finish sending
# whatever is queued, so we use an EndFrame().
await task.queue_frame(EndFrame())
runner = PipelineRunner()

View File

@@ -53,4 +53,3 @@ async def configure(aiohttp_session: aiohttp.ClientSession):
token = await daily_rest_helper.get_token(url, expiry_time)
return (url, token)
return (url, token)

View File

@@ -18,7 +18,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -139,7 +138,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -79,11 +79,13 @@ async def main(room_url: str, token: str):
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
@transport.event_handler("on_call_state_updated")
async def on_call_state_updated(transport, state):
if state == "left":
# Here we don't want to cancel, we just want to finish sending
# whatever is queued, so we use an EndFrame().
await task.queue_frame(EndFrame())
runner = PipelineRunner()

View File

@@ -5,6 +5,15 @@ import sys
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
@@ -12,16 +21,6 @@ logger.add(sys.stderr, level="DEBUG")
async def main(room_url: str, token: str):
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
transport = DailyTransport(
room_url,
token,
@@ -79,7 +78,7 @@ async def main(room_url: str, token: str):
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -1,85 +0,0 @@
<div align="center">
 <img alt="pipecat" width="300px" height="auto" src="image.png">
</div>
# Dialin example
Example project that demonstrates how to add phone number dialin to your Pipecat bots. We include examples for both Daily (`bot_daily.py`) and Twilio (`bot_twilio.py`), depending on who you want to use as a phone vendor.
- 🔁 Transport: Daily WebRTC
- 💬 Speech-to-Text: Deepgram via Daily transport
- 🤖 LLM: GPT-4o / OpenAI
- 🔉 Text-to-Speech: ElevenLabs
#### Should I use Daily or Twilio as a vendor?
If you're starting from scratch, using Daily to provision phone numbers alongside Daily as a transport offers some convenience (such as automatic call forwarding).
If you already have Twilio numbers and workflows that you want to connect to your Pipecat bots, there is some additional configuration required (you'll need to create an `on_dialin_ready` event handler and use the Twilio client to trigger the forward).
You can read more about this, as well as see respective walkthroughs in our docs.
## Setup
```shell
# Install the requirements
pip install -r requirements.txt
# Setup your env
mv env.example .env
```
## Using Daily numbers
Run `bot_runner.py` to handle incoming HTTP requests:
`python bot_runner.py --host localhost`
Then target the following URL:
`POST /daily_start_bot`
For more configuration options, please consult Daily's API documentation.
## Using Twilio numbers
As above, but target the following URL:
`POST /twilio_start_bot`
For more configuration options, please consult Twilio's API documentation.
## Deployment example
A Dockerfile is included in this demo for convenience. Here is an example of how to build and deploy your bot to [fly.io](https://fly.io).
*Please note: This demo spawns agents as subprocesses for convenience / demonstration purposes. You would likely not want to do this in production as it would limit concurrency to available system resources. For more information on how to deploy your bots using VMs, refer to the Pipecat documentation.*
### Build the docker image
`docker build -t tag:project .`
### Launch the fly project
`mv fly.example.toml fly.toml`
`fly launch` (using the included fly.toml)
### Setup your secrets on Fly
Set the necessary secrets (found in `env.example`)
`fly secrets set DAILY_API_KEY=... OPENAI_API_KEY=... ELEVENLABS_API_KEY=... ELEVENLABS_VOICE_ID=...`
If you're using Twilio as a number vendor:
`fly secrets set TWILIO_ACCOUNT_SID=... TWILIO_AUTH_TOKEN=...`
### Deploy!
`fly deploy`
## Need to do something more advanced?
This demo covers the basics of bot telephony. If you want to know more about working with PSTN / SIP, please ping us on [Discord](https://discord.gg/pipecat).

View File

@@ -13,7 +13,7 @@ from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import EndFrame, TextFrame
from pipecat.frames.frames import TextFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
@@ -53,7 +53,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
await runner.run(task)

View File

@@ -14,7 +14,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, Frame, MetricsFrame
from pipecat.frames.frames import Frame, MetricsFrame
from pipecat.metrics.metrics import (
LLMUsageMetricsData,
ProcessingMetricsData,
@@ -47,9 +47,7 @@ class MetricsLogger(FrameProcessor):
elif isinstance(d, LLMUsageMetricsData):
tokens = d.value
print(
f"!!! MetricsFrame: {frame}, tokens: {
tokens.prompt_tokens}, characters: {
tokens.completion_tokens}"
f"!!! MetricsFrame: {frame}, tokens: {tokens.prompt_tokens}, characters: {tokens.completion_tokens}"
)
elif isinstance(d, TTSUsageMetricsData):
print(f"!!! MetricsFrame: {frame}, characters: {d.value}")
@@ -117,7 +115,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -15,10 +15,16 @@ from PIL import Image
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, Frame, OutputImageRawFrame, SystemFrame, TextFrame
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
Frame,
OutputImageRawFrame,
TextFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia import CartesiaHttpTTSService
@@ -45,7 +51,7 @@ class ImageSyncAggregator(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
if not isinstance(frame, SystemFrame) and direction == FrameDirection.DOWNSTREAM:
if isinstance(frame, BotStartedSpeakingFrame):
await self.push_frame(
OutputImageRawFrame(
image=self._speaking_image_bytes,
@@ -53,7 +59,8 @@ class ImageSyncAggregator(FrameProcessor):
format=self._speaking_image_format,
)
)
await self.push_frame(frame)
elif isinstance(frame, BotStoppedSpeakingFrame):
await self.push_frame(
OutputImageRawFrame(
image=self._waiting_image_bytes,
@@ -61,8 +68,8 @@ class ImageSyncAggregator(FrameProcessor):
format=self._waiting_image_format,
)
)
else:
await self.push_frame(frame)
await self.push_frame(frame)
async def main():
@@ -109,16 +116,24 @@ async def main():
pipeline = Pipeline(
[
transport.input(),
image_sync_aggregator,
context_aggregator.user(),
llm,
tts,
image_sync_aggregator,
transport.output(),
context_aggregator.assistant(),
]
)
task = PipelineTask(pipeline)
task = PipelineTask(
pipeline,
PipelineParams(
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
report_only_initial_ttfb=True,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
@@ -128,7 +143,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -13,7 +13,6 @@ from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -94,7 +93,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -92,7 +91,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -96,7 +95,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -19,7 +19,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -124,7 +124,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -16,7 +16,6 @@ from runner import configure
from pipecat.frames.frames import (
BotInterruptionFrame,
EndFrame,
StopInterruptionFrame,
UserStartedSpeakingFrame,
UserStoppedSpeakingFrame,
@@ -106,7 +105,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -91,7 +90,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -92,7 +91,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,14 +14,12 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai import OpenAILLMService
from pipecat.services.playht import PlayHTHttpTTSService
from pipecat.transcriptions.language import Language
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
@@ -94,7 +92,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -95,7 +94,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -101,7 +100,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -89,7 +88,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -15,7 +15,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -99,7 +98,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -93,7 +92,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -99,7 +98,7 @@ async def main():
# Register an event handler to exit the application when the user leaves.
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -90,7 +89,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -105,7 +104,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -99,7 +98,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -98,7 +97,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -98,7 +97,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.filters.krisp_filter import KrispFilter
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -93,7 +92,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -93,7 +92,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -85,7 +84,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -17,7 +17,6 @@ from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import (
EndFrame,
Frame,
InputAudioRawFrame,
LLMFullResponseEndFrame,
@@ -271,7 +270,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -14,7 +14,6 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -92,7 +91,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -29,11 +30,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineTask
@@ -30,11 +31,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -30,11 +31,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -30,11 +31,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -30,11 +31,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -30,11 +31,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -30,11 +31,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -30,11 +31,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
@@ -95,7 +93,7 @@ async def main():
messages = [
{
"role": "system",
"content": """You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way.
"content": """You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way.
You have one functions available:

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -30,11 +31,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
@@ -95,7 +93,7 @@ async def main():
messages = [
{
"role": "system",
"content": """You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way.
"content": """You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way.
You have one functions available:

View File

@@ -15,6 +15,7 @@ from openai.types.chat import ChatCompletionToolParam
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -30,11 +31,8 @@ logger.add(sys.stderr, level="DEBUG")
async def start_fetch_weather(function_name, llm, context):
# note: we can't push a frame to the LLM here. the bot
# can interrupt itself and/or cause audio overlapping glitches.
# possible question for Aleix and Chad about what the right way
# to trigger speech is, now, with the new queues/async/sync refactors.
# await llm.push_frame(TextFrame("Let me check on that."))
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")

View File

@@ -14,7 +14,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMMessagesFrame
from pipecat.frames.frames import EndFrame, LLMMessagesFrame, TTSSpeakFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -63,16 +63,36 @@ async def main():
context = OpenAILLMContext(messages)
context_aggregator = llm.create_context_aggregator(context)
# Idle callback: invoked with the UserIdleProcessor instance it was registered on.
async def user_idle_callback(user_idle: UserIdleProcessor):
# Append a system instruction to the shared `messages` list (mutated in place)
# asking the LLM to check whether the user is still there.
messages.append(
{
"role": "system",
"content": "Ask the user if they are still there and try to prompt for some input, but be short.",
}
)
# Push the full, updated message list through the processor as an LLMMessagesFrame.
await user_idle.push_frame(LLMMessagesFrame(messages))
async def handle_user_idle(user_idle: UserIdleProcessor, retry_count: int) -> bool:
    """Escalating idle handler: nudge twice, then say goodbye and end the task.

    Args:
        user_idle: The idle processor used to push frames.
        retry_count: Attempt number supplied by the caller; 1 and 2 select
            the first and second nudge, any other value takes the final branch.

    Returns:
        True after a nudge (first or second attempt); False after the
        goodbye message has been pushed and an EndFrame has been queued.
    """
    if retry_count == 1:
        # First attempt: a gentle check-in.
        nudge = "The user has been quiet. Politely and briefly ask if they're still there."
    elif retry_count == 2:
        # Second attempt: a more direct prompt.
        nudge = "The user is still inactive. Ask if they'd like to continue our conversation."
    else:
        # Any other attempt: speak a fixed goodbye, then queue an EndFrame on the task.
        farewell = TTSSpeakFrame("It seems like you're busy right now. Have a nice day!")
        await user_idle.push_frame(farewell)
        await task.queue_frame(EndFrame())
        return False

    # Both nudges share the same mechanics: extend the shared conversation
    # with a system instruction and push the whole message list.
    messages.append({"role": "system", "content": nudge})
    await user_idle.push_frame(LLMMessagesFrame(messages))
    return True
user_idle = UserIdleProcessor(callback=user_idle_callback, timeout=5.0)
user_idle = UserIdleProcessor(callback=handle_user_idle, timeout=5.0)
pipeline = Pipeline(
[

View File

@@ -14,7 +14,6 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -120,7 +119,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -169,8 +169,7 @@ class OutputGate(FrameProcessor):
self._gate_task = self.get_event_loop().create_task(self._gate_task_handler())
async def _stop(self):
self._gate_task.cancel()
await self._gate_task
await self.cancel_task(self._gate_task)
async def _gate_task_handler(self):
while True:

View File

@@ -101,12 +101,12 @@ HIGH PRIORITY SIGNALS:
Examples:
# Complete Wh-question
[{"role": "assistant", "content": "I can help you learn."},
[{"role": "assistant", "content": "I can help you learn."},
{"role": "user", "content": "What's the fastest way to learn Spanish"}]
Output: YES
# Complete Yes/No question despite STT error
[{"role": "assistant", "content": "I know about planets."},
[{"role": "assistant", "content": "I know about planets."},
{"role": "user", "content": "Is is Jupiter the biggest planet"}]
Output: YES
@@ -118,12 +118,12 @@ Output: YES
Examples:
# Direct instruction
[{"role": "assistant", "content": "I can explain many topics."},
[{"role": "assistant", "content": "I can explain many topics."},
{"role": "user", "content": "Tell me about black holes"}]
Output: YES
# Action demand
[{"role": "assistant", "content": "I can help with math."},
[{"role": "assistant", "content": "I can help with math."},
{"role": "user", "content": "Solve this equation x plus 5 equals 12"}]
Output: YES
@@ -134,12 +134,12 @@ Output: YES
Examples:
# Specific answer
[{"role": "assistant", "content": "What's your favorite color?"},
[{"role": "assistant", "content": "What's your favorite color?"},
{"role": "user", "content": "I really like blue"}]
Output: YES
# Option selection
[{"role": "assistant", "content": "Would you prefer morning or evening?"},
[{"role": "assistant", "content": "Would you prefer morning or evening?"},
{"role": "user", "content": "Morning"}]
Output: YES
@@ -153,17 +153,17 @@ MEDIUM PRIORITY SIGNALS:
Examples:
# Self-correction reaching completion
[{"role": "assistant", "content": "What would you like to know?"},
[{"role": "assistant", "content": "What would you like to know?"},
{"role": "user", "content": "Tell me about... no wait, explain how rainbows form"}]
Output: YES
# Topic change with complete thought
[{"role": "assistant", "content": "The weather is nice today."},
[{"role": "assistant", "content": "The weather is nice today."},
{"role": "user", "content": "Actually can you tell me who invented the telephone"}]
Output: YES
# Mid-sentence completion
[{"role": "assistant", "content": "Hello I'm ready."},
[{"role": "assistant", "content": "Hello I'm ready."},
{"role": "user", "content": "What's the capital of? France"}]
Output: YES
@@ -175,12 +175,12 @@ Output: YES
Examples:
# Acknowledgment
[{"role": "assistant", "content": "Should we talk about history?"},
[{"role": "assistant", "content": "Should we talk about history?"},
{"role": "user", "content": "Sure"}]
Output: YES
# Disagreement with completion
[{"role": "assistant", "content": "Is that what you meant?"},
[{"role": "assistant", "content": "Is that what you meant?"},
{"role": "user", "content": "No not really"}]
Output: YES
@@ -194,12 +194,12 @@ LOW PRIORITY SIGNALS:
Examples:
# Word repetition but complete
[{"role": "assistant", "content": "I can help with that."},
[{"role": "assistant", "content": "I can help with that."},
{"role": "user", "content": "What what is the time right now"}]
Output: YES
# Missing punctuation but complete
[{"role": "assistant", "content": "I can explain that."},
[{"role": "assistant", "content": "I can explain that."},
{"role": "user", "content": "Please tell me how computers work"}]
Output: YES
@@ -211,12 +211,12 @@ Output: YES
Examples:
# Filler words but complete
[{"role": "assistant", "content": "What would you like to know?"},
[{"role": "assistant", "content": "What would you like to know?"},
{"role": "user", "content": "Um uh how do airplanes fly"}]
Output: YES
# Thinking pause but incomplete
[{"role": "assistant", "content": "I can explain anything."},
[{"role": "assistant", "content": "I can explain anything."},
{"role": "user", "content": "Well um I want to know about the"}]
Output: NO
@@ -241,17 +241,17 @@ DECISION RULES:
Examples:
# Incomplete despite corrections
[{"role": "assistant", "content": "What would you like to know about?"},
[{"role": "assistant", "content": "What would you like to know about?"},
{"role": "user", "content": "Can you tell me about"}]
Output: NO
# Complete despite multiple artifacts
[{"role": "assistant", "content": "I can help you learn."},
[{"role": "assistant", "content": "I can help you learn."},
{"role": "user", "content": "How do you I mean what's the best way to learn programming"}]
Output: YES
# Trailing off incomplete
[{"role": "assistant", "content": "I can explain anything."},
[{"role": "assistant", "content": "I can explain anything."},
{"role": "user", "content": "I was wondering if you could tell me why"}]
Output: NO
"""
@@ -374,8 +374,7 @@ class OutputGate(FrameProcessor):
self._gate_task = self.get_event_loop().create_task(self._gate_task_handler())
async def _stop(self):
self._gate_task.cancel()
await self._gate_task
await self.cancel_task(self._gate_task)
async def _gate_task_handler(self):
while True:

View File

@@ -44,9 +44,7 @@ from pipecat.processors.aggregators.openai_llm_context import (
)
from pipecat.processors.filters.function_filter import FunctionFilter
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.user_idle_processor import UserIdleProcessor
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.services.google import GoogleLLMContext, GoogleLLMService
from pipecat.sync.base_notifier import BaseNotifier
from pipecat.sync.event_notifier import EventNotifier
@@ -440,11 +438,11 @@ class CompletenessCheck(FrameProcessor):
if isinstance(frame, UserStartedSpeakingFrame):
if self._idle_task:
self._idle_task.cancel()
await self.cancel_task(self._idle_task)
elif isinstance(frame, TextFrame) and frame.text.startswith("YES"):
logger.debug("Completeness check YES")
if self._idle_task:
self._idle_task.cancel()
await self.cancel_task(self._idle_task)
await self.push_frame(UserStoppedSpeakingFrame())
await self._audio_accumulator.reset()
await self._notifier.notify()
@@ -602,8 +600,7 @@ class OutputGate(FrameProcessor):
self._gate_task = self.get_event_loop().create_task(self._gate_task_handler())
async def _stop(self):
self._gate_task.cancel()
await self._gate_task
await self.cancel_task(self._gate_task)
async def _gate_task_handler(self):
while True:

View File

@@ -15,6 +15,7 @@ from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import LLMMessagesAppendFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -71,6 +72,21 @@ async def main():
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
    """Queue a single message-append frame when the first participant joins."""
    # The one message appended to the LLM context on join.
    greeting_prompt = {
        "role": "assistant",
        "content": "Greet the user.",
    }
    append_frame = LLMMessagesAppendFrame(messages=[greeting_prompt])
    await task.queue_frames([append_frame])
runner = PipelineRunner()
await runner.run(task)

View File

@@ -15,7 +15,6 @@ from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -124,7 +123,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -7,7 +7,7 @@
import asyncio
import os
import sys
from typing import List
from typing import List, Optional
import aiohttp
from dotenv import load_dotenv
@@ -15,7 +15,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, TranscriptionMessage, TranscriptionUpdateFrame
from pipecat.frames.frames import TranscriptionMessage, TranscriptionUpdateFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -33,13 +33,49 @@ logger.add(sys.stderr, level="DEBUG")
class TranscriptHandler:
"""Simple handler to demonstrate transcript processing.
"""Handles real-time transcript processing and output.
Maintains a list of conversation messages and logs them with timestamps.
Maintains a list of conversation messages and outputs them either to a log
or to a file as they are received. Each message includes its timestamp and role.
Attributes:
messages: List of all processed transcript messages
output_file: Optional path to file where transcript is saved. If None, outputs to log only.
"""
def __init__(self):
def __init__(self, output_file: Optional[str] = None):
    """Set up an empty transcript and remember where (if anywhere) to write it.

    Args:
        output_file: Path of the file to write to. When None (or empty),
            messages only go to the log.
    """
    self.messages: List[TranscriptionMessage] = []
    self.output_file: Optional[str] = output_file

    # Describe the chosen output mode once, for the debug log.
    if output_file:
        mode = "with output_file=" + output_file
    else:
        mode = "with log output only"
    logger.debug(f"TranscriptHandler initialized {mode}")
async def save_message(self, message: TranscriptionMessage):
    """Log one transcript message and, if configured, append it to the output file.

    The line is formatted as "[timestamp] role: content"; the timestamp
    prefix is omitted when the message has no timestamp.

    Args:
        message: The message to save
    """
    prefix = ""
    if message.timestamp:
        prefix = f"[{message.timestamp}] "
    line = f"{prefix}{message.role}: {message.content}"

    # The log always receives the message.
    logger.info(f"Transcript: {line}")

    # Nothing more to do when no output file was configured.
    if not self.output_file:
        return

    # File output is best-effort: failures are logged, never raised.
    try:
        with open(self.output_file, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except Exception as e:
        logger.error(f"Error saving transcript message to file: {e}")
async def on_transcript_update(
self, processor: TranscriptProcessor, frame: TranscriptionUpdateFrame
@@ -50,13 +86,11 @@ class TranscriptHandler:
processor: The TranscriptProcessor that emitted the update
frame: TranscriptionUpdateFrame containing new messages
"""
self.messages.extend(frame.messages)
logger.debug(f"Received transcript update with {len(frame.messages)} new messages")
# Log the new messages
logger.info("New transcript messages:")
for msg in frame.messages:
timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
logger.info(f"{timestamp}{msg.role}: {msg.content}")
self.messages.append(msg)
await self.save_message(msg)
async def main():
@@ -99,7 +133,8 @@ async def main():
# Create transcript processor and handler
transcript = TranscriptProcessor()
transcript_handler = TranscriptHandler()
transcript_handler = TranscriptHandler() # Output to log only
# transcript_handler = TranscriptHandler(output_file="transcript.txt") # Output to file and log
pipeline = Pipeline(
[
@@ -110,8 +145,8 @@ async def main():
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
transcript.assistant(), # Assistant transcripts
context_aggregator.assistant(), # Assistant spoken responses
]
)
@@ -130,7 +165,8 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
# Stop the pipeline immediately when the participant leaves
await task.cancel()
runner = PipelineRunner()

View File

@@ -7,7 +7,7 @@
import asyncio
import os
import sys
from typing import List
from typing import List, Optional
import aiohttp
from dotenv import load_dotenv
@@ -15,7 +15,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, TranscriptionMessage, TranscriptionUpdateFrame
from pipecat.frames.frames import TranscriptionMessage, TranscriptionUpdateFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -33,13 +33,49 @@ logger.add(sys.stderr, level="DEBUG")
class TranscriptHandler:
"""Simple handler to demonstrate transcript processing.
"""Handles real-time transcript processing and output.
Maintains a list of conversation messages and logs them with timestamps.
Maintains a list of conversation messages and outputs them either to a log
or to a file as they are received. Each message includes its timestamp and role.
Attributes:
messages: List of all processed transcript messages
output_file: Optional path to file where transcript is saved. If None, outputs to log only.
"""
def __init__(self):
def __init__(self, output_file: Optional[str] = None):
"""Initialize handler with optional file output.
Args:
output_file: Path to output file. If None, outputs to log only.
"""
self.messages: List[TranscriptionMessage] = []
self.output_file: Optional[str] = output_file
logger.debug(
f"TranscriptHandler initialized {'with output_file=' + output_file if output_file else 'with log output only'}"
)
async def save_message(self, message: TranscriptionMessage):
"""Save a single transcript message.
Outputs the message to the log and optionally to a file.
Args:
message: The message to save
"""
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
line = f"{timestamp}{message.role}: {message.content}"
# Always log the message
logger.info(f"Transcript: {line}")
# Optionally write to file
if self.output_file:
try:
with open(self.output_file, "a", encoding="utf-8") as f:
f.write(line + "\n")
except Exception as e:
logger.error(f"Error saving transcript message to file: {e}")
async def on_transcript_update(
self, processor: TranscriptProcessor, frame: TranscriptionUpdateFrame
@@ -50,13 +86,11 @@ class TranscriptHandler:
processor: The TranscriptProcessor that emitted the update
frame: TranscriptionUpdateFrame containing new messages
"""
self.messages.extend(frame.messages)
logger.debug(f"Received transcript update with {len(frame.messages)} new messages")
# Log the new messages
logger.info("New transcript messages:")
for msg in frame.messages:
timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
logger.info(f"{timestamp}{msg.role}: {msg.content}")
self.messages.append(msg)
await self.save_message(msg)
async def main():
@@ -99,7 +133,8 @@ async def main():
# Create transcript processor and handler
transcript = TranscriptProcessor()
transcript_handler = TranscriptHandler()
transcript_handler = TranscriptHandler() # Output to log only
# transcript_handler = TranscriptHandler(output_file="transcript.txt") # Output to file and log
pipeline = Pipeline(
[
@@ -110,8 +145,8 @@ async def main():
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
transcript.assistant(), # Assistant transcripts
context_aggregator.assistant(), # Assistant spoken responses
]
)
@@ -130,7 +165,8 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
# Stop the pipeline immediately when the participant leaves
await task.cancel()
runner = PipelineRunner()

View File

@@ -7,7 +7,7 @@
import asyncio
import os
import sys
from typing import List
from typing import List, Optional
import aiohttp
from dotenv import load_dotenv
@@ -15,7 +15,7 @@ from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame, TranscriptionMessage, TranscriptionUpdateFrame
from pipecat.frames.frames import TranscriptionMessage, TranscriptionUpdateFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -34,13 +34,49 @@ logger.add(sys.stderr, level="DEBUG")
class TranscriptHandler:
"""Simple handler to demonstrate transcript processing.
"""Handles real-time transcript processing and output.
Maintains a list of conversation messages and logs them with timestamps.
Maintains a list of conversation messages and outputs them either to a log
or to a file as they are received. Each message includes its timestamp and role.
Attributes:
messages: List of all processed transcript messages
output_file: Optional path to file where transcript is saved. If None, outputs to log only.
"""
def __init__(self):
def __init__(self, output_file: Optional[str] = None):
"""Initialize handler with optional file output.
Args:
output_file: Path to output file. If None, outputs to log only.
"""
self.messages: List[TranscriptionMessage] = []
self.output_file: Optional[str] = output_file
logger.debug(
f"TranscriptHandler initialized {'with output_file=' + output_file if output_file else 'with log output only'}"
)
async def save_message(self, message: TranscriptionMessage):
"""Save a single transcript message.
Outputs the message to the log and optionally to a file.
Args:
message: The message to save
"""
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
line = f"{timestamp}{message.role}: {message.content}"
# Always log the message
logger.info(f"Transcript: {line}")
# Optionally write to file
if self.output_file:
try:
with open(self.output_file, "a", encoding="utf-8") as f:
f.write(line + "\n")
except Exception as e:
logger.error(f"Error saving transcript message to file: {e}")
async def on_transcript_update(
self, processor: TranscriptProcessor, frame: TranscriptionUpdateFrame
@@ -51,13 +87,11 @@ class TranscriptHandler:
processor: The TranscriptProcessor that emitted the update
frame: TranscriptionUpdateFrame containing new messages
"""
self.messages.extend(frame.messages)
logger.debug(f"Received transcript update with {len(frame.messages)} new messages")
# Log the new messages
logger.info("New transcript messages:")
for msg in frame.messages:
timestamp = f"[{msg.timestamp}] " if msg.timestamp else ""
logger.info(f"{timestamp}{msg.role}: {msg.content}")
self.messages.append(msg)
await self.save_message(msg)
async def main():
@@ -102,7 +136,8 @@ async def main():
# Create transcript processor and handler
transcript = TranscriptProcessor()
transcript_handler = TranscriptHandler()
transcript_handler = TranscriptHandler() # Output to log only
# transcript_handler = TranscriptHandler(output_file="transcript.txt") # Output to file and log
pipeline = Pipeline(
[
@@ -113,8 +148,8 @@ async def main():
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
context_aggregator.assistant(), # Assistant spoken responses
transcript.assistant(), # Assistant transcripts
context_aggregator.assistant(), # Assistant spoken responses
]
)
@@ -140,7 +175,8 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
# Stop the pipeline immediately when the participant leaves
await task.cancel()
runner = PipelineRunner()

View File

@@ -0,0 +1,180 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from runner import configure
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
Frame,
LLMFullResponseEndFrame,
LLMFullResponseStartFrame,
LLMTextFrame,
StartInterruptionFrame,
)
from pipecat.observers.base_observer import BaseObserver
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class DebugObserver(BaseObserver):
    """Observer to log interruptions and bot speaking events to the console.

    Logs all frame instances of:
    - StartInterruptionFrame
    - BotStartedSpeakingFrame
    - BotStoppedSpeakingFrame

    This allows you to see the frame flow from processor to processor through the pipeline for these frames.

    Log format: [EVENT TYPE]: [source processor] → [destination processor] at [timestamp]s
    (the arrow points left, ←, for frames that are not travelling downstream).
    """

    async def on_push_frame(
        self,
        src: FrameProcessor,
        dst: FrameProcessor,
        frame: Frame,
        direction: FrameDirection,
        timestamp: int,
    ):
        """Log interruption and bot-speaking frames as they move between processors.

        Args:
            src: Processor that pushed the frame.
            dst: Processor the frame is being pushed to.
            frame: The frame being pushed; other frame types are ignored.
            direction: Direction the frame is travelling in the pipeline.
            timestamp: Time of the push. Presumably nanoseconds, given the
                division by 1e9 below — confirm against BaseObserver.
        """
        # Convert timestamp to seconds for readability
        time_sec = timestamp / 1_000_000_000

        # Create direction arrow so the log shows which way the frame is flowing
        arrow = "→" if direction == FrameDirection.DOWNSTREAM else "←"

        if isinstance(frame, StartInterruptionFrame):
            logger.info(f"⚡ INTERRUPTION START: {src} {arrow} {dst} at {time_sec:.2f}s")
        elif isinstance(frame, BotStartedSpeakingFrame):
            logger.info(f"🤖 BOT START SPEAKING: {src} {arrow} {dst} at {time_sec:.2f}s")
        elif isinstance(frame, BotStoppedSpeakingFrame):
            logger.info(f"🤖 BOT STOP SPEAKING: {src} {arrow} {dst} at {time_sec:.2f}s")
class LLMLogObserver(BaseObserver):
    """Console logger for LLM response activity.

    Frames reported:
    - LLMFullResponseStartFrame (only when pushed by an OpenAILLMService)
    - LLMTextFrame
    - LLMFullResponseEndFrame (only when pushed by an OpenAILLMService)

    Together these show when the LLM begins a response, the text it produces,
    and when the response is complete.

    Log format: [LLM EVENT]: [details] at [timestamp]s
    """

    async def on_push_frame(
        self,
        src: FrameProcessor,
        dst: FrameProcessor,
        frame: Frame,
        direction: FrameDirection,
        timestamp: int,
    ):
        """Log LLM start/end markers and generated text for a pushed frame."""
        time_sec = timestamp / 1_000_000_000

        if isinstance(frame, (LLMFullResponseStartFrame, LLMFullResponseEndFrame)):
            # Response boundaries are reported only when the LLM service itself
            # is the source; the same frames pushed by other processors are skipped.
            if not isinstance(src, OpenAILLMService):
                return
            if isinstance(frame, LLMFullResponseStartFrame):
                event = "START"
            else:
                event = "END"
            logger.info(f"🧠 LLM {event} RESPONSE at {time_sec:.2f}s")
            return

        # Every LLMTextFrame is logged, whatever its source
        if isinstance(frame, LLMTextFrame):
            logger.info(f"🧠 LLM GENERATING: {frame.text!r} at {time_sec:.2f}s")
async def main():
    """Run a Daily voice bot with the debug and LLM log observers attached.

    Builds a transport -> user context -> LLM -> TTS -> transport output
    pipeline, registers ``DebugObserver`` and ``LLMLogObserver`` through
    ``PipelineParams.observers``, and runs the task until it finishes or is
    cancelled when a participant leaves.
    """
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        transport = DailyTransport(
            room_url,
            token,
            "Respond bot",
            DailyParams(
                audio_out_enabled=True,
                transcription_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
            ),
        )

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")

        # Initial conversation context; the join handler below appends to this list.
        messages = [
            {
                "role": "system",
                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
            },
        ]

        context = OpenAILLMContext(messages)
        context_aggregator = llm.create_context_aggregator(context)

        pipeline = Pipeline(
            [
                transport.input(),  # Transport user input
                context_aggregator.user(),  # User responses
                llm,  # LLM
                tts,  # TTS
                transport.output(),  # Transport bot output
                context_aggregator.assistant(),  # Assistant spoken responses
            ]
        )

        # The observers receive on_push_frame callbacks for frames pushed in the pipeline.
        task = PipelineTask(
            pipeline,
            PipelineParams(
                allow_interruptions=True,
                enable_metrics=True,
                enable_usage_metrics=True,
                report_only_initial_ttfb=True,
                observers=[DebugObserver(), LLMLogObserver()],
            ),
        )

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            await transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            messages.append({"role": "system", "content": "Please introduce yourself to the user."})
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            # Cancel the task when the participant leaves instead of queuing an EndFrame.
            await task.cancel()

        runner = PipelineRunner()

        await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,130 @@
#
# Copyright (c) 2024, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
from pathlib import Path
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.services.google import GoogleLLMService, LLMSearchResponseFrame
from pipecat.transports.services.daily import DailyParams, DailyTransport
sys.path.append(str(Path(__file__).parent.parent))
from runner import configure
load_dotenv(override=True)
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
# Tool declarations for the LLM: enable Google Search retrieval
search_tool = {"google_search_retrieval": {}}
tools = [search_tool]
system_instruction = """
You are an expert at providing the most recent news from any place. Your responses will be converted to audio, so avoid using special characters or overly complex formatting.
Always use the google search API to retrieve the latest news. You must also use it to check which day is today.
You can:
- Use the Google search API to check the current date.
- Provide the most recent and relevant news from any place by using the google search API.
- Answer any questions the user may have, ensuring your responses are accurate and concise.
Start each interaction by asking the user about which place they would like to know the information.
"""
class LLMSearchLoggerProcessor(FrameProcessor):
    """Pass-through processor that logs every LLMSearchResponseFrame it sees."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Log search response frames and forward every frame unchanged.

        Args:
            frame: The frame travelling through the pipeline.
            direction: Direction the frame is travelling; it is preserved when
                the frame is forwarded.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMSearchResponseFrame):
            # Use the module's configured loguru logger rather than print().
            logger.info(f"LLMSearchLoggerProcessor: {frame}")

        # Forward in the frame's own direction. Omitting `direction` would
        # re-push upstream frames downstream instead of passing them upstream.
        await self.push_frame(frame, direction)
async def main():
    """Run a Daily voice bot that answers with the Google LLM service and search tool.

    Builds a transport -> STT -> user context -> LLM -> search logger -> TTS ->
    transport output pipeline and runs it. The conversation starts when the
    first participant joins.
    """
    async with aiohttp.ClientSession() as session:
        (room_url, token) = await configure(session)

        transport = DailyTransport(
            room_url,
            token,
            "Latest news!",
            DailyParams(
                audio_out_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer(),
                vad_audio_passthrough=True,
            ),
        )

        stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
        )

        # Initialize the Google LLM service with the module-level system
        # instruction and tool list
        llm = GoogleLLMService(
            api_key=os.getenv("GOOGLE_API_KEY"),
            system_instruction=system_instruction,
            tools=tools,
        )

        # Seed the context with a user message that asks the model to greet the user.
        context = OpenAILLMContext(
            [
                {
                    "role": "user",
                    "content": "Start by greeting the user warmly, introducing yourself, and mentioning the current day. Be friendly and engaging to set a positive tone for the interaction.",
                }
            ],
        )
        context_aggregator = llm.create_context_aggregator(context)

        llm_search_logger = LLMSearchLoggerProcessor()

        # The search logger sits directly after the LLM so it sees the frames
        # the LLM pushes before they reach TTS.
        pipeline = Pipeline(
            [
                transport.input(),
                stt,
                context_aggregator.user(),
                llm,
                llm_search_logger,
                tts,
                transport.output(),
                context_aggregator.assistant(),
            ]
        )

        task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Kick off the conversation by queuing the current context.
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        runner = PipelineRunner()

        await runner.run(task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -0,0 +1,43 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import sys
from loguru import logger
from pipecat.frames.frames import Frame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
class NullProcessor(FrameProcessor):
    """Processor that handles each frame in the base class and never forwards it."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Run base-class processing only; the frame is not pushed onward.

        NOTE(review): presumably this is what stops heartbeat frames from
        completing their trip through the pipeline in this example — confirm
        against PipelineTask.
        """
        await super().process_frame(frame, direction)
async def main():
    """Demonstrate heartbeat monitoring.

    Runs a heartbeat-enabled task over a pipeline made of a single
    NullProcessor, so a warning is displayed when heartbeats are not received.
    """
    null_pipeline = Pipeline([NullProcessor()])
    heartbeat_task = PipelineTask(
        null_pipeline,
        PipelineParams(enable_heartbeats=True),
    )
    await PipelineRunner().run(heartbeat_task)
if __name__ == "__main__":
asyncio.run(main())

View File

@@ -67,8 +67,8 @@ talking_frame = SpriteFrame(images=sprites)
class TalkingAnimation(FrameProcessor):
"""This class starts a talking animation when it receives an first AudioFrame,
and then returns to a "quiet" sprite when it sees a TTSStoppedFrame.
"""This class starts a talking animation when it receives the first BotStartedSpeakingFrame,
and then returns to a "quiet" sprite when it sees a BotStoppedSpeakingFrame.
"""
def __init__(self):

51
examples/news-chatbot/.gitignore vendored Normal file
View File

@@ -0,0 +1,51 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
dist/
*.egg-info/
.installed.cfg
*.egg
.pytest_cache/
.coverage
.coverage.*
.env
.venv
env/
venv/
ENV/
.mypy_cache/
.dmypy.json
dmypy.json
# JavaScript/Node.js
node_modules/
dist/
dist-ssr/
*.local
.env.local
.env.development.local
.env.test.local
.env.production.local
# Logs
logs/
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
# Editor/IDE
.vscode/*
!.vscode/extensions.json
.idea/
*.swp
*.swo
.DS_Store
# Project specific
runpod.toml

View File

@@ -0,0 +1,48 @@
# News Chatbot
A simple AI-powered chatbot that leverages Gemini's real-time search capabilities in a voice AI application.
This example demonstrates Gemini's ability to query Google search in real time and return relevant responses, including links to the URLs that Gemini searched.
All the details about grounding with Google Search can be found [here](https://ai.google.dev/gemini-api/docs/grounding?lang=python).
## Quick Start
### First, start the bot server:
1. Navigate to the server directory:
```bash
cd server
```
2. Create and activate a virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
3. Install requirements:
```bash
pip install -r requirements.txt
```
4. Copy env.example to .env and configure:
- Add your API keys
5. Start the server:
```bash
python server.py
```
### Next, connect using the client app:
For client-side setup, refer to the [JavaScript Guide](client/javascript/README.md).
## Important Note
Ensure the bot server is running before using any client implementations.
## Requirements
- Python 3.10+
- Node.js 18+ (for JavaScript and React implementations)
- Daily API key
- Gemini API key (for Gemini bot)
- Cartesia API key
- Modern web browser with WebRTC support

View File

@@ -0,0 +1,27 @@
# JavaScript Implementation
Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/client/js/introduction).
## Setup
1. Run the bot server. See the [server README](../../server/README.md).
2. Navigate to the `client/javascript` directory:
```bash
cd client/javascript
```
3. Install dependencies:
```bash
npm install
```
4. Run the client app:
```bash
npm run dev
```
5. Visit http://localhost:5173 in your browser.

View File

@@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AI Chatbot</title>
</head>
<body>
<div class="container">
<div class="status-bar">
<div class="status">
Status: <span id="connection-status">Disconnected</span>
</div>
<div class="controls">
<button id="connect-btn">Connect</button>
<button id="disconnect-btn" disabled>Disconnect</button>
</div>
</div>
<div class="main-content">
<div class="bot-container">
<div id="search-result-container">
</div>
<audio id="bot-audio" autoplay></audio>
</div>
</div>
<div class="debug-panel">
<h3>Debug Info</h3>
<div id="debug-log"></div>
</div>
</div>
<script type="module" src="/src/app.js"></script>
<link rel="stylesheet" href="/src/style.css">
</body>
</html>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
{
"name": "client",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"dev": "vite",
"build": "vite build",
"preview": "vite preview"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "",
"devDependencies": {
"vite": "^6.0.2"
},
"dependencies": {
"@pipecat-ai/client-js": "^0.3.2",
"@pipecat-ai/daily-transport": "^0.3.4"
}
}

View File

@@ -0,0 +1,341 @@
/**
* Copyright (c) 2024–2025, Daily
*
* SPDX-License-Identifier: BSD 2-Clause License
*/
/**
* RTVI Client Implementation
*
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
* It handles audio/video streaming and manages the connection lifecycle.
*
* Requirements:
* - A running RTVI bot server (defaults to http://localhost:7860)
* - The server must implement the /connect endpoint that returns Daily.co room credentials
* - Browser with WebRTC support
*/
import {LogLevel, RTVIClient, RTVIClientHelper, RTVIEvent} from '@pipecat-ai/client-js';
import { DailyTransport } from '@pipecat-ai/daily-transport';
/**
 * RTVI helper that renders "bot-llm-search-response" messages into a panel.
 *
 * Each message replaces the panel content with the search result text, a list
 * of source links and, when provided, the rendered HTML content in an iframe.
 */
class SearchResponseHelper extends RTVIClientHelper {
  /**
   * @param contentPanel DOM element whose content is replaced on every message.
   */
  constructor(contentPanel) {
    super();
    this.contentPanel = contentPanel;
  }

  /**
   * Render one search response message. Messages without `data` are ignored.
   *
   * @param rtviMessage Message whose `data` may carry `search_result`,
   *   `origins` (entries with `site_uri` / `site_title`) and `rendered_content`.
   */
  handleMessage(rtviMessage) {
    console.log("SearchResponseHelper, received message:", rtviMessage);

    const data = rtviMessage.data;
    if (!data) return;

    // Clear existing content
    this.contentPanel.innerHTML = "";

    // Create a container for all content
    const contentContainer = document.createElement('div');
    contentContainer.className = "content-container";

    // Add the search_result (as text, never as HTML)
    if (data.search_result) {
      const searchResultDiv = document.createElement('div');
      searchResultDiv.className = "search-result";
      searchResultDiv.textContent = data.search_result;
      contentContainer.appendChild(searchResultDiv);
    }

    // Add the sources
    if (Array.isArray(data.origins)) {
      const sourcesDiv = document.createElement('div');
      sourcesDiv.className = "sources";

      const sourcesTitle = document.createElement('h3');
      sourcesTitle.className = "sources-title";
      sourcesTitle.textContent = "Sources:";
      sourcesDiv.appendChild(sourcesTitle);

      for (const origin of data.origins) {
        if (!origin) continue;

        const sourceLink = document.createElement('a');
        sourceLink.className = "source-link";

        // The URI comes from the server: only make absolute http(s) URLs
        // clickable so a "javascript:" or other unexpected scheme cannot run.
        const safeHref = this.toSafeHttpUrl(origin.site_uri);
        if (safeHref) {
          sourceLink.href = safeHref;
          sourceLink.target = "_blank";
          // Prevent the opened page from reaching back through window.opener.
          sourceLink.rel = "noopener noreferrer";
        }

        // Fall back to the URI when no title was provided.
        sourceLink.textContent = origin.site_title || origin.site_uri || "";
        sourcesDiv.appendChild(sourceLink);
      }

      contentContainer.appendChild(sourcesDiv);
    }

    // Add the rendered_content in an iframe
    if (data.rendered_content) {
      const iframe = document.createElement('iframe');
      iframe.className = "iframe-container";
      // srcdoc documents inherit the page's origin, so sandbox the frame:
      // scripts are disabled, but links may still open in a new tab.
      iframe.setAttribute('sandbox', 'allow-popups allow-popups-to-escape-sandbox');
      iframe.srcdoc = data.rendered_content;
      contentContainer.appendChild(iframe);
    }

    // Append the content container to the content panel
    this.contentPanel.appendChild(contentContainer);
  }

  /**
   * Return the normalized URL when `value` is an absolute http(s) URL,
   * otherwise null.
   */
  toSafeHttpUrl(value) {
    if (typeof value !== 'string') return null;
    try {
      const url = new URL(value);
      return url.protocol === 'http:' || url.protocol === 'https:' ? url.href : null;
    } catch {
      return null;
    }
  }

  /** Message types this helper handles. */
  getMessageTypes() {
    return ["bot-llm-search-response"];
  }
}
/**
* ChatbotClient handles the connection and media management for a real-time
* voice and video interaction with an AI bot.
*/
class ChatbotClient {
  constructor() {
    // Initialize client state
    this.rtviClient = null;
    this.setupDOMElements();
    this.setupEventListeners();
  }

  /**
   * Set up references to DOM elements and create necessary media elements
   */
  setupDOMElements() {
    // Get references to UI control elements
    this.connectBtn = document.getElementById('connect-btn');
    this.disconnectBtn = document.getElementById('disconnect-btn');
    this.statusSpan = document.getElementById('connection-status');
    this.debugLog = document.getElementById('debug-log');
    this.searchResultContainer = document.getElementById('search-result-container');

    // Reuse the page's #bot-audio element for the bot's voice when it exists;
    // only create one when the page does not provide it, so the document does
    // not end up with two audio elements.
    this.botAudio = document.getElementById('bot-audio');
    if (!this.botAudio) {
      this.botAudio = document.createElement('audio');
      this.botAudio.autoplay = true;
      this.botAudio.playsInline = true;
      document.body.appendChild(this.botAudio);
    }
  }

  /**
   * Set up event listeners for connect/disconnect buttons
   */
  setupEventListeners() {
    this.connectBtn.addEventListener('click', () => this.connect());
    this.disconnectBtn.addEventListener('click', () => this.disconnect());
  }

  /**
   * Add a timestamped message to the debug log
   */
  log(message) {
    const entry = document.createElement('div');
    entry.textContent = `${new Date().toISOString()} - ${message}`;

    // Add styling based on message type
    if (message.startsWith('User: ')) {
      entry.style.color = '#2196F3'; // blue for user
    } else if (message.startsWith('Bot: ')) {
      entry.style.color = '#4CAF50'; // green for bot
    }

    this.debugLog.appendChild(entry);
    // Keep the newest entry visible
    this.debugLog.scrollTop = this.debugLog.scrollHeight;
    console.log(message);
  }

  /**
   * Update the connection status display
   */
  updateStatus(status) {
    this.statusSpan.textContent = status;
    this.log(`Status: ${status}`);
  }

  /**
   * Check for available media tracks and set them up if present
   * This is called when the bot is ready or when the transport state changes to ready
   */
  setupMediaTracks() {
    if (!this.rtviClient) return;

    // Get current tracks from the client
    const tracks = this.rtviClient.tracks();

    // Set up any available bot tracks
    if (tracks.bot?.audio) {
      this.setupAudioTrack(tracks.bot.audio);
    }
  }

  /**
   * Set up listeners for track events (start/stop)
   * This handles new tracks being added during the session
   */
  setupTrackListeners() {
    if (!this.rtviClient) return;

    // Listen for new tracks starting
    this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
      // Only handle non-local (bot) tracks
      if (!participant?.local && track.kind === 'audio') {
        this.setupAudioTrack(track);
      }
    });

    // Listen for tracks stopping
    this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
      this.log(
        `Track stopped event: ${track.kind} from ${
          participant?.name || 'unknown'
        }`
      );
    });
  }

  /**
   * Set up an audio track for playback
   * Handles both initial setup and track updates
   */
  setupAudioTrack(track) {
    this.log('Setting up audio track');

    // Check if we're already playing this track
    if (this.botAudio.srcObject) {
      const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
      if (oldTrack?.id === track.id) return;
    }

    // Create a new MediaStream with the track and set it as the audio source
    this.botAudio.srcObject = new MediaStream([track]);
  }

  /**
   * Initialize and connect to the bot
   * This sets up the RTVI client, initializes devices, and establishes the connection
   */
  async connect() {
    // Disable the button right away so a second click while connecting cannot
    // create a second client. It is re-enabled on disconnect or on failure.
    this.connectBtn.disabled = true;

    try {
      // Create a new Daily transport for WebRTC communication
      const transport = new DailyTransport();

      // Initialize the RTVI client with our configuration
      this.rtviClient = new RTVIClient({
        transport,
        params: {
          // The baseURL and endpoint of your bot server that the client will connect to
          baseUrl: 'http://localhost:7860',
          endpoints: {
            connect: '/connect',
          },
        },
        enableMic: true, // Enable microphone for user input
        enableCam: false,
        callbacks: {
          // Handle connection state changes
          onConnected: () => {
            this.updateStatus('Connected');
            this.connectBtn.disabled = true;
            this.disconnectBtn.disabled = false;
            this.log('Client connected');
          },
          onDisconnected: () => {
            this.updateStatus('Disconnected');
            this.connectBtn.disabled = false;
            this.disconnectBtn.disabled = true;
            this.log('Client disconnected');
          },
          // Handle transport state changes
          onTransportStateChanged: (state) => {
            this.updateStatus(`Transport: ${state}`);
            this.log(`Transport state changed: ${state}`);
            if (state === 'ready') {
              this.setupMediaTracks();
            }
          },
          // Handle bot connection events
          onBotConnected: (participant) => {
            this.log(`Bot connected: ${JSON.stringify(participant)}`);
          },
          onBotDisconnected: (participant) => {
            this.log(`Bot disconnected: ${JSON.stringify(participant)}`);
          },
          onBotReady: (data) => {
            this.log(`Bot ready: ${JSON.stringify(data)}`);
            this.setupMediaTracks();
          },
          // Transcript events
          onUserTranscript: (data) => {
            // Only log final transcripts
            if (data.final) {
              this.log(`User: ${data.text}`);
            }
          },
          onBotTranscript: (data) => {
            this.log(`Bot: ${data.text}`);
          },
          // Error handling
          onMessageError: (error) => {
            console.error('Message error:', error);
          },
          onError: (error) => {
            console.error('Error:', error);
          },
        },
      });
      //this.rtviClient.setLogLevel(LogLevel.DEBUG)
      this.rtviClient.registerHelper("llm", new SearchResponseHelper(this.searchResultContainer));

      // Set up listeners for media track events
      this.setupTrackListeners();

      // Initialize audio devices
      this.log('Initializing devices...');
      await this.rtviClient.initDevices();

      // Connect to the bot
      this.log('Connecting to bot...');
      await this.rtviClient.connect();

      this.log('Connection complete');
    } catch (error) {
      // Handle any errors during connection
      this.log(`Error connecting: ${error?.message ?? error}`);
      this.log(`Error stack: ${error?.stack}`);
      this.updateStatus('Error');

      // Clean up if there's an error
      if (this.rtviClient) {
        try {
          await this.rtviClient.disconnect();
        } catch (disconnectError) {
          this.log(`Error during disconnect: ${disconnectError?.message ?? disconnectError}`);
        }
        // Drop the failed client so no stale instance is kept around
        this.rtviClient = null;
      }

      // Let the user retry
      this.connectBtn.disabled = false;
      this.disconnectBtn.disabled = true;
    }
  }

  /**
   * Disconnect from the bot and clean up media resources
   */
  async disconnect() {
    if (!this.rtviClient) return;

    try {
      // Disconnect the RTVI client
      await this.rtviClient.disconnect();
    } catch (error) {
      this.log(`Error disconnecting: ${error?.message ?? error}`);
    } finally {
      // Always release local state, even when disconnect() fails, so the UI
      // is never left holding a dead client with no way to reconnect.
      this.rtviClient = null;

      // Clean up audio
      if (this.botAudio.srcObject) {
        this.botAudio.srcObject.getTracks().forEach((track) => track.stop());
        this.botAudio.srcObject = null;
      }

      // Clear the rendered search results
      this.searchResultContainer.innerHTML = '';

      this.connectBtn.disabled = false;
      this.disconnectBtn.disabled = true;
    }
  }
}
// Initialize the client when the page loads
window.addEventListener('DOMContentLoaded', () => {
new ChatbotClient();
});

View File

@@ -0,0 +1,134 @@
body {
margin: 0;
padding: 20px;
font-family: Arial, sans-serif;
background-color: #f0f0f0;
}
.container {
max-width: 1200px;
margin: 0 auto;
}
.status-bar {
display: flex;
justify-content: space-between;
align-items: center;
padding: 10px;
background-color: #fff;
border-radius: 8px;
margin-bottom: 20px;
}
.controls button {
padding: 8px 16px;
margin-left: 10px;
border: none;
border-radius: 4px;
cursor: pointer;
}
#connect-btn {
background-color: #4caf50;
color: white;
}
#disconnect-btn {
background-color: #f44336;
color: white;
}
button:disabled {
opacity: 0.5;
cursor: not-allowed;
}
.main-content {
background-color: #fff;
border-radius: 8px;
padding: 20px;
margin-bottom: 20px;
}
.bot-container {
display: flex;
flex-direction: column;
align-items: center;
}
#search-result-container {
background-color: #e0e0e0;
padding: 20px;
width: calc(100% - 40px);
height: 450px;
overflow: auto;
}
/* Container for all content */
.content-container {
display: flex;
flex-direction: column;
gap: 20px; /* Space between elements */
font-family: Arial, sans-serif;
}
/* Styles for the search result */
.search-result {
font-size: 16px;
line-height: 1.5;
color: #333;
}
/* Styles for the sources container */
.sources {
display: flex;
flex-direction: column;
gap: 8px; /* Space between source links */
}
.sources-title {
font-size: 16px;
font-weight: bold;
color: #444;
}
/* Styles for source links */
.source-link {
text-decoration: none;
color: #1a73e8;
}
.source-link:hover {
text-decoration: underline;
}
/* Styles for the iframe container */
.iframe-container {
flex: none;
width: 100%;
height: 400px; /* Adjust height as needed */
border: none;
}
.debug-panel {
background-color: #fff;
border-radius: 8px;
padding: 20px;
}
.debug-panel h3 {
margin: 0 0 10px 0;
font-size: 16px;
font-weight: bold;
}
#debug-log {
height: 200px;
overflow-y: auto;
background-color: #f8f8f8;
padding: 10px;
border-radius: 4px;
font-family: monospace;
font-size: 12px;
line-height: 1.4;
}

View File

@@ -0,0 +1,13 @@
import { defineConfig } from 'vite';
// Vite dev-server configuration for the news chatbot client.
export default defineConfig({
  server: {
    proxy: {
      // Proxy /connect requests to the bot server
      '/connect': {
        // 0.0.0.0 is a listen address, not a portable destination (it fails on
        // Windows), so target the loopback address instead.
        target: 'http://127.0.0.1:7860', // Replace with your backend URL
        changeOrigin: true,
      },
    },
  },
});

View File

@@ -0,0 +1,52 @@
# News Chatbot Server
A FastAPI server that manages bot instances and provides the endpoint for Pipecat client connections.
## Endpoints
- `POST /connect` - Pipecat client connection endpoint
## Environment Variables
Copy `env.example` to `.env` and configure:
```ini
# Required API Keys
DAILY_API_KEY= # Your Daily API key
DEEPGRAM_API_KEY= # Your Deepgram API key
GOOGLE_API_KEY= # Your Google/Gemini API key
CARTESIA_API_KEY= # Your Cartesia API key
# Optional Configuration
DAILY_API_URL= # Optional: Daily API URL (defaults to https://api.daily.co/v1)
DAILY_SAMPLE_ROOM_URL= # Optional: Fixed room URL for development
HOST= # Optional: Host address (defaults to 0.0.0.0)
FAST_API_PORT= # Optional: Port number (defaults to 7860)
```
## Running the Server
Set up and activate your virtual environment:
```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
Install dependencies:
```bash
pip install -r requirements.txt
```
If you want to use the local version of `pipecat` in this repo rather than the last published version, also run:
```bash
pip install --editable "../../../[daily,deepgram,google,cartesia,openai,silero]"
```
Run the server:
```bash
python server.py
```

View File

@@ -0,0 +1,5 @@
DAILY_SAMPLE_ROOM_URL=https://yourdomain.daily.co/yourroom # (for joining the bot to the same room repeatedly for local dev)
DAILY_API_KEY=
CARTESIA_API_KEY=
DEEPGRAM_API_KEY=
GOOGLE_API_KEY=

View File

@@ -0,0 +1,166 @@
#
# Copyright (c) 2024-2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
import sys
from pathlib import Path
import aiohttp
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import Frame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIProcessor
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.services.deepgram import DeepgramSTTService
from pipecat.services.google import GoogleLLMService, LLMSearchResponseFrame
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.utils.text.markdown_text_filter import MarkdownTextFilter
# Make the directory that contains this file's directory importable before
# importing the local `runner` module.
sys.path.append(str(Path(__file__).parent.parent))
from runner import configure
# Load variables from a .env file; override=True lets them replace values that
# are already set in the environment.
load_dotenv(override=True)
# Replace loguru's default handler (id 0) with a stderr sink at DEBUG level.
logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
# Tool configuration for the LLM: Grounding with Google Search
# https://ai.google.dev/gemini-api/docs/grounding?lang=python#dynamic-retrieval
# Some queries are likely to benefit more from Grounding with Google Search than others.
# The dynamic retrieval feature gives you additional control over when to use Grounding with Google Search.
# If the dynamic retrieval mode is unspecified, Grounding with Google Search is always triggered.
# If the mode is set to dynamic, the model decides when to use grounding based on a threshold that you can configure.
# The threshold is a floating-point value in the range [0,1] and defaults to 0.3.
# If the threshold value is 0, the response is always grounded with Google Search; if it's 1, it never is.
search_tool = {
"google_search_retrieval": {
"dynamic_retrieval_config": {
"mode": "MODE_DYNAMIC",
"dynamic_threshold": 0,
} # always grounding
}
}
# List of tools handed to the LLM service; only the search tool is enabled.
tools = [search_tool]
# System prompt passed to the LLM service as `system_instruction`.
system_instruction = """
You are an expert at providing the most recent news from any place. Your responses will be converted to audio, so ensure they are formatted in plain text without special characters (e.g., *, _, -) or overly complex formatting.
Guidelines:
- Use the Google search API to retrieve the current date and provide the latest news.
- Always deliver accurate and concise responses.
- Ensure all responses are clear, using plain text only. Avoid any special characters or symbols.
Start every interaction by asking how you can assist the user.
"""
class LLMSearchLoggerProcessor(FrameProcessor):
    """Pass-through processor that prints every `LLMSearchResponseFrame` it sees.

    All frames, including the ones that are printed, are forwarded unchanged so
    the rest of the pipeline is unaffected by this processor.
    """

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Print search-response frames, then forward the frame.

        Args:
            frame: The frame travelling through the pipeline.
            direction: The direction in which the frame is travelling.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMSearchResponseFrame):
            print(f"LLMSearchLoggerProcessor: {frame}")

        # Forward in the direction the frame arrived from. Omitting `direction`
        # would fall back to push_frame's default and misroute frames that are
        # travelling the other way.
        await self.push_frame(frame, direction)
async def main():
    """Join a Daily room and run the news bot pipeline until it finishes.

    The pipeline is: Daily input -> Deepgram STT -> RTVI -> user context
    aggregator -> Google LLM (with the search tool) -> search logger ->
    Cartesia TTS -> Daily output -> assistant context aggregator.
    The task is cancelled as soon as a participant leaves.
    """
    async with aiohttp.ClientSession() as http_session:
        room_url, token = await configure(http_session)

        # Audio-only bot with Silero voice activity detection.
        daily_params = DailyParams(
            audio_out_enabled=True,
            vad_enabled=True,
            vad_analyzer=SileroVADAnalyzer(),
            vad_audio_passthrough=True,
        )
        transport = DailyTransport(room_url, token, "Latest news!", daily_params)

        stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

        tts = CartesiaTTSService(
            api_key=os.getenv("CARTESIA_API_KEY"),
            voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",  # British Lady
            text_filter=MarkdownTextFilter(),
        )

        llm = GoogleLLMService(
            api_key=os.getenv("GOOGLE_API_KEY"),
            system_instruction=system_instruction,
            tools=tools,
        )

        # Seed the conversation so the bot speaks first once the context frame
        # is queued.
        opening_messages = [
            {
                "role": "user",
                "content": "Start by greeting the user warmly, introducing yourself, and mentioning the current day. Be friendly and engaging to set a positive tone for the interaction.",
            }
        ]
        context = OpenAILLMContext(opening_messages)
        context_aggregator = llm.create_context_aggregator(context)

        llm_search_logger = LLMSearchLoggerProcessor()

        #
        # RTVI events for Pipecat client UI
        #
        rtvi = RTVIProcessor(config=RTVIConfig(config=[]))

        processors = [
            transport.input(),
            stt,
            rtvi,
            context_aggregator.user(),
            llm,
            llm_search_logger,
            tts,
            transport.output(),
            context_aggregator.assistant(),
        ]
        pipeline = Pipeline(processors)

        task_params = PipelineParams(
            allow_interruptions=True,
            observers=[rtvi.observer()],
        )
        task = PipelineTask(pipeline, task_params)

        @rtvi.event_handler("on_client_ready")
        async def on_client_ready(rtvi):
            # Tell the client the bot is ready once the client reports ready.
            await rtvi.set_bot_ready()

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            # Queue the initial context so the LLM produces the opening turn.
            await task.queue_frames([context_aggregator.user().get_context_frame()])

        @transport.event_handler("on_participant_left")
        async def on_participant_left(transport, participant, reason):
            print(f"Participant left: {participant}")
            # Nothing more to send once the participant is gone: stop now.
            await task.cancel()

        runner = PipelineRunner()
        await runner.run(task)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,4 @@
python-dotenv
fastapi[all]
uvicorn
pipecat-ai[daily,google,deepgram,cartesia,silero,openai]

View File

@@ -0,0 +1,63 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import aiohttp
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
async def configure(aiohttp_session: aiohttp.ClientSession):
    """Return `(room_url, token)` for the Daily room the bot should join.

    Thin wrapper around `configure_with_args` that discards the parsed
    command-line arguments.
    """
    room_url, meeting_token, _args = await configure_with_args(aiohttp_session)
    return (room_url, meeting_token)
async def configure_with_args(
    aiohttp_session: aiohttp.ClientSession, parser: argparse.ArgumentParser | None = None
):
    """Resolve the Daily room URL and API key, then fetch a meeting token.

    The room URL comes from `-u/--url` or `DAILY_SAMPLE_ROOM_URL`; the API key
    comes from `-k/--apikey` or `DAILY_API_KEY`. Command-line values win.

    Args:
        aiohttp_session: Session handed to the Daily REST helper.
        parser: Optional parser to extend with the `-u` and `-k` options; a
            new one is created when none is given.

    Returns:
        A `(url, token, args)` tuple, where `args` is the parsed namespace.

    Raises:
        Exception: If no room URL or no API key could be determined.
    """
    parser = parser or argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
    parser.add_argument(
        "-u", "--url", type=str, required=False, help="URL of the Daily room to join"
    )
    parser.add_argument(
        "-k",
        "--apikey",
        type=str,
        required=False,
        help="Daily API Key (needed to create an owner token for the room)",
    )

    # parse_known_args tolerates options that this parser does not define.
    args, _unrecognized = parser.parse_known_args()

    url = args.url if args.url else os.getenv("DAILY_SAMPLE_ROOM_URL")
    key = args.apikey if args.apikey else os.getenv("DAILY_API_KEY")

    if not url:
        raise Exception(
            "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL."
        )

    if not key:
        raise Exception(
            "No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers."
        )

    rest_helper = DailyRESTHelper(
        daily_api_key=key,
        daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
        aiohttp_session=aiohttp_session,
    )

    # Create a meeting token for the given room with an expiration 1 hour in
    # the future.
    one_hour_in_seconds: float = 60 * 60
    token = await rest_helper.get_token(url, one_hour_in_seconds)

    return (url, token, args)

View File

@@ -0,0 +1,147 @@
#
# Copyright (c) 2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import argparse
import os
import subprocess
from contextlib import asynccontextmanager
from typing import Any, Dict
import aiohttp
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper, DailyRoomParams
# Load environment variables from .env file
# (override=True lets .env values replace variables already set in the environment)
load_dotenv(override=True)
# Dictionary to track bot processes: {pid: (process, room_url)}
# Entries are added by bot_connect() when it spawns a bot and are read by
# cleanup() at shutdown.
bot_procs = {}
# Store Daily API helpers
# Filled in by lifespan() at startup; the REST helper lives under the "rest" key.
daily_helpers = {}
def cleanup():
    """Stop every bot subprocess recorded in `bot_procs`.

    Each process is asked to terminate and then waited on before moving to the
    next one. Meant to run while the server shuts down.
    """
    for bot_process, _room_url in bot_procs.values():
        bot_process.terminate()
        bot_process.wait()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan manager that handles startup and shutdown tasks.

    - Creates aiohttp session
    - Initializes Daily API helper
    - Cleans up resources on shutdown

    Args:
        app: The FastAPI application this lifespan is attached to.
    """
    aiohttp_session = aiohttp.ClientSession()
    try:
        daily_helpers["rest"] = DailyRESTHelper(
            daily_api_key=os.getenv("DAILY_API_KEY", ""),
            daily_api_url=os.getenv("DAILY_API_URL", "https://api.daily.co/v1"),
            aiohttp_session=aiohttp_session,
        )
        yield
    finally:
        # Run both shutdown steps even when the application exits through an
        # exception, and still terminate the bots if closing the session fails.
        try:
            await aiohttp_session.close()
        finally:
            cleanup()
# Initialize FastAPI app with lifespan manager
app = FastAPI(lifespan=lifespan)
# Configure CORS to allow requests from any origin
# NOTE(review): wildcard origins, methods and headers combined with
# allow_credentials=True is very permissive; presumably acceptable only because
# this is an example server -- confirm before deploying anywhere public.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
async def create_room_and_token() -> tuple[str, str]:
    """Create a new Daily room and obtain a token for it.

    Returns:
        tuple[str, str]: The `(room_url, token)` pair.

    Raises:
        HTTPException: With status 500 when the created room has no URL or no
            token could be obtained for it.
    """
    room = await daily_helpers["rest"].create_room(DailyRoomParams())
    if not room.url:
        raise HTTPException(status_code=500, detail="Failed to create room")

    token = await daily_helpers["rest"].get_token(room.url)
    if token:
        return room.url, token

    raise HTTPException(status_code=500, detail=f"Failed to get token for room: {room.url}")
@app.post("/connect")
async def bot_connect(request: Request) -> Dict[Any, Any]:
    """Connect endpoint that creates a room and returns connection credentials.

    This endpoint is called by client to establish a connection. It creates a
    Daily room and token, spawns a `news_bot` subprocess for that room and
    records the process in `bot_procs`.

    Args:
        request: The incoming request (not inspected).

    Returns:
        Dict[Any, Any]: Authentication bundle containing room_url and token

    Raises:
        HTTPException: If room creation, token generation, or bot startup fails
    """
    print("Creating room for RTVI connection")
    room_url, token = await create_room_and_token()
    print(f"Room URL: {room_url}")

    # Start the bot process
    try:
        bot_file = "news_bot"
        # Pass the command as an argument list and skip the shell: the room URL
        # and token are never interpreted by a shell, and a missing interpreter
        # raises here instead of failing silently inside `sh`.
        proc = subprocess.Popen(
            ["python3", "-m", bot_file, "-u", room_url, "-t", token],
            bufsize=1,
            # Run from this file's directory so `-m news_bot` resolves.
            cwd=os.path.dirname(os.path.abspath(__file__)),
        )
        bot_procs[proc.pid] = (proc, room_url)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to start subprocess: {e}")

    # Return the authentication bundle in format expected by DailyTransport
    return {"room_url": room_url, "token": token}
if __name__ == "__main__":
    import uvicorn

    # Parse command line arguments for server configuration.
    # Defaults come from the HOST and FAST_API_PORT environment variables.
    default_host = os.getenv("HOST", "0.0.0.0")
    default_port = int(os.getenv("FAST_API_PORT", "7860"))

    # The description names this example; it previously carried a label copied
    # from a different example.
    parser = argparse.ArgumentParser(description="News Chatbot FastAPI server")
    parser.add_argument("--host", type=str, default=default_host, help="Host address")
    parser.add_argument("--port", type=int, default=default_port, help="Port number")
    parser.add_argument("--reload", action="store_true", help="Reload code on change")

    config = parser.parse_args()

    # Start the FastAPI server
    uvicorn.run(
        "server:app",
        host=config.host,
        port=config.port,
        reload=config.reload,
    )

View File

@@ -0,0 +1,149 @@
<div align="center">
 <img alt="pipecat" width="300px" height="auto" src="image.png">
</div>
# Phone Chatbot
Example project that demonstrates how to add phone functionality to your Pipecat bots. We include examples for Daily (`bot_daily.py`) dial-in and dial-out, and Twilio (`bot_twilio.py`) dial-in, depending on who you want to use as a phone vendor.
- 🔁 Transport: Daily WebRTC
- 💬 Speech-to-Text: Deepgram via Daily transport
- 🤖 LLM: GPT-4o / OpenAI
- 🔉 Text-to-Speech: ElevenLabs
#### Should I use Daily or Twilio as a vendor?
If you're starting from scratch, using Daily to provision phone numbers alongside Daily as a transport offers some convenience (such as automatic call forwarding.)
If you already have Twilio numbers and workflows that you want to connect to your Pipecat bots, there is some additional configuration required (you'll need to create an `on_dialin_ready` handler and use the Twilio client to trigger the forward.)
You can read more about this, as well as see respective walkthroughs in our docs.
## Setup
1. Create and activate a virtual environment:
```shell
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```
2. Install requirements:
```shell
pip install -r requirements.txt
```
3. Copy env.example to .env and configure:
```shell
cp env.example .env
```
4. Install [ngrok](https://ngrok.com/) so your local server can receive requests from Daily's servers.
## Using Daily numbers
### Running the example
To run either the dial-in or dial-out example, follow these steps to get started:
1. Run `bot_runner.py` to handle incoming HTTP requests:
```shell
python bot_runner.py --host localhost
```
2. Start ngrok running in a terminal window:
```shell
ngrok http --domain yourdomain.ngrok.app 8000
```
3. In a different terminal window, run the Daily bot file:
```shell
python bot_daily.py
```
### Dial-in
To dial-in to the bot, you will need to enable dial-in for your Daily domain. Follow [this guide](https://docs.daily.co/guides/products/dial-in-dial-out/dialin-pinless#provisioning-sip-interconnect-and-pinless-dialin-workflow) to set up your domain.
Note: For the `room_creation_api` property, point at your ngrok hostname: `"room_creation_api": "https://yourdomain.ngrok.app/daily_start_bot"`.
Once your domain is configured, receiving a phone call at a number associated with your Daily account will result in a POST to the `/daily_start_bot` endpoint, which will start a bot session.
### Dial-out
For the bot to dial out to a number, make a POST request to `/daily_start_bot` and include the dial-out phone number in the body of the request as `dialoutNumber`.
For example:
```shell
curl -X "POST" "http://localhost:7860/daily_start_bot" \
-H 'Content-Type: application/json; charset=utf-8' \
-d $'{
"dialoutNumber": "+12125551234"
}'
```
### More information
For more configuration options, please consult [Daily's API documentation](https://docs.daily.co).
## Using Twilio numbers
### Running the example
Follow these steps to get started:
1. Run `bot_runner.py` to handle incoming HTTP requests:
```shell
python bot_runner.py --host localhost
```
2. Start ngrok running in a terminal window:
```shell
ngrok http --domain yourdomain.ngrok.app 8000
```
3. In a different terminal window, run the Twilio bot file:
```shell
python bot_twilio.py
```
As above, but target the following URL:
`POST /twilio_start_bot`
For more configuration options, please consult Twilio's API documentation.
## Deployment example
A Dockerfile is included in this demo for convenience. Here is an example of how to build and deploy your bot to [fly.io](https://fly.io).
_Please note: This demo spawns agents as subprocesses for convenience / demonstration purposes. You would likely not want to do this in production as it would limit concurrency to available system resources. For more information on how to deploy your bots using VMs, refer to the Pipecat documentation._
### Build the docker image
`docker build -t tag:project .`
### Launch the fly project
`mv fly.example.toml fly.toml`
`fly launch` (using the included fly.toml)
### Setup your secrets on Fly
Set the necessary secrets (found in `env.example`)
`fly secrets set DAILY_API_KEY=... OPENAI_API_KEY=... ELEVENLABS_API_KEY=... ELEVENLABS_VOICE_ID=...`
If you're using Twilio as a number vendor:
`fly secrets set TWILIO_ACCOUNT_SID=... TWILIO_AUTH_TOKEN=...`
### Deploy!
`fly deploy`
## Need to do something more advanced?
This demo covers the basics of bot telephony. If you want to know more about working with PSTN / SIP, please ping us on [Discord](https://discord.gg/pipecat)!

View File

@@ -7,7 +7,6 @@ from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -25,12 +24,11 @@ daily_api_key = os.getenv("DAILY_API_KEY", "")
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
async def main(room_url: str, token: str, callId: str, callDomain: str):
# diallin_settings are only needed if Daily's SIP URI is used
async def main(room_url: str, token: str, callId: str, callDomain: str, dialout_number: str | None):
# dialin_settings are only needed if Daily's SIP URI is used
# If you are handling this via Twilio, Telnyx, set this to None
# and handle call-forwarding when on_dialin_ready fires.
diallin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
dialin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
transport = DailyTransport(
room_url,
token,
@@ -38,7 +36,7 @@ async def main(room_url: str, token: str, callId: str, callDomain: str):
DailyParams(
api_url=daily_api_url,
api_key=daily_api_key,
dialin_settings=diallin_settings,
dialin_settings=dialin_settings,
audio_in_enabled=True,
audio_out_enabled=True,
camera_out_enabled=False,
@@ -58,7 +56,7 @@ async def main(room_url: str, token: str, callId: str, callDomain: str):
messages = [
{
"role": "system",
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! Who dares dial me at this hour?!'.",
"content": "You are Chatbot, a friendly, helpful robot. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but keep your responses brief. Start by saying 'Oh, hello! I'm a friendly chatbot. How can I help you?'.",
},
]
@@ -78,14 +76,45 @@ async def main(room_url: str, token: str, callId: str, callDomain: str):
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
await task.queue_frames([context_aggregator.user().get_context_frame()])
if dialout_number:
logger.debug("dialout number detected; doing dialout")
# Configure some handlers for dialing out
@transport.event_handler("on_joined")
async def on_joined(transport, data):
logger.debug(f"Joined; starting dialout to: {dialout_number}")
await transport.start_dialout({"phoneNumber": dialout_number})
@transport.event_handler("on_dialout_connected")
async def on_dialout_connected(transport, data):
logger.debug(f"Dial-out connected: {data}")
@transport.event_handler("on_dialout_answered")
async def on_dialout_answered(transport, data):
logger.debug(f"Dial-out answered: {data}")
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# unlike the dialin case, for the dialout case, the caller will speak first. Presumably
# they will answer the phone and say "Hello?" Since we've captured their transcript,
# That will put a frame into the pipeline and prompt an LLM completion, which is how the
# bot will then greet the user.
else:
logger.debug("no dialout number; assuming dialin")
# Different handlers for dialin
@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
await transport.capture_participant_transcription(participant["id"])
# For the dialin case, we want the bot to answer the phone and greet the user. We
# can prompt the bot to speak by putting the context into the pipeline.
await task.queue_frames([context_aggregator.user().get_context_frame()])
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()
@@ -98,6 +127,7 @@ if __name__ == "__main__":
parser.add_argument("-t", type=str, help="Token")
parser.add_argument("-i", type=str, help="Call ID")
parser.add_argument("-d", type=str, help="Call Domain")
parser.add_argument("-o", type=str, help="Dialout number", default=None)
config = parser.parse_args()
asyncio.run(main(config.u, config.t, config.i, config.d))
asyncio.run(main(config.u, config.t, config.i, config.d, config.o))

View File

@@ -73,24 +73,27 @@ action using the Twilio Client library.
"""
async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
async def _create_daily_room(room_url, callId, callDomain=None, dialoutNumber=None, vendor="daily"):
if not room_url:
params = DailyRoomParams(
properties=DailyRoomProperties(
# Note: these are the default values, except for the display name
sip=DailyRoomSipParams(
display_name="dialin-user", video=False, sip_mode="dial-in", num_endpoints=1
)
# Create base properties with SIP settings
properties = DailyRoomProperties(
sip=DailyRoomSipParams(
display_name="dialin-user", video=False, sip_mode="dial-in", num_endpoints=1
)
)
# Only enable dialout if dialoutNumber is provided
if dialoutNumber:
properties.enable_dialout = True
params = DailyRoomParams(properties=properties)
print(f"Creating new room...")
room: DailyRoomObject = await daily_helpers["rest"].create_room(params=params)
else:
# Check that the passed room URL exists (we assume that it already has SIP set up!)
try:
print(f"Joining existing room: {room_url}")
room: DailyRoomObject = await daily_helpers["rest"].get_room_from_url(room_url)
except Exception:
raise HTTPException(status_code=500, detail=f"Room not found: {room_url}")
@@ -107,6 +110,8 @@ async def _create_daily_room(room_url, callId, callDomain=None, vendor="daily"):
# Note: this is mostly for demonstration purposes (refer to 'deployment' in docs)
if vendor == "daily":
bot_proc = f"python3 -m bot_daily -u {room.url} -t {token} -i {callId} -d {callDomain}"
if dialoutNumber:
bot_proc += f" -o {dialoutNumber}"
else:
bot_proc = f"python3 -m bot_twilio -u {room.url} -t {token} -i {callId} -s {room.config.sip_endpoint}"
@@ -179,11 +184,15 @@ async def daily_start_bot(request: Request) -> JSONResponse:
return JSONResponse({"test": True})
callId = data.get("callId", None)
callDomain = data.get("callDomain", None)
dialoutNumber = data.get("dialoutNumber", None)
except Exception:
raise HTTPException(status_code=500, detail="Missing properties 'callId' or 'callDomain'")
raise HTTPException(
status_code=500, detail="Missing properties 'callId', 'callDomain', or 'dialoutNumber'"
)
print(f"CallId: {callId}, CallDomain: {callDomain}")
room: DailyRoomObject = await _create_daily_room(room_url, callId, callDomain, "daily")
room: DailyRoomObject = await _create_daily_room(
room_url, callId, callDomain, dialoutNumber, "daily"
)
# Grab a token for the user to join with
return JSONResponse({"room_url": room.url, "sipUri": room.config.sip_endpoint})

View File

@@ -8,7 +8,6 @@ from loguru import logger
from twilio.rest import Client
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import EndFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
@@ -87,7 +86,7 @@ async def main(room_url: str, token: str, callId: str, sipUri: str):
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
await task.queue_frame(EndFrame())
await task.cancel()
@transport.event_handler("on_dialin_ready")
async def on_dialin_ready(transport, cdata):

View File

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

View File

@@ -81,6 +81,7 @@ class CallContainerModel: ObservableObject {
@MainActor
func disconnect() {
self.rtviClientIOS?.disconnect(completion: nil)
self.rtviClientIOS?.release()
}
func showError(message: String) {
@@ -135,7 +136,7 @@ class CallContainerModel: ObservableObject {
}
}
extension CallContainerModel:RTVIClientDelegate, LLMHelperDelegate {
extension CallContainerModel:RTVIClientDelegate {
private func handleEvent(eventName: String, eventValue: Any? = nil) {
if let value = eventValue {

View File

@@ -13,7 +13,7 @@ struct SettingsView: View {
var body: some View {
NavigationView {
Form {
Section(header: Text("Audio Settings")) {
Section {
List(model.availableMics, id: \.self.id.id) { mic in
Button(action: {
model.selectMic(mic.id)
@@ -27,6 +27,11 @@ struct SettingsView: View {
}
}
}
} header: {
VStack(alignment: .leading) {
Text("Audio Settings")
Text("(No selection = system default)")
}
}
Section(header: Text("Start options")) {
Toggle("Enable Microphone", isOn: $isMicEnabled)

View File

@@ -59,6 +59,12 @@ Install dependencies:
pip install -r requirements.txt
```
If you want to use the local version of `pipecat` in this repo rather than the last published version, also run:
```bash
pip install --editable "../../../[daily,elevenlabs,openai,silero,google]"
```
Run the server:
```bash

View File

@@ -31,7 +31,6 @@ from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
EndFrame,
Frame,
OutputImageRawFrame,
SpriteFrame,
@@ -41,17 +40,8 @@ from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.frameworks.rtvi import (
RTVIBotTranscriptionProcessor,
RTVIConfig,
RTVIMetricsProcessor,
RTVIProcessor,
RTVISpeakingProcessor,
RTVIUserTranscriptionProcessor,
)
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIProcessor
from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
load_dotenv(override=True)
@@ -168,20 +158,6 @@ async def main():
#
# RTVI events for Pipecat client UI
#
# This will send `user-*-speaking` and `bot-*-speaking` messages.
rtvi_speaking = RTVISpeakingProcessor()
# This will emit UserTranscript events.
rtvi_user_transcription = RTVIUserTranscriptionProcessor()
# This will emit BotTranscript events.
rtvi_bot_transcription = RTVIBotTranscriptionProcessor()
# This will send `metrics` messages.
rtvi_metrics = RTVIMetricsProcessor()
# Handles RTVI messages from the client
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
pipeline = Pipeline(
@@ -190,11 +166,7 @@ async def main():
rtvi,
context_aggregator.user(),
llm,
rtvi_speaking,
rtvi_user_transcription,
rtvi_bot_transcription,
ta,
rtvi_metrics,
transport.output(),
context_aggregator.assistant(),
]
@@ -206,6 +178,7 @@ async def main():
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
observers=[rtvi.observer()],
),
)
await task.queue_frame(quiet_frame)
@@ -222,7 +195,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -31,7 +31,6 @@ from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
EndFrame,
Frame,
OutputImageRawFrame,
SpriteFrame,
@@ -41,14 +40,7 @@ from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.processors.frameworks.rtvi import (
RTVIBotTranscriptionProcessor,
RTVIConfig,
RTVIMetricsProcessor,
RTVIProcessor,
RTVISpeakingProcessor,
RTVIUserTranscriptionProcessor,
)
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIProcessor
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
@@ -189,34 +181,16 @@ async def main():
#
# RTVI events for Pipecat client UI
#
# This will send `user-*-speaking` and `bot-*-speaking` messages.
rtvi_speaking = RTVISpeakingProcessor()
# This will emit UserTranscript events.
rtvi_user_transcription = RTVIUserTranscriptionProcessor()
# This will emit BotTranscript events.
rtvi_bot_transcription = RTVIBotTranscriptionProcessor()
# This will send `metrics` messages.
rtvi_metrics = RTVIMetricsProcessor()
# Handles RTVI messages from the client
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
pipeline = Pipeline(
[
transport.input(),
rtvi,
rtvi_speaking,
rtvi_user_transcription,
context_aggregator.user(),
llm,
rtvi_bot_transcription,
tts,
ta,
rtvi_metrics,
transport.output(),
context_aggregator.assistant(),
]
@@ -228,6 +202,7 @@ async def main():
allow_interruptions=True,
enable_metrics=True,
enable_usage_metrics=True,
observers=[rtvi.observer()],
),
)
await task.queue_frame(quiet_frame)
@@ -244,7 +219,7 @@ async def main():
@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
print(f"Participant left: {participant}")
await task.queue_frame(EndFrame())
await task.cancel()
runner = PipelineRunner()

View File

@@ -66,7 +66,7 @@ The build UI files can be found in `frontend/out`
Start the API / bot manager:
`python src/bot_runner.py`
`python src/bot_runner.py --host localhost`
If you'd like to run a custom domain or port:

View File

@@ -4,6 +4,7 @@ ELEVENLABS_API_KEY=
ELEVENLABS_VOICE_ID=
FAL_KEY=
OPENAI_API_KEY=
GOOGLE_API_KEY=
ENV= # dev | production
RUN_AS_VM= # Set this if you want to run bots on process (not launch a new VM)

View File

@@ -1,4 +1,4 @@
import React, { useState } from "react";
import React, { useState, useEffect } from "react";
import {
useDaily,
useParticipantIds,
@@ -33,7 +33,9 @@ const Story: React.FC<StoryProps> = ({ handleLeave }) => {
setTimeout(() => daily.setLocalAudio(true), 500);
setStoryState("user");
} else {
daily.setLocalAudio(false);
// Uncomment the next line to mute the mic while the
// assistant is talking. Leave it commented to allow for interruptions
// daily.setLocalAudio(false);
setStoryState("assistant");
}
},
@@ -58,7 +60,7 @@ const Story: React.FC<StoryProps> = ({ handleLeave }) => {
{participantIds.length >= 1 ? (
<VideoTile
sessionId={participantIds[0]}
inactive={storyState === "user"}
inactive={false}
/>
) : (
<span className="p-3 rounded-full bg-gray-900/60 animate-pulse">
@@ -71,7 +73,7 @@ const Story: React.FC<StoryProps> = ({ handleLeave }) => {
)}
<DailyAudio />
</div>
<UserInputIndicator active={storyState === "user"} />
<UserInputIndicator active={true} />
</div>
);
};

View File

@@ -43,25 +43,8 @@
transition: opacity 0.5s ease;
}
@keyframes pulse {
0% {
outline-width: 6px;
@apply outline-teal-500/10;
}
50% {
outline-width: 24px;
@apply outline-teal-500/50;
}
100% {
outline-width: 6px;
@apply outline-teal-500/10;
}
}
.micIconActive{
@apply bg-teal-950 border-teal-500 outline-teal-500/20;
animation: pulse 2s infinite ease-in-out;
}
.micIconActive svg{

View File

@@ -1,4 +1,4 @@
import React, { useState, useEffect } from "react";
import React, { useState, useEffect, useRef } from "react";
import { useAppMessage } from "@daily-co/daily-react";
import { DailyEventObjectAppMessage } from "@daily-co/daily-js";
@@ -13,12 +13,31 @@ interface Props {
export default function UserInputIndicator({ active }: Props) {
const [transcription, setTranscription] = useState<string[]>([]);
const timeoutRef = useRef<NodeJS.Timeout>();
const resetTimeout = () => {
if (timeoutRef.current) {
clearTimeout(timeoutRef.current);
}
timeoutRef.current = setTimeout(() => {
setTranscription([]);
}, 5000);
};
useEffect(() => {
return () => {
if (timeoutRef.current) {
clearTimeout(timeoutRef.current);
}
};
}, []);
useAppMessage({
onAppMessage: (e: DailyEventObjectAppMessage<any>) => {
if (e.fromId && e.fromId === "transcription") {
if (e.data.user_id === "" && e.data.is_final) {
setTranscription((t) => [...t, ...e.data.text.split(" ")]);
resetTimeout();
}
}
},

View File

@@ -2,4 +2,4 @@ async_timeout
fastapi
uvicorn
python-dotenv
pipecat-ai[daily,elevenlabs,openai,fal]
pipecat-ai[daily,openai,fal,google,cartesia]

Some files were not shown because too many files have changed in this diff Show More