Compare commits
137 Commits
mb/pyproje
...
hush/inter
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fdf0652141 | ||
|
|
237c400f2d | ||
|
|
b6afce2a92 | ||
|
|
d7f31e0cbd | ||
|
|
c662a2d820 | ||
|
|
89f0ff17c0 | ||
|
|
b5465364fa | ||
|
|
c024eb7b8c | ||
|
|
608570e89d | ||
|
|
3ad61a8a04 | ||
|
|
4c4bae2db6 | ||
|
|
901b6b5913 | ||
|
|
71cd0f1c87 | ||
|
|
a2a419e6db | ||
|
|
bbbbdc459a | ||
|
|
d203528dad | ||
|
|
4bcca7956e | ||
|
|
68a4cf4c68 | ||
|
|
0508ddddfb | ||
|
|
8714c9137f | ||
|
|
4c029fcfa7 | ||
|
|
5c86f8e687 | ||
|
|
54a4d8a9f8 | ||
|
|
38af514d95 | ||
|
|
6aa80c0b8e | ||
|
|
e720573e60 | ||
|
|
541a43905b | ||
|
|
707df913cd | ||
|
|
3f3d757581 | ||
|
|
7c781ce816 | ||
|
|
f3efc9da00 | ||
|
|
827a70104d | ||
|
|
a40327305c | ||
|
|
168af44429 | ||
|
|
5f8433476c | ||
|
|
6a6fea74f5 | ||
|
|
91b557ecbf | ||
|
|
be85291414 | ||
|
|
09f171b69d | ||
|
|
929fd98958 | ||
|
|
1cfbfcaf11 | ||
|
|
cd5a3c13bd | ||
|
|
9b871b0cc5 | ||
|
|
0d499a8aa3 | ||
|
|
45292ab13d | ||
|
|
be6ea0dbf6 | ||
|
|
fb18ae174e | ||
|
|
c4506523ab | ||
|
|
b360cb31dc | ||
|
|
07f104199c | ||
|
|
bc1949b4bf | ||
|
|
2035dd8b39 | ||
|
|
24c8189327 | ||
|
|
998ac32627 | ||
|
|
50645c1c4f | ||
|
|
8ce29ee8f2 | ||
|
|
7b8aeef4cc | ||
|
|
6a24457f0e | ||
|
|
2c01c2b5b3 | ||
|
|
1c2e114fa2 | ||
|
|
0f137e36c2 | ||
|
|
b7f12a96f1 | ||
|
|
3331f71e17 | ||
|
|
55d200e2d1 | ||
|
|
3fae00e067 | ||
|
|
78cdefd191 | ||
|
|
42502a4f3b | ||
|
|
fc67cc3302 | ||
|
|
241ab19228 | ||
|
|
c08e8ec8fb | ||
|
|
eb9bc9644e | ||
|
|
3a306dae90 | ||
|
|
e503ea7466 | ||
|
|
c42cc8254f | ||
|
|
a8e21f7d5d | ||
|
|
c6ef8de578 | ||
|
|
fc571fba42 | ||
|
|
0502ee2b5a | ||
|
|
9ec047094b | ||
|
|
d991c106c8 | ||
|
|
312fb23c89 | ||
|
|
4d7f21d44e | ||
|
|
ec25d0a7c9 | ||
|
|
2b8218deaa | ||
|
|
11119430cd | ||
|
|
9ca79232c1 | ||
|
|
9ea06c33f7 | ||
|
|
30a1dd202e | ||
|
|
809ab0b7b6 | ||
|
|
2b5db9c562 | ||
|
|
b4a886b59f | ||
|
|
07eb00722b | ||
|
|
96652b8fba | ||
|
|
df1fcf0c68 | ||
|
|
711f740d9e | ||
|
|
a0bda98c20 | ||
|
|
1c1bae35ab | ||
|
|
56c52c2cf2 | ||
|
|
740aee1a1a | ||
|
|
f0391c3280 | ||
|
|
64e48e4660 | ||
|
|
b8147bdbbd | ||
|
|
315e45d41b | ||
|
|
c057139c48 | ||
|
|
c61e07132d | ||
|
|
a5f5e418a8 | ||
|
|
31acfaa091 | ||
|
|
69541c8835 | ||
|
|
af94620839 | ||
|
|
cec8a74293 | ||
|
|
228a55ac1e | ||
|
|
ab9831daf0 | ||
|
|
e8c3f5dea6 | ||
|
|
4288b5e780 | ||
|
|
23343dd7e7 | ||
|
|
88de5dd415 | ||
|
|
33f87589d1 | ||
|
|
7ed14ad91f | ||
|
|
86c6141580 | ||
|
|
c97643c797 | ||
|
|
434d346079 | ||
|
|
64ae8d2394 | ||
|
|
786f24c9db | ||
|
|
38951aab56 | ||
|
|
ed8b0655a8 | ||
|
|
0b2b9f5f1b | ||
|
|
ad1841b739 | ||
|
|
b0c002c128 | ||
|
|
820176084c | ||
|
|
5b7e31beff | ||
|
|
41a22d3bf4 | ||
|
|
84fecabac5 | ||
|
|
bbe01d10ef | ||
|
|
4364990fd0 | ||
|
|
e576fa481f | ||
|
|
0594a203fc | ||
|
|
248206e234 |
61
.github/workflows/python-compatibility.yaml
vendored
Normal file
61
.github/workflows/python-compatibility.yaml
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
name: Python Compatibility Test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, develop]
|
||||
paths: ['pyproject.toml']
|
||||
pull_request:
|
||||
branches: [main, develop]
|
||||
paths: ['pyproject.toml']
|
||||
|
||||
jobs:
|
||||
test-compatibility:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ['3.10.18', '3.11.13', '3.12.11', '3.13.5']
|
||||
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y \
|
||||
portaudio19-dev \
|
||||
libcairo2-dev \
|
||||
libgirepository1.0-dev \
|
||||
pkg-config
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v4
|
||||
with:
|
||||
version: 'latest'
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
run: |
|
||||
uv python install ${{ matrix.python-version }}
|
||||
uv python pin ${{ matrix.python-version }}
|
||||
|
||||
- name: Test uv sync with all extras (Python < 3.13)
|
||||
if: "!startsWith(matrix.python-version, '3.13.')"
|
||||
run: |
|
||||
uv sync --group dev --all-extras --no-extra krisp
|
||||
|
||||
- name: Test uv sync without PyTorch extras (Python 3.13+)
|
||||
if: startsWith(matrix.python-version, '3.13.')
|
||||
run: |
|
||||
uv sync --group dev --all-extras \
|
||||
--no-extra krisp \
|
||||
--no-extra ultravox \
|
||||
--no-extra local-smart-turn \
|
||||
--no-extra moondream \
|
||||
--no-extra mlx-whisper
|
||||
|
||||
- name: Verify installation
|
||||
run: |
|
||||
uv run python --version
|
||||
uv run python -c "import pipecat; print('✅ Pipecat imports successfully')"
|
||||
42
.github/workflows/update-lockfile.yaml
vendored
42
.github/workflows/update-lockfile.yaml
vendored
@@ -1,42 +0,0 @@
|
||||
name: Update lockfile
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'pyproject.toml'
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch: # Allows manual triggering from GitHub UI
|
||||
|
||||
jobs:
|
||||
update-lockfile:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
# This gives the workflow permission to push back to the repo
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v1
|
||||
|
||||
- name: Update lockfile
|
||||
run: uv lock
|
||||
|
||||
- name: Check for changes
|
||||
id: verify-changed-files
|
||||
run: |
|
||||
if [ -n "$(git status --porcelain)" ]; then
|
||||
echo "changed=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "changed=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Commit lockfile
|
||||
if: steps.verify-changed-files.outputs.changed == 'true'
|
||||
run: |
|
||||
git config --local user.email "action@github.com"
|
||||
git config --local user.name "GitHub Action"
|
||||
git add uv.lock
|
||||
git commit -m "chore: update uv.lock after dependency changes"
|
||||
git push
|
||||
174
CHANGELOG.md
174
CHANGELOG.md
@@ -5,13 +5,147 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
## [0.0.80] - 2025-08-13
|
||||
|
||||
### Added
|
||||
|
||||
- Added `GeminiTTSService` which uses Google Gemini to generate TTS output. The
|
||||
Gemini model can be prompted to insert styled speech to control the TTS
|
||||
output.
|
||||
|
||||
- For `OpenAILLMService` and its subclasses, added the ability to retry
|
||||
executing a chat completion after a timeout period. The new args are
|
||||
`retry_timeout_secs` and `retry_on_timeout`. This feature is disabled by
|
||||
default.
|
||||
|
||||
- Added Exotel support to Pipecat's development runner. You can now connect
|
||||
using the runner with `uv run bot.py -t exotel` and an ngrok connection to
|
||||
HTTP port 7860.
|
||||
|
||||
- Added `enable_direct_mode` argument to `FrameProcessor`. The direct mode is
|
||||
for processors which require very little I/O or compute resources, that is
|
||||
processors that can perform their task almost immediately. These type of
|
||||
processors don't need any of the internal tasks and queues usually created by
|
||||
frame processors which means overall application performance might be slightly
|
||||
increased. Use with care.
|
||||
|
||||
- Added TTFB metrics for `HeyGenVideoService` and `TavusVideoService`.
|
||||
|
||||
- Added `endpoint_id` parameter to `AzureSTTService`. ([Custom EndpointId](https://docs.azure.cn/en-us/ai-services/speech-service/how-to-recognize-speech?pivots=programming-language-python#use-a-custom-endpoint))
|
||||
|
||||
### Changed
|
||||
|
||||
- `WatchdogPriorityQueue` now requires the items to be inserted to always be
|
||||
tuples and the size of the tuple needs to be specified in the constructor when
|
||||
creating the queue with the `tuple_size` argument.
|
||||
|
||||
- Updated Moondream to revision `2025-01-09`.
|
||||
|
||||
- Updated `PlayHTHttpTTSService` to no longer use the `pyht` client to remove
|
||||
compatibility issues with other packages. Now you can use the PlayHT HTTP
|
||||
service with other services, like GoogleLLMService.
|
||||
|
||||
- Updated `pyproject.toml` to once again pin `numba` to `>=0.61.2` in order to
|
||||
resolve package versioning issues.
|
||||
|
||||
- Updated the `STTMuteFilter` to include `VADUserStartedSpeakingFrame` and
|
||||
`VADUserStoppedSpeakingFrame` in the list of frames to filter when the
|
||||
filtering is on.
|
||||
|
||||
### Performance
|
||||
|
||||
- Improving the latency of the `HeyGenVideoService`.
|
||||
|
||||
- Improved some frame processors performance by using the new frame processor
|
||||
direct mode. In direct mode a frame processor will process frames right away
|
||||
avoiding the need for internal queues and tasks. This is useful for some
|
||||
simple processors. For example, in processors that wrap other processors
|
||||
(e.g. `Pipeline`, `ParallelPipeline`), we add one processor before and one
|
||||
after the wrapped processors (internally, you will see them as sources and
|
||||
sinks). These sources and sinks don't do any special processing and they
|
||||
basically forward frames. So, for these simple processors we now enable the
|
||||
new direct mode which avoids creating any internal tasks (and queues) and
|
||||
therefore improves performance.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with the `BaseWhisperSTTService` where the language was
|
||||
specified as an enum and not a string.
|
||||
|
||||
- Fixed an issue where `SmallWebRTCTransport` ended before TTS finished.
|
||||
|
||||
- Fixed an issue in `OpenAIRealtimeBetaLLMService` where specifying a `text`
|
||||
`modalities` didn't result in text being outputted from the model.
|
||||
|
||||
- Added SSML reserved character escaping to `AzureBaseTTSService` to properly
|
||||
handle special characters in text sent to Azure TTS. This fixes an issue
|
||||
where characters like `&`, `<`, `>`, `"`, and `'` in LLM-generated text would
|
||||
cause TTS failures.
|
||||
|
||||
- Fixed a `WatchdogPriorityQueue` issue that could cause an exception when
|
||||
compating watchdog cancel sentinel items with other items in the queue.
|
||||
|
||||
- Fixed an issue that would cause system frames to not be processed with higher
|
||||
priority than other frames. This could cause slower interruption times.
|
||||
|
||||
- Fixed an issue where retrying a websocket connection error would result in an
|
||||
error.
|
||||
|
||||
### Other
|
||||
|
||||
- Add foundation example `19b-openai-realtime-beta-text.py`, showing how to use
|
||||
`OpenAIRealtimeBetaLLMService` to output text to a TTS service.
|
||||
|
||||
- Add vision support to release evals so we can run the foundational examples 12
|
||||
series.
|
||||
|
||||
- Added foundational example `15a-switch-languages.py` to release evals. It is
|
||||
able to detect if we switched the language properly.
|
||||
|
||||
- Updated foundational examples to show how to enclose complex logic
|
||||
(e.g. `ParallelPipeline`) into a single processor so the main pipeline becomes
|
||||
simpler.
|
||||
|
||||
- Added `07n-interruptible-gemini.py`, demonstrating how to use
|
||||
`GeminiTTSService`.
|
||||
|
||||
## [0.0.79] - 2025-08-07
|
||||
|
||||
### Changed
|
||||
|
||||
- Changed `pipecat-ai`'s `openai` dependency to `>=1.74.0,<=1.99.1` due to a
|
||||
breaking change in `openai` 1.99.2 ([commit](https://github.com/openai/openai-python/commit/657f551dbe583ffb259d987dafae12c6211fba06))
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `TTSService.say()` is deprecated, push a `TTSSpeakFrame` instead. Calling
|
||||
functions directly is a discouraged pattern in Pipecat because, for example,
|
||||
it might cause issues with frame ordering.
|
||||
|
||||
- `LLMMessagesFrame` is deprecated, in favor of either:
|
||||
|
||||
- `LLMMessagesUpdateFrame` with `run_llm=True`
|
||||
- `OpenAILLMContextFrame` with desired messages in a new context
|
||||
|
||||
- `LLMUserResponseAggregator` and `LLMAssistantResponseAggregator` are
|
||||
deprecated, as they depended on the now-deprecated `LLMMessagesFrame`. Use
|
||||
`LLMUserContextAggregator` and `LLMAssistantResponseAggregator` (or
|
||||
LLM-specific subclasses thereof) instead.
|
||||
|
||||
## [0.0.78] - 2025-08-07
|
||||
|
||||
### Added
|
||||
|
||||
- Added `enable_emulated_vad_interruptions` to `LLMUserAggregatorParams`.
|
||||
When user speech is emulated (e.g. when a transcription is received but
|
||||
VAD doesn't detect speech), this parameter controls whether the emulated
|
||||
speech can interrupt the bot. Default is False (emulated speech is ignored
|
||||
while the bot is speaking).
|
||||
|
||||
- Added new `handle_sigint` and `handle_sigterm` to `RunnerArguments`. This
|
||||
allows applications to know what settings they should use for the environment
|
||||
they are running on.
|
||||
they are running on. Also, added `pipeline_idle_timeout_secs` to be able to
|
||||
control the `PipelineTask` idle timeout.
|
||||
|
||||
- Added `processor` field to `ErrorFrame` to indicate `FrameProcessor` that
|
||||
generated the error.
|
||||
@@ -46,6 +180,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Added Chinese, Japanese, Korean word timestamp support to
|
||||
`CartesiaTTSService`.
|
||||
|
||||
- Added `region` parameter to `GladiaSTTService`. Accepted values: eu-west
|
||||
(default), us-west.
|
||||
|
||||
### Changed
|
||||
|
||||
- System frames are now queued. Before, system frames could be generated from
|
||||
@@ -77,8 +214,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- The development runner now strips any provided protocol (e.g. https://) from
|
||||
the proxy address and issues a warning. It also strips trailing `/`.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- In the `pipecat.runner.daily`, the `configure_with_args()` function is
|
||||
deprecated. Use the `configure()` function instead.
|
||||
|
||||
- The development runner's `/connect` endpoint is deprecated and will be
|
||||
removed in a future version. Use the `/start` endpoint in its place. In the
|
||||
meantime, both endpoints work and deliver equivalent functionality.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `DailyTransport` issue that would result in an unhandled
|
||||
`concurrent.futures.CancelledError` when a future is cancelled.
|
||||
|
||||
- Fixed a `RivaSTTService` issue that would result in an unhandled
|
||||
`concurrent.futures.CancelledError` when a future is cancelled when reading
|
||||
from the audio chunks from the incoming audio stream.
|
||||
|
||||
- Fixed an issue in the `BaseOutputTransport`, mainly reproducible with
|
||||
`FastAPIWebsocketOutputTransport` when the audio mixer was enabled, where the
|
||||
loop could consume 100% CPU by continuously returning without delay, preventing
|
||||
other asyncio tasks (such as cancellation or shutdown signals) from being
|
||||
processed.
|
||||
|
||||
- Fixed an issue where `BotStartedSpeakingFrame` and `BotStoppedSpeakingFrame`
|
||||
were not emitted when using `TavusVideoService` or `HeyGenVideoService`.
|
||||
|
||||
@@ -98,14 +257,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Fixed an issue in `TaskObserver` (a proxy to all observers) that was degrading
|
||||
global performance.
|
||||
|
||||
### Deprecated
|
||||
### Other
|
||||
|
||||
- In the `pipecat.runner.daily`, the `configure_with_args()` function is
|
||||
deprecated. Use the `configure()` function instead.
|
||||
|
||||
- The development runner's `/connect` endpoint is deprecated and will be
|
||||
removed in a future version. Use the `/start` endpoint in its place. In the
|
||||
meantime, both endpoints work and deliver equivalent functionality.
|
||||
- Added `07aa-interruptible-soniox.py`, `07ab-interruptible-inworld-http.py`,
|
||||
`07ac-interruptible-asyncai.py` and `07ac-interruptible-asyncai-http.py`
|
||||
release evals.
|
||||
|
||||
## [0.0.77] - 2025-07-31
|
||||
|
||||
|
||||
@@ -31,6 +31,23 @@ git push origin your-branch-name
|
||||
|
||||
Our maintainers will review your PR, and once everything is good, your contributions will be merged!
|
||||
|
||||
## Dependency Management
|
||||
|
||||
This project uses [uv](https://docs.astral.sh/uv/) for dependency management. The `uv.lock` file is committed to ensure reproducible builds.
|
||||
|
||||
### Adding or Updating Dependencies
|
||||
|
||||
1. Edit `pyproject.toml` to add/update dependencies
|
||||
2. Run `uv lock` to update the lockfile with new dependency resolution
|
||||
3. Run `uv sync` to install the updated dependencies locally
|
||||
4. Always commit both files together:
|
||||
```bash
|
||||
git add pyproject.toml uv.lock
|
||||
git commit -m "feat: add new dependency for feature X"
|
||||
```
|
||||
|
||||
**Important:** Never manually edit `uv.lock`. It's auto-generated by `uv lock`.
|
||||
|
||||
## Code Style and Documentation
|
||||
|
||||
### Python Code Style
|
||||
|
||||
28
README.md
28
README.md
@@ -112,6 +112,13 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
|
||||
## 🛠️ Contributing to the framework
|
||||
|
||||
### Prerequisites
|
||||
|
||||
**Minimum Python Version:** 3.10
|
||||
**Recommended Python Version:** 3.11-3.12
|
||||
|
||||
### Setup Steps
|
||||
|
||||
1. Clone the repository and navigate to it:
|
||||
|
||||
```bash
|
||||
@@ -122,7 +129,7 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
2. Install development and testing dependencies:
|
||||
|
||||
```bash
|
||||
uv sync --group dev --all-extras --no-extra krisp
|
||||
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local
|
||||
```
|
||||
|
||||
3. Install the git pre-commit hooks:
|
||||
@@ -131,6 +138,25 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
uv run pre-commit install
|
||||
```
|
||||
|
||||
### Python 3.13+ Compatibility
|
||||
|
||||
Some features require PyTorch, which doesn't yet support Python 3.13+. Install using:
|
||||
|
||||
```bash
|
||||
uv sync --group dev --all-extras \
|
||||
--no-extra gstreamer \
|
||||
--no-extra krisp \
|
||||
--no-extra local \
|
||||
--no-extra local-smart-turn \
|
||||
--no-extra mlx-whisper \
|
||||
--no-extra moondream \
|
||||
--no-extra ultravox
|
||||
```
|
||||
|
||||
> **Tip:** For full compatibility, use Python 3.12: `uv python pin 3.12`
|
||||
|
||||
> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
|
||||
|
||||
### Running tests
|
||||
|
||||
To run all tests, from the root directory:
|
||||
|
||||
@@ -29,6 +29,9 @@ CARTESIA_API_KEY=...
|
||||
DAILY_API_KEY=...
|
||||
DAILY_SAMPLE_ROOM_URL=https://...
|
||||
|
||||
# Deepgram
|
||||
DEEPGRAM_API_KEY=...
|
||||
|
||||
# ElevenLabs
|
||||
ELEVENLABS_API_KEY=...
|
||||
ELEVENLABS_VOICE_ID=...
|
||||
@@ -44,6 +47,7 @@ FIREWORKS_API_KEY=...
|
||||
|
||||
# Gladia
|
||||
GLADIA_API_KEY=...
|
||||
GLADIA_REGION=...
|
||||
|
||||
# Google
|
||||
GOOGLE_API_KEY=...
|
||||
@@ -135,4 +139,4 @@ SAMBANOVA_API_KEY=...
|
||||
SENTRY_DSN=...
|
||||
|
||||
# Heygen
|
||||
HEYGEN_API_KEY=...
|
||||
HEYGEN_API_KEY=...
|
||||
|
||||
@@ -43,7 +43,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
base_url=os.getenv("PIPER_BASE_URL"), aiohttp_session=session, sample_rate=24000
|
||||
)
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
task = PipelineTask(
|
||||
Pipeline([tts, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -44,7 +44,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
task = PipelineTask(
|
||||
Pipeline([tts, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -41,7 +41,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
task = PipelineTask(
|
||||
Pipeline([tts, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -38,7 +38,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
tts = FastPitchTTSService(api_key=os.getenv("NVIDIA_API_KEY"))
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
task = PipelineTask(
|
||||
Pipeline([tts, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -9,10 +9,14 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.frames.frames import EndFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
@@ -51,12 +55,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
}
|
||||
]
|
||||
|
||||
task = PipelineTask(Pipeline([llm, tts, transport.output()]))
|
||||
task = PipelineTask(
|
||||
Pipeline([llm, tts, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
await task.queue_frames([LLMMessagesFrame(messages), EndFrame()])
|
||||
await task.queue_frames([OpenAILLMContextFrame(OpenAILLMContext(messages)), EndFrame()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
|
||||
@@ -51,7 +51,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
key=os.getenv("FAL_KEY"),
|
||||
)
|
||||
|
||||
task = PipelineTask(Pipeline([imagegen, transport.output()]))
|
||||
task = PipelineTask(
|
||||
Pipeline([imagegen, transport.output()]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -52,6 +52,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Register an event handler so we can play the audio when the client joins
|
||||
|
||||
@@ -110,7 +110,7 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
@@ -15,13 +15,16 @@ from pipecat.frames.frames import (
|
||||
DataFrame,
|
||||
Frame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
TextFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
@@ -153,9 +156,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
}
|
||||
]
|
||||
frames.append(MonthFrame(month=month))
|
||||
frames.append(LLMMessagesFrame(messages))
|
||||
frames.append(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Set up transport event handlers
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -15,7 +15,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
LLMMessagesFrame,
|
||||
OutputAudioRawFrame,
|
||||
TextFrame,
|
||||
TTSAudioRawFrame,
|
||||
@@ -25,6 +24,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||
@@ -137,7 +140,7 @@ async def main():
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
await task.queue_frame(LLMMessagesFrame(messages))
|
||||
await task.queue_frame(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
await task.stop_when_done()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
@@ -119,6 +119,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -137,6 +137,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -88,6 +88,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -87,6 +87,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -146,6 +146,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -129,6 +129,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -86,6 +86,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -101,6 +101,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -93,6 +93,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -89,6 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -16,13 +16,16 @@ from langchain_openai import ChatOpenAI
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMMessagesFrame
|
||||
from pipecat.frames.frames import LLMMessagesUpdateFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMAssistantResponseAggregator,
|
||||
LLMUserResponseAggregator,
|
||||
LLMAssistantContextAggregator,
|
||||
LLMUserContextAggregator,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
)
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
@@ -97,8 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
)
|
||||
lc = LangchainProcessor(history_chain)
|
||||
|
||||
tma_in = LLMUserResponseAggregator()
|
||||
tma_out = LLMAssistantResponseAggregator()
|
||||
context = OpenAILLMContext()
|
||||
tma_in = LLMUserContextAggregator(context=context)
|
||||
tma_out = LLMAssistantContextAggregator(context=context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -118,17 +122,18 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
# the `LLMMessagesFrame` will be picked up by the LangchainProcessor using
|
||||
# An `OpenAILLMContextFrame` will be picked up by the LangchainProcessor using
|
||||
# only the content of the last message to inject it in the prompt defined
|
||||
# above. So no role is required here.
|
||||
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
||||
await task.queue_frames([LLMMessagesFrame(messages)])
|
||||
await task.queue_frames([LLMMessagesUpdateFrame(messages, run_llm=True)])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -92,6 +92,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@stt.event_handler("on_speech_started")
|
||||
|
||||
@@ -86,6 +86,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -93,6 +93,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -5,16 +5,27 @@
|
||||
#
|
||||
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
Frame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMTextFrame,
|
||||
TranscriptionFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
@@ -49,6 +60,65 @@ transport_params = {
|
||||
}
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
"""Custom processor that logs transcription frames."""
|
||||
|
||||
async def process_frame(self, frame, direction):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
# Only log TranscriptionFrame objects
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
logger.info(f"[TRANSCRIPTION]: {frame.text}")
|
||||
|
||||
# Always pass the frame through to maintain pipeline flow
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class InterventionProcessor(FrameProcessor):
|
||||
"""Custom processor that logs LLM response frames."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._timer_task = None
|
||||
|
||||
async def process_frame(self, frame, direction):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
# Log LLM response start frames
|
||||
if isinstance(frame, LLMFullResponseStartFrame):
|
||||
logger.info(f"[LLM_START]: Starting LLM response")
|
||||
|
||||
# Cancel any existing timer
|
||||
if self._timer_task and not self._timer_task.done():
|
||||
self._timer_task.cancel()
|
||||
|
||||
# Start a new 500ms timer
|
||||
self._timer_task = asyncio.create_task(self._log_after_delay())
|
||||
|
||||
# Cancel timer if bot started speaking before 500ms
|
||||
elif isinstance(frame, BotStartedSpeakingFrame):
|
||||
logger.info(f"[BOT_SPEAKING]: Bot started speaking, canceling intervention timer")
|
||||
if self._timer_task and not self._timer_task.done():
|
||||
self._timer_task.cancel()
|
||||
|
||||
# Log LLM text frames
|
||||
elif isinstance(frame, LLMTextFrame):
|
||||
logger.info(f"[LLM_TEXT]: {frame.text}")
|
||||
|
||||
# Always pass the frame through to maintain pipeline flow
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
async def _log_after_delay(self):
|
||||
"""Log a message after 500ms delay."""
|
||||
try:
|
||||
await asyncio.sleep(0.5) # 500ms
|
||||
logger.info(f"500ms passed since LLMFullResponseStartFrame")
|
||||
await self.queue_frame(TTSSpeakFrame("um..."))
|
||||
except asyncio.CancelledError:
|
||||
# Timer was cancelled, which is fine
|
||||
pass
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
@@ -71,13 +141,21 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
# Create transcription logger instance
|
||||
transcription_logger = TranscriptionLogger()
|
||||
|
||||
# Create LLM logger instance
|
||||
intervention = InterventionProcessor()
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
transcription_logger, # Log transcription frames
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
intervention, # Log LLM response frames
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
@@ -89,6 +167,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -89,6 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -95,6 +95,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -90,6 +90,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -94,6 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -92,6 +92,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -55,6 +55,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = GladiaSTTService(
|
||||
api_key=os.getenv("GLADIA_API_KEY", ""),
|
||||
region=os.getenv("GLADIA_REGION"),
|
||||
params=GladiaInputParams(
|
||||
language_config=LanguageConfig(
|
||||
languages=[Language.EN],
|
||||
@@ -97,6 +98,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -85,6 +85,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -90,6 +90,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
163
examples/foundational/07n-interruptible-gemini.py
Normal file
163
examples/foundational/07n-interruptible-gemini.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""
|
||||
A conversational AI bot using Gemini for both LLM and TTS.
|
||||
|
||||
This example demonstrates how to use Gemini's TTS capabilities with the new
|
||||
GeminiTTSService, which uses Gemini's TTS-specific models instead of Google Cloud TTS.
|
||||
|
||||
Features showcased:
|
||||
- Gemini LLM for conversation
|
||||
- Gemini TTS with natural voice control
|
||||
- Support for different voice personalities
|
||||
- Style and tone control through natural language prompts
|
||||
|
||||
Run with:
|
||||
python examples/foundational/gemini-tts.py
|
||||
|
||||
Make sure to set your environment variables:
|
||||
export GOOGLE_API_KEY=your_api_key_here
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.google.stt import GoogleSTTService
|
||||
from pipecat.services.google.tts import GeminiTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot with Gemini TTS")
|
||||
|
||||
stt = GoogleSTTService(
|
||||
params=GoogleSTTService.InputParams(languages=Language.EN_US),
|
||||
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
|
||||
)
|
||||
|
||||
tts = GeminiTTSService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash-preview-tts", # TTS-specific model
|
||||
voice_id="Charon",
|
||||
params=GeminiTTSService.InputParams(language=Language.EN_US),
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash",
|
||||
)
|
||||
|
||||
# System message that instructs the AI on how to speak
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": """You are a helpful AI assistant in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way.
|
||||
|
||||
IMPORTANT: Since you're using Gemini TTS which supports natural voice control, you can include speaking instructions in your responses. For example:
|
||||
- "Say cheerfully: Welcome to our conversation!"
|
||||
- "Read this in a calm, professional tone: Here are the details you requested."
|
||||
- "Speak in an excited whisper: I have some great news to share!"
|
||||
- "Say slowly and clearly: Let me explain this step by step."
|
||||
|
||||
Feel free to use natural language instructions to control your voice style, tone, pace, and emotion. The TTS system will interpret these instructions and adjust the speech accordingly.
|
||||
|
||||
Your output will be converted to audio, so avoid special characters in your answers. Respond to what the user said in a creative and helpful way.""",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # Gemini TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation with a styled introduction
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Say cheerfully and warmly: Hello! I'm your AI assistant powered by Gemini's new TTS technology. I can speak with different voices, tones, and styles. How can I help you today?",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -98,6 +98,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -89,6 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -94,6 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -88,6 +88,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -85,6 +85,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -266,6 +266,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -89,6 +89,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -82,6 +82,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -93,6 +93,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -88,6 +88,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -95,6 +95,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -94,6 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -6,9 +6,13 @@ from typing import Tuple
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
|
||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.processors.aggregators import SentenceAggregator
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.runner.daily import configure
|
||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
@@ -79,7 +83,7 @@ async def main():
|
||||
sentence_aggregator = SentenceAggregator()
|
||||
pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
|
||||
|
||||
await source_queue.put(LLMMessagesFrame(messages))
|
||||
await source_queue.put(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||
await source_queue.put(EndFrame())
|
||||
await pipeline.run_pipeline()
|
||||
|
||||
|
||||
@@ -80,6 +80,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -97,6 +97,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
async def run_tk():
|
||||
|
||||
@@ -92,6 +92,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -143,7 +143,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
@@ -103,7 +103,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
@@ -116,7 +119,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -109,6 +109,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
@@ -122,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -109,6 +109,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
@@ -122,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
|
||||
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -109,6 +109,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
@@ -122,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
image_requester.set_participant_id(client_id)
|
||||
|
||||
# Welcome message
|
||||
await tts.say("Hi there! Feel free to ask me what I see.")
|
||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -60,7 +60,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -54,7 +54,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -47,6 +47,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = GladiaSTTService(
|
||||
api_key=os.getenv("GLADIA_API_KEY"),
|
||||
region=os.getenv("GLADIA_REGION"),
|
||||
# live_options=LiveOptions(language=Language.FR),
|
||||
)
|
||||
|
||||
@@ -54,7 +55,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -56,6 +56,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = GladiaSTTService(
|
||||
api_key=os.getenv("GLADIA_API_KEY"),
|
||||
region=os.getenv("GLADIA_REGION"),
|
||||
params=GladiaInputParams(
|
||||
language_config=LanguageConfig(
|
||||
languages=[Language.EN], # Input in English
|
||||
@@ -75,7 +76,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -53,7 +53,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -87,6 +87,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -54,7 +54,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -91,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -74,7 +74,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -60,7 +60,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -138,6 +138,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -132,6 +132,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -180,6 +180,7 @@ If you need to use a tool, simply use the tool. Do not tell the user the tool yo
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -124,6 +124,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -169,6 +169,7 @@ indicate you should use the get_image tool are:
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -191,6 +191,7 @@ indicate you should use the get_image tool are:
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -126,6 +126,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -116,6 +116,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -125,6 +125,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -124,6 +124,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -122,6 +122,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -131,6 +131,7 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -131,6 +131,7 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -63,8 +63,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_API_KEY"),
|
||||
region="eastus",
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
voice="en-US-JennyNeural",
|
||||
params=AzureTTSService.InputParams(language="en-US", rate="1.1", style="cheerful"),
|
||||
)
|
||||
@@ -125,6 +125,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -94,6 +94,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -121,6 +121,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -127,6 +127,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -123,6 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -137,6 +137,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -130,6 +130,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -124,6 +124,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -140,6 +140,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
176
examples/foundational/14v-function-calling-openai.py
Normal file
176
examples/foundational/14v-function-calling-openai.py
Normal file
@@ -0,0 +1,176 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import OpenAISTTService
|
||||
from pipecat.services.openai.tts import OpenAITTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
|
||||
await params.result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def fetch_restaurant_recommendation(params: FunctionCallParams):
|
||||
await params.result_callback({"name": "The Golden Dragon"})
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = OpenAISTTService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o-transcribe",
|
||||
prompt="Expect words related weather, such as temperature and conditions. And restaurant names.",
|
||||
)
|
||||
|
||||
# voice choices: ash, ballad, or any other voice available in the OpenAI TTS API
|
||||
# see https://www.openai.fm/
|
||||
tts = OpenAITTSService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
voice="ballad",
|
||||
instructions="Please speak clearly and at a moderate pace.",
|
||||
)
|
||||
|
||||
# model choices: gpt-4o, gpt-4.1, etc.
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
# You can also register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||
llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
|
||||
|
||||
@llm.event_handler("on_function_calls_started")
|
||||
async def on_function_calls_started(service, function_calls):
|
||||
await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
|
||||
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
restaurant_function = FunctionSchema(
|
||||
name="get_restaurant_recommendation",
|
||||
description="Get a restaurant recommendation",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -12,6 +12,7 @@ from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -31,29 +32,54 @@ from pipecat.transports.services.daily import DailyParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
current_voice = "News Lady"
|
||||
class SwitchVoices(ParallelPipeline):
|
||||
def __init__(self):
|
||||
self._current_voice = "News Lady"
|
||||
|
||||
news_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="bf991597-6c13-47e4-8411-91ec2de5c466", # Newslady
|
||||
)
|
||||
|
||||
async def switch_voice(params: FunctionCallParams):
|
||||
global current_voice
|
||||
current_voice = params.arguments["voice"]
|
||||
await params.result_callback(
|
||||
{
|
||||
"voice": f"You are now using your {current_voice} voice. Your responses should now be as if you were a {current_voice}."
|
||||
}
|
||||
)
|
||||
british_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
barbershop_man = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
)
|
||||
|
||||
async def news_lady_filter(frame) -> bool:
|
||||
return current_voice == "News Lady"
|
||||
super().__init__(
|
||||
# News Lady voice
|
||||
[FunctionFilter(self.news_lady_filter), news_lady],
|
||||
# British Reading Lady voice
|
||||
[FunctionFilter(self.british_lady_filter), british_lady],
|
||||
# Barbershop Man voice
|
||||
[FunctionFilter(self.barbershop_man_filter), barbershop_man],
|
||||
)
|
||||
|
||||
@property
|
||||
def current_voice(self):
|
||||
return self._current_voice
|
||||
|
||||
async def british_lady_filter(frame) -> bool:
|
||||
return current_voice == "British Lady"
|
||||
async def switch_voice(self, params: FunctionCallParams):
|
||||
self._current_voice = params.arguments["voice"]
|
||||
await params.result_callback(
|
||||
{
|
||||
"voice": f"You are now using your {self.current_voice} voice. Your responses should now be as if you were a {self.current_voice}."
|
||||
}
|
||||
)
|
||||
|
||||
async def news_lady_filter(self, _: Frame) -> bool:
|
||||
return self.current_voice == "News Lady"
|
||||
|
||||
async def barbershop_man_filter(frame) -> bool:
|
||||
return current_voice == "Barbershop Man"
|
||||
async def british_lady_filter(self, _: Frame) -> bool:
|
||||
return self.current_voice == "British Lady"
|
||||
|
||||
async def barbershop_man_filter(self, _: Frame) -> bool:
|
||||
return self.current_voice == "Barbershop Man"
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
@@ -83,23 +109,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
news_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="bf991597-6c13-47e4-8411-91ec2de5c466", # Newslady
|
||||
)
|
||||
|
||||
british_lady = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
barbershop_man = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="a0e99841-438c-4a64-b679-ae501e7d6091", # Barbershop Man
|
||||
)
|
||||
tts = SwitchVoices()
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
llm.register_function("switch_voice", switch_voice)
|
||||
llm.register_function("switch_voice", tts.switch_voice)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
@@ -136,14 +149,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (one of the following vocies)
|
||||
[FunctionFilter(news_lady_filter), news_lady], # News Lady voice
|
||||
[
|
||||
FunctionFilter(british_lady_filter),
|
||||
british_lady,
|
||||
], # British Reading Lady voice
|
||||
[FunctionFilter(barbershop_man_filter), barbershop_man], # Barbershop Man voice
|
||||
),
|
||||
tts, # TTS with switch voice functionality
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
@@ -155,6 +161,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
@@ -164,7 +171,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the voices you can do. Your initial responses should be as if you were a {current_voice}.",
|
||||
"content": f"Please introduce yourself to the user and let them know the voices you can do. Your initial responses should be as if you were a {tts.current_voice}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@@ -13,6 +13,7 @@ from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -32,23 +33,42 @@ from pipecat.transports.services.daily import DailyParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
current_language = "English"
|
||||
class SwitchLanguage(ParallelPipeline):
|
||||
def __init__(self):
|
||||
self._current_language = "English"
|
||||
|
||||
english_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
async def switch_language(params: FunctionCallParams):
|
||||
global current_language
|
||||
current_language = params.arguments["language"]
|
||||
await params.result_callback(
|
||||
{"voice": f"Your answers from now on should be in {current_language}."}
|
||||
)
|
||||
spanish_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="d4db5fb9-f44b-4bd1-85fa-192e0f0d75f9", # Spanish-speaking Lady
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
# English
|
||||
[FunctionFilter(self.english_filter), english_tts],
|
||||
# Spanish
|
||||
[FunctionFilter(self.spanish_filter), spanish_tts],
|
||||
)
|
||||
|
||||
async def english_filter(frame) -> bool:
|
||||
return current_language == "English"
|
||||
@property
|
||||
def current_language(self):
|
||||
return self._current_language
|
||||
|
||||
async def switch_language(self, params: FunctionCallParams):
|
||||
self._current_language = params.arguments["language"]
|
||||
await params.result_callback(
|
||||
{"voice": f"Your answers from now on should be in {self.current_language}."}
|
||||
)
|
||||
|
||||
async def spanish_filter(frame) -> bool:
|
||||
return current_language == "Spanish"
|
||||
async def english_filter(self, _: Frame) -> bool:
|
||||
return self.current_language == "English"
|
||||
|
||||
async def spanish_filter(self, _: Frame) -> bool:
|
||||
return self.current_language == "Spanish"
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
@@ -80,18 +100,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"), live_options=LiveOptions(language="multi")
|
||||
)
|
||||
|
||||
english_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
spanish_tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="d4db5fb9-f44b-4bd1-85fa-192e0f0d75f9", # Spanish-speaking Lady
|
||||
)
|
||||
tts = SwitchLanguage()
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
llm.register_function("switch_language", switch_language)
|
||||
llm.register_function("switch_language", tts.switch_language)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
@@ -128,10 +140,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
ParallelPipeline( # TTS (bot will speak the chosen language)
|
||||
[FunctionFilter(english_filter), english_tts], # English
|
||||
[FunctionFilter(spanish_filter), spanish_tts], # Spanish
|
||||
),
|
||||
tts, # TTS (bot will speak the chosen language)
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
@@ -143,6 +152,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
@@ -152,7 +162,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Please introduce yourself to the user and let them know the languages you speak. Your initial responses should be in {current_language}.",
|
||||
"content": f"Please introduce yourself to the user and let them know the languages you speak. Your initial responses should be in {tts.current_language}.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user