Compare commits
286 Commits
v0.0.102
...
mb/remove-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4b4e8b839c | ||
|
|
86c2dd5cfc | ||
|
|
7fe458fe59 | ||
|
|
faed775d90 | ||
|
|
b63ca524f5 | ||
|
|
907ff58d41 | ||
|
|
97b93ebe57 | ||
|
|
3ae173520e | ||
|
|
c184ac09b8 | ||
|
|
3c20eda8bf | ||
|
|
d69a337def | ||
|
|
f7434cdde1 | ||
|
|
e21e8585f0 | ||
|
|
8b6aa4b912 | ||
|
|
a4b6db6fb4 | ||
|
|
edc79d374a | ||
|
|
e521aef5df | ||
|
|
3cfff51205 | ||
|
|
3d8e3a4043 | ||
|
|
7ee0400c4c | ||
|
|
781d191509 | ||
|
|
a8cb2a26d1 | ||
|
|
b1df1ba5d4 | ||
|
|
eee2ef7e85 | ||
|
|
ff0f3dce32 | ||
|
|
bca42f7d68 | ||
|
|
27940d83a2 | ||
|
|
937c691f2a | ||
|
|
6803d38d3f | ||
|
|
44993fe9e3 | ||
|
|
0fe4c732b7 | ||
|
|
ceead60ef2 | ||
|
|
e028194dbe | ||
|
|
81f4672535 | ||
|
|
9273b158ea | ||
|
|
353a28842c | ||
|
|
3e6c59c736 | ||
|
|
0ca8c850fb | ||
|
|
73ee4da7d4 | ||
|
|
2f60074da3 | ||
|
|
751b1b8100 | ||
|
|
d899f0af11 | ||
|
|
c09ae6ba6d | ||
|
|
a187a4b3b2 | ||
|
|
68e19a730b | ||
|
|
67cb7d575f | ||
|
|
a84930dc3e | ||
|
|
54fd73c460 | ||
|
|
c954e1c898 | ||
|
|
db76cd052a | ||
|
|
167e68672b | ||
|
|
69d916ca51 | ||
|
|
9890b93d08 | ||
|
|
f928206b3a | ||
|
|
f421ad9cf6 | ||
|
|
d918a20b75 | ||
|
|
d91c230b85 | ||
|
|
b6f21ab15d | ||
|
|
6f0061ab96 | ||
|
|
761397d1f9 | ||
|
|
d9bb4d07c6 | ||
|
|
ee46cbce4c | ||
|
|
b4b9976b9c | ||
|
|
b78a293ffb | ||
|
|
0a89d24f70 | ||
|
|
8c9ccf8f82 | ||
|
|
bcc2b4def4 | ||
|
|
57d25c564c | ||
|
|
6cda2ff941 | ||
|
|
323477bfa4 | ||
|
|
fa692ec989 | ||
|
|
23ad181515 | ||
|
|
6f7664846c | ||
|
|
081aaa50dc | ||
|
|
aff8ab8a40 | ||
|
|
0f7e6e14ab | ||
|
|
65f563ad34 | ||
|
|
9c2ac661a3 | ||
|
|
cdd65b6c0a | ||
|
|
71fc078c24 | ||
|
|
7556427862 | ||
|
|
bcf11ecbd4 | ||
|
|
ff174dd1c2 | ||
|
|
e804060e17 | ||
|
|
30db5fea7c | ||
|
|
c527e1f30f | ||
|
|
029f3dbefb | ||
|
|
03cb0054f9 | ||
|
|
6a7e9358c6 | ||
|
|
6a3718d33d | ||
|
|
b67af19d47 | ||
|
|
6d9c07b945 | ||
|
|
18429f80f1 | ||
|
|
0a54dc9721 | ||
|
|
521f669051 | ||
|
|
abb20f34ba | ||
|
|
b1e72ad4b7 | ||
|
|
f610fb95f9 | ||
|
|
827032fefb | ||
|
|
af4ef95dc6 | ||
|
|
0370bb15e4 | ||
|
|
2b3595485f | ||
|
|
af4226adbf | ||
|
|
29e2a861dc | ||
|
|
63c664becb | ||
|
|
fecf462139 | ||
|
|
023063759a | ||
|
|
c49eda98e7 | ||
|
|
5d07326e36 | ||
|
|
fa659311b6 | ||
|
|
125c423356 | ||
|
|
c9615c8db6 | ||
|
|
28c542f6ed | ||
|
|
f5b86d9cdc | ||
|
|
5708c81b93 | ||
|
|
f4e9825c03 | ||
|
|
82ce3ea8de | ||
|
|
62ada92188 | ||
|
|
273692421f | ||
|
|
5d8a5bf750 | ||
|
|
0a3e212f93 | ||
|
|
43d686c622 | ||
|
|
4d136e1e28 | ||
|
|
2024285c75 | ||
|
|
bc830c16f1 | ||
|
|
fb27642190 | ||
|
|
463ea3725b | ||
|
|
6c609031ee | ||
|
|
18630c9478 | ||
|
|
3a8d3cc841 | ||
|
|
2963c7589d | ||
|
|
63caa403cb | ||
|
|
ebb42a3c6d | ||
|
|
cc54ff4708 | ||
|
|
846cf0794d | ||
|
|
498349c17e | ||
|
|
474b27305f | ||
|
|
20509e8f96 | ||
|
|
5b2fa69bdc | ||
|
|
4f8cacc769 | ||
|
|
0145fb4ea0 | ||
|
|
8e52df7f03 | ||
|
|
8ee99e37ff | ||
|
|
bae4211369 | ||
|
|
859cd7c920 | ||
|
|
d608c400f9 | ||
|
|
94e93bed83 | ||
|
|
b1cee140b9 | ||
|
|
352361bdd2 | ||
|
|
baa61468a1 | ||
|
|
7501ba2e45 | ||
|
|
200716e8fe | ||
|
|
421696e1c2 | ||
|
|
50ef4909e3 | ||
|
|
63df4642b5 | ||
|
|
43869a499d | ||
|
|
d2bf3952ec | ||
|
|
92c380ee77 | ||
|
|
a55ba40921 | ||
|
|
fb1bfd03dd | ||
|
|
a7edd8e441 | ||
|
|
2a07138abf | ||
|
|
a0a7b3101d | ||
|
|
39dc4ba99c | ||
|
|
ad942f6e4c | ||
|
|
97d34ef9e1 | ||
|
|
c054780477 | ||
|
|
88a2dbdb82 | ||
|
|
d386a0efda | ||
|
|
b718a23c17 | ||
|
|
e38f7d9451 | ||
|
|
b00d454842 | ||
|
|
0fa51811ea | ||
|
|
323ee00b83 | ||
|
|
0c73b77327 | ||
|
|
416e1cf877 | ||
|
|
b4c5cb258b | ||
|
|
728a97ade3 | ||
|
|
28677ec829 | ||
|
|
17886d14e8 | ||
|
|
caf5dacbe8 | ||
|
|
b8b531b66a | ||
|
|
a14690e3a0 | ||
|
|
d913d954db | ||
|
|
e98bb1df66 | ||
|
|
a7ada79fd9 | ||
|
|
a5b5a8e5cf | ||
|
|
1daea78b91 | ||
|
|
7910f20e14 | ||
|
|
d7d94a29f0 | ||
|
|
6066eec853 | ||
|
|
cd379671aa | ||
|
|
8006223911 | ||
|
|
ce51df677c | ||
|
|
68ebd3d063 | ||
|
|
94a651cee2 | ||
|
|
1cad4210ce | ||
|
|
1cec8d119d | ||
|
|
7dc16b1d92 | ||
|
|
247f0bbcd3 | ||
|
|
d2372c127a | ||
|
|
3b1ba57452 | ||
|
|
02c2778b8d | ||
|
|
fa6a6dabee | ||
|
|
3a77b4c1d8 | ||
|
|
3537420d91 | ||
|
|
65fb88e61e | ||
|
|
b345f48ac1 | ||
|
|
f181e12d8f | ||
|
|
36de6003d0 | ||
|
|
dba4de77bf | ||
|
|
507765625f | ||
|
|
8f5e5e8e7c | ||
|
|
c682a44bb6 | ||
|
|
cb7023681f | ||
|
|
012ef41ff4 | ||
|
|
66b7b4a5d4 | ||
|
|
f6bb5fa124 | ||
|
|
b08548af9d | ||
|
|
ab92a0e1d7 | ||
|
|
e37f2f99c4 | ||
|
|
e43351f5f8 | ||
|
|
444cbb6499 | ||
|
|
8a4ab611be | ||
|
|
2489c76bc6 | ||
|
|
73cb96bf66 | ||
|
|
79ec61d1d8 | ||
|
|
ca440594fe | ||
|
|
6c25dd4aa2 | ||
|
|
09bb6bb03b | ||
|
|
746fdfbfef | ||
|
|
f7af9f1efd | ||
|
|
a5f95acaf5 | ||
|
|
e50b138ab2 | ||
|
|
3640c7a2dd | ||
|
|
2454bedf29 | ||
|
|
3adb2f50a6 | ||
|
|
01b7a93e08 | ||
|
|
347eaf582d | ||
|
|
25ca296477 | ||
|
|
3fce88555f | ||
|
|
9e6f27c9f1 | ||
|
|
94f01af545 | ||
|
|
432870cc36 | ||
|
|
e065907745 | ||
|
|
b7a5ca3d1e | ||
|
|
9569625f03 | ||
|
|
18afe37bd1 | ||
|
|
2b9777b812 | ||
|
|
8866ab1585 | ||
|
|
f0995164d9 | ||
|
|
136732afae | ||
|
|
3410eb82b3 | ||
|
|
794811fbdb | ||
|
|
abea22ec57 | ||
|
|
08beb0264a | ||
|
|
2e15b4842c | ||
|
|
6d95a2425c | ||
|
|
4667a3d66d | ||
|
|
0bf2477d2c | ||
|
|
71a752c971 | ||
|
|
358f237507 | ||
|
|
d99a256715 | ||
|
|
dcbcab1542 | ||
|
|
a966947220 | ||
|
|
16b060d9e9 | ||
|
|
ed7fde324e | ||
|
|
beb4e86b5f | ||
|
|
e75ccd9c2f | ||
|
|
a80919ceff | ||
|
|
1fe4538982 | ||
|
|
9a48d93bd2 | ||
|
|
0c3e59ed61 | ||
|
|
ec2b38dc29 | ||
|
|
2036757b84 | ||
|
|
0574167fbd | ||
|
|
972ad93e18 | ||
|
|
ac53594967 | ||
|
|
b063d9d43b | ||
|
|
48e93beadf | ||
|
|
883b24f577 | ||
|
|
ed3ec045aa | ||
|
|
67d39a97f7 | ||
|
|
a4e187e138 | ||
|
|
9f380170d7 | ||
|
|
12f27f9cda |
27
.claude-plugin/marketplace.json
Normal file
27
.claude-plugin/marketplace.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"name": "pipecat-dev-skills",
|
||||
"owner": {
|
||||
"name": "Pipecat"
|
||||
},
|
||||
"metadata": {
|
||||
"description": "Development workflow skills for contributing to the Pipecat project",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
"plugins": [
|
||||
{
|
||||
"name": "pipecat-dev",
|
||||
"description": "Development workflow skills for contributing to the Pipecat project",
|
||||
"version": "1.0.0",
|
||||
"source": "./",
|
||||
"skills": [
|
||||
"./.claude/skills/changelog",
|
||||
"./.claude/skills/cleanup",
|
||||
"./.claude/skills/code-review",
|
||||
"./.claude/skills/docstring",
|
||||
"./.claude/skills/pr-description",
|
||||
"./.claude/skills/pr-submit",
|
||||
"./.claude/skills/update-docs"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -26,7 +26,7 @@ Create changelog files for the important commits in this PR. The PR number is pr
|
||||
- `{PR_NUMBER}.performance.md` - for performance improvements
|
||||
- `{PR_NUMBER}.other.md` - for other changes
|
||||
|
||||
4. Each changelog file should at least contain a main single line starting with `- ` followed by a clear description of the change.
|
||||
4. Each changelog file should at least contain a main single line starting with `- ` followed by a clear description of the change. No line wrapping.
|
||||
|
||||
5. If the change is complicated, changelog files can have indented lines after the main line with additional details or code samples.
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Code Cleanup Skill
|
||||
|
||||
The **Code Cleanup Skill** reviews, refactors, and documents code changes in your current branch, ensuring alignment with **Pipecat’s architecture, coding standards, and example patterns**.
|
||||
The **Code Cleanup Skill** reviews, refactors, and documents code changes in your current branch, ensuring alignment with **Pipecat's architecture, coding standards, and example patterns**.
|
||||
It focuses on **readability, correctness, performance, and consistency**, while avoiding breaking changes.
|
||||
|
||||
---
|
||||
@@ -28,9 +28,9 @@ This skill analyzes all changes introduced in your branch and performs the follo
|
||||
|
||||
Invoke the skill using any of the following commands:
|
||||
|
||||
- “Clean up my branch code”
|
||||
- “Refactor the changes in my branch”
|
||||
- “Review and improve my branch code”
|
||||
- "Clean up my branch code"
|
||||
- "Refactor the changes in my branch"
|
||||
- "Review and improve my branch code"
|
||||
- `/cleanup`
|
||||
|
||||
---
|
||||
|
||||
@@ -3,21 +3,20 @@ name: docstring
|
||||
description: Document a Python module and its classes using Google style
|
||||
---
|
||||
|
||||
Document a Python module and its classes using Google-style docstrings following project conventions. The class name is provided as an argument.
|
||||
Document a Python module or class using Google-style docstrings following project conventions. The argument can be a class name or a module path.
|
||||
|
||||
## Instructions
|
||||
|
||||
1. First, find the class in the codebase:
|
||||
```
|
||||
Search for "class ClassName" in src/pipecat/
|
||||
```
|
||||
1. Determine what to document based on the argument:
|
||||
|
||||
2. If multiple files contain that class name:
|
||||
- List all matches with their file paths
|
||||
- Ask the user which one they want to document
|
||||
- Wait for confirmation before proceeding
|
||||
**If a module path is provided** (e.g. `src/pipecat/audio/vad/vad_analyzer.py`):
|
||||
- Use that file directly
|
||||
|
||||
3. Once the file is identified, read the module to understand its structure:
|
||||
**If a class name is provided** (e.g. `VADAnalyzer`):
|
||||
- Search for `class ClassName` in `src/pipecat/`
|
||||
- If multiple files contain that class name, list all matches with their file paths, ask the user which one they want to document, and wait for confirmation
|
||||
|
||||
2. Once the file is identified, read the module to understand its structure:
|
||||
- Identify all classes, functions, and important type aliases
|
||||
- Understand the purpose of each component
|
||||
|
||||
|
||||
28
.claude/skills/pr-submit/SKILL.md
Normal file
28
.claude/skills/pr-submit/SKILL.md
Normal file
@@ -0,0 +1,28 @@
|
||||
---
|
||||
name: pr-submit
|
||||
description: Create and submit a GitHub PR from the current branch
|
||||
---
|
||||
|
||||
Submit the current changes as a GitHub pull request.
|
||||
|
||||
## Instructions
|
||||
|
||||
1. Check the current state of the repository:
|
||||
- Run `git status` to see staged, unstaged, and untracked changes
|
||||
- Run `git diff` to see current changes
|
||||
- Run `git log --oneline -10` to see recent commits
|
||||
|
||||
2. If there are uncommitted changes relevant to the PR:
|
||||
- Ask the user if they want a specific prefix for the branch name (e.g., `alice/`, `fix/`, `feat/`)
|
||||
- Create a new branch based on the current branch
|
||||
- Commit the changes using multiple commits if the changes are unrelated
|
||||
|
||||
3. Push the branch and create the PR:
|
||||
- Push with `-u` flag to set upstream tracking
|
||||
- Create the PR using `gh pr create`
|
||||
|
||||
4. After the PR is created:
|
||||
- Run `/changelog <pr_number>` to generate changelog files, then commit and push them
|
||||
- Run `/pr-description <pr_number>` to update the PR description
|
||||
|
||||
5. Return the PR URL to the user.
|
||||
250
.claude/skills/update-docs/SKILL.md
Normal file
250
.claude/skills/update-docs/SKILL.md
Normal file
@@ -0,0 +1,250 @@
|
||||
---
|
||||
name: update-docs
|
||||
description: Update documentation pages to match source code changes on the current branch
|
||||
---
|
||||
|
||||
Update documentation pages to reflect source code changes on the current branch. Analyzes the diff against main, maps changed source files to their corresponding doc pages, and makes targeted edits.
|
||||
|
||||
## Arguments
|
||||
|
||||
```
|
||||
/update-docs [DOCS_PATH]
|
||||
```
|
||||
|
||||
- `DOCS_PATH` (optional): Path to the docs repository root. If not provided, ask the user.
|
||||
|
||||
Examples:
|
||||
- `/update-docs /Users/me/src/docs`
|
||||
- `/update-docs`
|
||||
|
||||
## Instructions
|
||||
|
||||
### Step 1: Resolve docs path
|
||||
|
||||
If `DOCS_PATH` was provided as an argument, use it. Otherwise, ask the user for the path to their docs repository.
|
||||
|
||||
Verify the path exists and contains `server/services/` subdirectory.
|
||||
|
||||
### Step 2: Create docs branch
|
||||
|
||||
Get the current pipecat branch name:
|
||||
```bash
|
||||
git rev-parse --abbrev-ref HEAD
|
||||
```
|
||||
|
||||
In the docs repo, create a new branch off main with a matching name:
|
||||
```bash
|
||||
cd DOCS_PATH && git checkout main && git pull && git checkout -b {branch-name}-docs
|
||||
```
|
||||
|
||||
For example, if the pipecat branch is `feat/new-service`, the docs branch becomes `feat/new-service-docs`.
|
||||
|
||||
All doc edits in subsequent steps are made on this branch.
|
||||
|
||||
### Step 3: Detect changed source files
|
||||
|
||||
Run:
|
||||
```bash
|
||||
git diff main..HEAD --name-only
|
||||
```
|
||||
|
||||
Filter to files that could affect documentation:
|
||||
- `src/pipecat/services/**/*.py` (service implementations)
|
||||
- `src/pipecat/transports/**/*.py` (transport implementations)
|
||||
- `src/pipecat/serializers/**/*.py` (serializer implementations)
|
||||
- `src/pipecat/processors/**/*.py` (processor implementations)
|
||||
- `src/pipecat/audio/**/*.py` (audio utilities)
|
||||
- `src/pipecat/turns/**/*.py` (turn management)
|
||||
- `src/pipecat/observers/**/*.py` (observers)
|
||||
- `src/pipecat/pipeline/**/*.py` (pipeline core)
|
||||
|
||||
Ignore `__init__.py`, `__pycache__`, test files, and files that only contain type re-exports.
|
||||
|
||||
### Step 4: Map source files to doc pages
|
||||
|
||||
For each changed source file, find the corresponding doc page. Read the mapping file at `.claude/skills/update-docs/SOURCE_DOC_MAPPING.md` and apply its tiered lookup: tier 1 (known exceptions) → tier 2 (pattern matching) → tier 3 (search fallback). **First match wins.**
|
||||
|
||||
### Step 5: Analyze each source-doc pair
|
||||
|
||||
For each mapped pair:
|
||||
|
||||
1. **Read the full source file** to understand current state
|
||||
2. **Read the diff** for that file: `git diff main..HEAD -- <source_file>`
|
||||
3. **Read the current doc page** in full
|
||||
|
||||
Identify what changed by comparing source to docs:
|
||||
|
||||
- **Constructor parameters**: Compare `__init__` signature to the Configuration section's `<ParamField>` entries
|
||||
- **InputParams fields**: Compare `InputParams(BaseModel)` class fields to the InputParams table
|
||||
- **Event handlers**: Compare `_register_event_handler` calls and event handler definitions to Event Handlers section
|
||||
- **Class names / imports**: Check if Usage examples reference correct names
|
||||
- **Behavioral changes**: Check if Notes section needs updating
|
||||
|
||||
### Step 6: Make targeted edits
|
||||
|
||||
For each doc page that needs updates, edit **only the sections that need changes**. Preserve all other content exactly as-is.
|
||||
|
||||
#### Rules
|
||||
|
||||
- **Never remove content** unless the corresponding source code was removed
|
||||
- **Never rewrite sections** that are already accurate
|
||||
- **Match existing formatting** — if the page uses `<ParamField>` tags, use them; if it uses tables, use tables
|
||||
- **Keep descriptions concise** — match the tone and length of surrounding content
|
||||
- **Preserve CardGroup, links, and examples** unless they reference removed functionality
|
||||
- **Don't touch frontmatter** unless the class was renamed
|
||||
|
||||
#### Section-specific guidance
|
||||
|
||||
**Configuration** (constructor params):
|
||||
- Use `<ParamField path="name" type="type" default="value">` format if the page already uses it
|
||||
- Add new params in logical order (required first, then optional)
|
||||
- Remove params that no longer exist in source
|
||||
- Update types/defaults that changed
|
||||
|
||||
**InputParams** (runtime settings):
|
||||
- Use markdown table format: `| Parameter | Type | Default | Description |`
|
||||
- Match the field names and types from the `InputParams(BaseModel)` class
|
||||
- Include the default values from the source
|
||||
|
||||
**Usage** (code examples):
|
||||
- Update import paths, class names, and parameter names
|
||||
- Only modify examples if they would break or be misleading with the new API
|
||||
- Don't rewrite working examples just to add new optional params
|
||||
|
||||
**Notes**:
|
||||
- Add notes for new behavioral gotchas or breaking changes
|
||||
- Remove notes about limitations that were fixed
|
||||
- Keep existing notes that are still accurate
|
||||
|
||||
**Event Handlers**:
|
||||
- Update the event table and example code
|
||||
- Add new events, remove deleted ones
|
||||
- Update handler signatures if they changed
|
||||
|
||||
**Overview / Key Features / Prerequisites**:
|
||||
- Only update if the PR fundamentally changes what the service does (new capability, removed capability, renamed class)
|
||||
- Most PRs will NOT need changes to these sections
|
||||
|
||||
### Step 7: Update guides
|
||||
|
||||
Guides at `DOCS_PATH/guides/` reference specific class names, parameters, imports, and code patterns. After completing reference doc edits, check if any guides need updates too.
|
||||
|
||||
For each changed source file, collect the class names, renamed parameters, and changed imports from the diff. Search the guides directory:
|
||||
```bash
|
||||
grep -rl "ClassName\|old_param_name" DOCS_PATH/guides/
|
||||
```
|
||||
|
||||
For each guide that references changed code:
|
||||
1. Read the full guide
|
||||
2. Update class names, parameter names, import paths, and code examples that are now incorrect
|
||||
3. **Don't rewrite prose** — only fix the specific references that changed
|
||||
4. Leave guides alone if they reference the service generally but don't use any changed APIs
|
||||
|
||||
Guide directories:
|
||||
- `guides/learn/` — conceptual tutorials (pipeline, LLM, STT, TTS, etc.)
|
||||
- `guides/fundamentals/` — practical how-tos (metrics, recording, transcripts, etc.)
|
||||
- `guides/features/` — feature-specific guides (Gemini Live, OpenAI audio, WhatsApp, etc.)
|
||||
- `guides/telephony/` — telephony integration guides (Twilio, Plivo, Telnyx, etc.)
|
||||
|
||||
### Step 8: Identify doc gaps
|
||||
|
||||
After processing all mapped pairs, check for two kinds of gaps:
|
||||
|
||||
**Missing pages**: Source files that had no doc page mapping (neither tier 1, 2, nor 3) and are not marked as "(skip)". For each, tell the user:
|
||||
- The source file path
|
||||
- The main class(es) it defines
|
||||
- Whether a new doc page should be created
|
||||
|
||||
**Missing sections**: Mapped doc pages that are missing standard sections compared to the source. For example, a transport page with no Configuration section, or a service page with no InputParams table when the source defines `InputParams(BaseModel)`. Flag these and offer to add the missing sections.
|
||||
|
||||
If the user wants a new page, create it using this template structure:
|
||||
```
|
||||
---
|
||||
title: "Service Name"
|
||||
description: "Brief description"
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
[Description from class docstring or source analysis]
|
||||
|
||||
<CardGroup cols={2}>
|
||||
[Cards for API reference and examples if available]
|
||||
</CardGroup>
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install "pipecat-ai[package-name]"
|
||||
```
|
||||
|
||||
## Prerequisites
|
||||
|
||||
[Environment variables and account setup]
|
||||
|
||||
## Configuration
|
||||
|
||||
[ParamField entries for constructor params]
|
||||
|
||||
## InputParams
|
||||
|
||||
[Table of InputParams fields, if the service has them]
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Setup
|
||||
|
||||
```python
|
||||
[Minimal working example]
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
[Important caveats]
|
||||
|
||||
## Event Handlers
|
||||
|
||||
[Event table and example code]
|
||||
```
|
||||
|
||||
### Step 9: Output summary
|
||||
|
||||
After all edits are complete, print a summary:
|
||||
|
||||
```
|
||||
## Documentation Updates
|
||||
|
||||
### Updated reference pages
|
||||
- `server/services/stt/deepgram.mdx` — Updated Configuration (added `new_param`), InputParams (updated `language` default)
|
||||
- `server/services/tts/elevenlabs.mdx` — Updated Event Handlers (added `on_connected`)
|
||||
|
||||
### Updated guides
|
||||
- `guides/learn/speech-to-text.mdx` — Updated code example (renamed `old_param` → `new_param`)
|
||||
|
||||
### Unmapped source files
|
||||
- `src/pipecat/services/newprovider/tts.py` — NewProviderTTSService (no doc page exists)
|
||||
|
||||
### Skipped files
|
||||
- `src/pipecat/services/ai_service.py` — internal base class
|
||||
```
|
||||
|
||||
## Guidelines
|
||||
|
||||
- **Be conservative** — only change what the diff warrants. Don't "improve" docs beyond what changed in source.
|
||||
- **Read before editing** — always read the full doc page before making changes so you understand the existing structure.
|
||||
- **Preserve voice** — match the writing style of the existing doc page, don't impose a different tone.
|
||||
- **One PR at a time** — this skill operates on the current branch's diff against main. Don't look at other branches.
|
||||
- **Parallel analysis** — when multiple source files map to different doc pages, analyze and edit them in parallel for efficiency.
|
||||
- **Shared source files** — files like `services/google/google.py` are shared bases. Check which services import from them and update all affected doc pages.
|
||||
|
||||
## Checklist
|
||||
|
||||
Before finishing, verify:
|
||||
|
||||
- [ ] All changed source files were checked against the mapping table
|
||||
- [ ] Each doc page edit matches the actual source code change (not guessed)
|
||||
- [ ] No content was removed unless the corresponding source was removed
|
||||
- [ ] New parameters have accurate types and defaults from source
|
||||
- [ ] Formatting matches the existing page style
|
||||
- [ ] Guides referencing changed APIs were checked and updated
|
||||
- [ ] Unmapped files were reported to the user
|
||||
79
.claude/skills/update-docs/SOURCE_DOC_MAPPING.md
Normal file
79
.claude/skills/update-docs/SOURCE_DOC_MAPPING.md
Normal file
@@ -0,0 +1,79 @@
|
||||
# Source-to-Doc Mapping
|
||||
|
||||
Maps pipecat source files to their documentation pages. Source paths are relative to `src/pipecat/`. Doc paths are relative to `DOCS_PATH`.
|
||||
|
||||
## Name mismatches
|
||||
|
||||
These source paths don't follow the standard `services/{provider}/{type}.py` → `server/services/{type}/{provider}.mdx` pattern.
|
||||
|
||||
| Source path | Doc page |
|
||||
|---|---|
|
||||
| `services/google/llm.py` | `server/services/llm/gemini.mdx` |
|
||||
| `services/google/llm_vertex.py` | `server/services/llm/google-vertex.mdx` |
|
||||
| `services/google/google.py` | (shared base — check which services use it) |
|
||||
| `services/google/gemini_live/**` | `server/services/s2s/gemini-live.mdx` |
|
||||
| `services/google/gemini_live/llm_vertex.py` | `server/services/s2s/gemini-live-vertex.mdx` |
|
||||
| `services/aws_nova_sonic/**` | `server/services/s2s/aws.mdx` |
|
||||
| `services/ultravox/**` | `server/services/s2s/ultravox.mdx` |
|
||||
| `services/grok/realtime/**` | `server/services/s2s/grok.mdx` |
|
||||
| `services/openai/realtime/**` | `server/services/s2s/openai.mdx` |
|
||||
| `processors/frameworks/rtvi.py` | `server/frameworks/rtvi/rtvi-processor.mdx` and `server/frameworks/rtvi/rtvi-observer.mdx` |
|
||||
| `processors/transcript_processor.py` | `server/utilities/transcript-processor.mdx` |
|
||||
| `processors/user_idle_processor.py` | `server/utilities/user-idle-processor.mdx` |
|
||||
| `processors/idle_frame_processor.py` | `server/pipeline/pipeline-idle-detection.mdx` |
|
||||
| `pipeline/task.py` | `server/pipeline/pipeline-task.mdx` |
|
||||
| `pipeline/runner.py` | `server/utilities/runner/guide.mdx` |
|
||||
| `transports/base_transport.py` | `server/services/transport/transport-params.mdx` |
|
||||
|
||||
## Skip list
|
||||
|
||||
These files should never trigger doc updates.
|
||||
|
||||
| Pattern | Reason |
|
||||
|---|---|
|
||||
| `services/ai_service.py` | Internal base class |
|
||||
| `services/stt_service.py` | Internal base class |
|
||||
| `services/tts_service.py` | Internal base class |
|
||||
| `services/llm_service.py` | Internal base class |
|
||||
| `services/websocket_service.py` | Internal base class |
|
||||
| `services/openai_realtime_beta/**` | Deprecated |
|
||||
| `services/openai_realtime/**` | Deprecated |
|
||||
| `services/gemini_multimodal_live/**` | Deprecated |
|
||||
| `services/aws/agent_core.py` | Internal |
|
||||
| `services/aws/sagemaker/**` | No doc page |
|
||||
| `transports/base_input.py` | Internal base class |
|
||||
| `transports/base_output.py` | Internal base class |
|
||||
| `transports/websocket/client.py` | No doc page |
|
||||
| `serializers/base_serializer.py` | Internal base class |
|
||||
| `serializers/protobuf.py` | Internal |
|
||||
| `processors/audio/**` | Internal |
|
||||
| `pipeline/pipeline.py` | Core architecture, not a service doc |
|
||||
|
||||
## Pattern matching
|
||||
|
||||
For files not in the tables above, apply these patterns. Convert underscores to hyphens in provider names for doc filenames.
|
||||
|
||||
| Source pattern | Doc pattern |
|
||||
|---|---|
|
||||
| `services/{provider}/stt*.py` | `server/services/stt/{provider}.mdx` |
|
||||
| `services/{provider}/tts*.py` | `server/services/tts/{provider}.mdx` |
|
||||
| `services/{provider}/llm*.py` | `server/services/llm/{provider}.mdx` |
|
||||
| `services/{provider}/image*.py` | `server/services/image-generation/{provider}.mdx` |
|
||||
| `services/{provider}/video*.py` | `server/services/video/{provider}.mdx` |
|
||||
| `services/{provider}/realtime/**` | `server/services/s2s/{provider}.mdx` |
|
||||
| `transports/{name}/**` | `server/services/transport/{name}.mdx` |
|
||||
| `serializers/{name}.py` | `server/services/serializers/{name}.mdx` |
|
||||
| `observers/**` | `server/utilities/observers/` (match by class name) |
|
||||
| `audio/vad/**` | `server/utilities/audio/` (match by class name) |
|
||||
| `audio/filters/**` | `server/utilities/audio/` (match by class name) |
|
||||
| `audio/mixers/**` | `server/utilities/audio/` (match by class name) |
|
||||
| `processors/filters/**` | `server/utilities/filters/` (match by class name) |
|
||||
|
||||
If the doc file doesn't exist at the resolved path, the file is **unmapped**.
|
||||
|
||||
## Search fallback
|
||||
|
||||
For files that don't match any table or pattern above:
|
||||
1. Extract the main class name(s) from the source file
|
||||
2. Search the docs directory for that class name: `grep -r "ClassName" DOCS_PATH/server/`
|
||||
3. If found in a doc page, use that as the mapping
|
||||
5
.github/workflows/coverage.yaml
vendored
5
.github/workflows/coverage.yaml
vendored
@@ -29,6 +29,7 @@ jobs:
|
||||
|
||||
- name: Install system packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y portaudio19-dev
|
||||
|
||||
- name: Install dependencies
|
||||
@@ -36,11 +37,13 @@ jobs:
|
||||
uv sync --group dev \
|
||||
--extra anthropic \
|
||||
--extra aws \
|
||||
--extra deepgram \
|
||||
--extra google \
|
||||
--extra langchain \
|
||||
--extra livekit \
|
||||
--extra local-smart-turn-v3 \
|
||||
--extra piper \
|
||||
--extra sagemaker \
|
||||
--extra tracing \
|
||||
--extra websocket
|
||||
|
||||
- name: Run tests with coverage
|
||||
|
||||
2
.github/workflows/generate-changelog.yml
vendored
2
.github/workflows/generate-changelog.yml
vendored
@@ -86,7 +86,7 @@ jobs:
|
||||
fi
|
||||
|
||||
# Validate fragment types
|
||||
VALID_TYPES="added changed deprecated removed fixed security other"
|
||||
VALID_TYPES="added changed deprecated removed fixed performance security other"
|
||||
INVALID_FRAGMENTS=""
|
||||
|
||||
for file in changelog/*.md; do
|
||||
|
||||
5
.github/workflows/tests.yaml
vendored
5
.github/workflows/tests.yaml
vendored
@@ -33,6 +33,7 @@ jobs:
|
||||
|
||||
- name: Install system packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y portaudio19-dev
|
||||
|
||||
- name: Install dependencies
|
||||
@@ -40,11 +41,13 @@ jobs:
|
||||
uv sync --group dev \
|
||||
--extra anthropic \
|
||||
--extra aws \
|
||||
--extra deepgram \
|
||||
--extra google \
|
||||
--extra langchain \
|
||||
--extra livekit \
|
||||
--extra local-smart-turn-v3 \
|
||||
--extra piper \
|
||||
--extra sagemaker \
|
||||
--extra tracing \
|
||||
--extra websocket
|
||||
|
||||
- name: Test with pytest
|
||||
|
||||
209
CHANGELOG.md
209
CHANGELOG.md
@@ -7,6 +7,215 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
<!-- towncrier release notes start -->
|
||||
|
||||
## [0.0.103] - 2026-02-20
|
||||
|
||||
### Added
|
||||
|
||||
- Added `"timestampTransportStrategy": "ASYNC"` to `InworldAITTSService`. This
|
||||
allows timestamps info to trail audio chunks arrival, resulting in much
|
||||
better first audio chunk latency
|
||||
(PR [#3625](https://github.com/pipecat-ai/pipecat/pull/3625))
|
||||
|
||||
- Added model-specific `InputParams` to `RimeTTSService`: arcana params
|
||||
(`repetition_penalty`, `temperature`, `top_p`) and mistv2 params
|
||||
(`no_text_normalization`, `save_oovs`, `segment`). Model, voice, and param
|
||||
changes now trigger WebSocket reconnection.
|
||||
(PR [#3642](https://github.com/pipecat-ai/pipecat/pull/3642))
|
||||
|
||||
- Added `write_transport_frame()` hook to `BaseOutputTransport` allowing
|
||||
transport subclasses to handle custom frame types that flow through the audio
|
||||
queue.
|
||||
(PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719))
|
||||
|
||||
- Added `DailySIPTransferFrame` and `DailySIPReferFrame` to the Daily
|
||||
transport. These frames queue SIP transfer and SIP REFER operations with
|
||||
audio, so the operation executes only after the bot finishes its current
|
||||
utterance.
|
||||
(PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719))
|
||||
|
||||
- Added keepalive support to `SarvamSTTService` to prevent idle connection
|
||||
timeouts (e.g. when used behind a `ServiceSwitcher`).
|
||||
(PR [#3730](https://github.com/pipecat-ai/pipecat/pull/3730))
|
||||
|
||||
- Added `UserIdleTimeoutUpdateFrame` to enable or disable user idle detection
|
||||
at runtime by updating the timeout dynamically.
|
||||
(PR [#3748](https://github.com/pipecat-ai/pipecat/pull/3748))
|
||||
|
||||
- Added `broadcast_sibling_id` field to the base `Frame` class. This field is
|
||||
automatically set by `broadcast_frame()` and `broadcast_frame_instance()` to
|
||||
the ID of the paired frame pushed in the opposite direction, allowing
|
||||
receivers to identify broadcast pairs.
|
||||
(PR [#3774](https://github.com/pipecat-ai/pipecat/pull/3774))
|
||||
|
||||
- Added `ignored_sources` parameter to `RTVIObserverParams` and
|
||||
`add_ignored_source()`/`remove_ignored_source()` methods to `RTVIObserver` to
|
||||
suppress RTVI messages from specific pipeline processors (e.g. a silent
|
||||
evaluation LLM).
|
||||
(PR [#3779](https://github.com/pipecat-ai/pipecat/pull/3779))
|
||||
|
||||
- Added `DeepgramSageMakerTTSService` for running Deepgram TTS models deployed
|
||||
on AWS SageMaker endpoints via HTTP/2 bidirectional streaming. Supports the
|
||||
Deepgram TTS protocol (Speak, Flush, Clear, Close), interruption handling,
|
||||
and per-turn TTFB metrics.
|
||||
(PR [#3785](https://github.com/pipecat-ai/pipecat/pull/3785))
|
||||
|
||||
### Changed
|
||||
|
||||
- ⚠️ `RimeTTSService` now defaults to `model="arcana"` and the
|
||||
`wss://users-ws.rime.ai/ws3` endpoint. `InputParams` defaults changed from
|
||||
mistv2-specific values to `None` — only explicitly-set params are sent as
|
||||
query params.
|
||||
(PR [#3642](https://github.com/pipecat-ai/pipecat/pull/3642))
|
||||
|
||||
- `AICFilter` now shares read-only AIC models via a singleton `AICModelManager`
|
||||
in `aic_filter.py`.
|
||||
- Multiple filters using the same model path or `(model_id,
|
||||
model_download_dir)` share one loaded model, with reference counting and
|
||||
concurrent load deduplication.
|
||||
- Model file I/O runs off the event loop so the filter does not block.
|
||||
(PR [#3684](https://github.com/pipecat-ai/pipecat/pull/3684))
|
||||
|
||||
- Added `X-User-Agent` and `X-Request-Id` headers to `InworldTTSService` for
|
||||
better traceability.
|
||||
(PR [#3706](https://github.com/pipecat-ai/pipecat/pull/3706))
|
||||
|
||||
- `DailyUpdateRemoteParticipantsFrame` is no longer deprecated and is now
|
||||
queued with audio like other transport frames.
|
||||
(PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719))
|
||||
|
||||
- Bumped Pillow dependency upper bound from `<12` to `<13` to allow Pillow
|
||||
12.x.
|
||||
(PR [#3728](https://github.com/pipecat-ai/pipecat/pull/3728))
|
||||
|
||||
- Moved STT keepalive mechanism from `WebsocketSTTService` to the `STTService`
|
||||
base class, allowing any STT service (not just websocket-based ones) to use
|
||||
idle-connection keepalive via the `keepalive_timeout` and
|
||||
`keepalive_interval` parameters.
|
||||
(PR [#3730](https://github.com/pipecat-ai/pipecat/pull/3730))
|
||||
|
||||
- Improved audio context management in `AudioContextTTSService` by moving
|
||||
context ID tracking to the base class and adding
|
||||
`reuse_context_id_within_turn` parameter to control concurrent TTS request
|
||||
handling.
|
||||
- Added helper methods: `has_active_audio_context()`,
|
||||
`get_active_audio_context_id()`, `remove_active_audio_context()`,
|
||||
`reset_active_audio_context()`
|
||||
- Simplified Cartesia, ElevenLabs, Inworld, Rime, AsyncAI, and Gradium TTS
|
||||
implementations by removing duplicate context management code
|
||||
(PR [#3732](https://github.com/pipecat-ai/pipecat/pull/3732))
|
||||
|
||||
- `UserIdleController` is now always created with a default timeout of 0
|
||||
(disabled). The `user_idle_timeout` parameter changed from `Optional[float] =
|
||||
None` to `float = 0` in `UserTurnProcessor`, `LLMUserAggregatorParams`, and
|
||||
`UserIdleController`.
|
||||
(PR [#3748](https://github.com/pipecat-ai/pipecat/pull/3748))
|
||||
|
||||
- Change the version specifier from `>=0.2.8` to `~=0.2.8` for the
|
||||
`speechmatics-voice` package to ensure compatibility with future patch
|
||||
versions.
|
||||
(PR [#3761](https://github.com/pipecat-ai/pipecat/pull/3761))
|
||||
|
||||
- Updated `InworldTTSService` and `InworldHttpTTSService` to use `ASYNC`
|
||||
timestamp transport strategy by default
|
||||
(PR [#3765](https://github.com/pipecat-ai/pipecat/pull/3765))
|
||||
|
||||
- Added `start_time` and `end_time` parameters to `start_ttfb_metrics()`,
|
||||
`stop_ttfb_metrics()`, `start_processing_metrics()`, and
|
||||
`stop_processing_metrics()` in `FrameProcessor` and `FrameProcessorMetrics`,
|
||||
allowing custom timestamps for metrics measurement. `STTService` now uses
|
||||
these instead of custom TTFB tracking.
|
||||
(PR [#3776](https://github.com/pipecat-ai/pipecat/pull/3776))
|
||||
|
||||
- Updated default Anthropic model from `claude-sonnet-4-5-20250929` to
|
||||
`claude-sonnet-4-6`.
|
||||
(PR [#3792](https://github.com/pipecat-ai/pipecat/pull/3792))
|
||||
|
||||
### Deprecated
|
||||
|
||||
- Deprecated unused `Traceable`, `@traceable`, `@traced`, and
|
||||
`AttachmentStrategy` in `pipecat.utils.tracing.class_decorators`. This module
|
||||
will be removed in a future release.
|
||||
(PR [#3733](https://github.com/pipecat-ai/pipecat/pull/3733))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed race condition where `RTVIObserver` could send messages before
|
||||
`DailyTransport` join completed. Outbound messages are now queued & delivered
|
||||
after the transport is ready.
|
||||
(PR [#3615](https://github.com/pipecat-ai/pipecat/pull/3615))
|
||||
|
||||
- Fixed async generator cleanup in OpenAI LLM streaming to prevent
|
||||
`AttributeError` with uvloop on Python 3.12+ (MagicStack/uvloop#699).
|
||||
(PR [#3698](https://github.com/pipecat-ai/pipecat/pull/3698))
|
||||
|
||||
- Fixed `SmallWebRTCTransport` input audio resampling to properly handle all
|
||||
sample rates, including 8kHz audio.
|
||||
(PR [#3713](https://github.com/pipecat-ai/pipecat/pull/3713))
|
||||
|
||||
- Fixed a race condition in `RTVIObserver` where bot output messages could be
|
||||
sent before the bot-started-speaking event.
|
||||
(PR [#3718](https://github.com/pipecat-ai/pipecat/pull/3718))
|
||||
|
||||
- Fixed Grok Realtime `session.updated` event parsing failure caused by the API
|
||||
returning prefixed voice names (e.g. `"human_Ara"` instead of `"Ara"`).
|
||||
(PR [#3720](https://github.com/pipecat-ai/pipecat/pull/3720))
|
||||
|
||||
- Fixed context ID reuse issue in `ElevenLabsTTSService`, `InworldTTSService`,
|
||||
`RimeTTSService`, `CartesiaTTSService`, `AsyncAITTSService`, and
|
||||
`PlayHTTTSService`. Services now properly reuse the same context ID across
|
||||
multiple `run_tts()` invocations within a single LLM turn, preventing context
|
||||
tracking issues and incorrect lifecycle signaling.
|
||||
(PR [#3729](https://github.com/pipecat-ai/pipecat/pull/3729))
|
||||
|
||||
- Fixed word timestamp interleaving issue in `ElevenLabsTTSService` when
|
||||
processing multiple sentences within a single LLM turn.
|
||||
(PR [#3729](https://github.com/pipecat-ai/pipecat/pull/3729))
|
||||
|
||||
- Fixed tracing service decorators executing the wrapped function twice when
|
||||
the function itself raised an exception (e.g., LLM rate limit, TTS timeout).
|
||||
(PR [#3735](https://github.com/pipecat-ai/pipecat/pull/3735))
|
||||
|
||||
- Fixed `LLMUserAggregator` broadcasting mute events before `StartFrame`
|
||||
reaches downstream processors.
|
||||
(PR [#3737](https://github.com/pipecat-ai/pipecat/pull/3737))
|
||||
|
||||
- Fixed `UserIdleController` false idle triggers caused by gaps between user
|
||||
and bot activity frames. The idle timer now starts only after
|
||||
`BotStoppedSpeakingFrame` and is suppressed during active user turns and
|
||||
function calls.
|
||||
(PR [#3744](https://github.com/pipecat-ai/pipecat/pull/3744))
|
||||
|
||||
- Fixed incorrect `sample_rate` assignment in
|
||||
`TavusInputTransport._on_participant_audio_data` (was using
|
||||
`audio.audio_frames` instead of `audio.sample_rate`).
|
||||
(PR [#3768](https://github.com/pipecat-ai/pipecat/pull/3768))
|
||||
|
||||
- Fixed `RTVIObserver` not processing upstream-only frames. Previously, all
|
||||
upstream frames were filtered out to avoid duplicate messages from
|
||||
broadcasted frames. Now only upstream copies of broadcasted frames are
|
||||
skipped.
|
||||
(PR [#3774](https://github.com/pipecat-ai/pipecat/pull/3774))
|
||||
|
||||
- Fixed mutable default arguments in `LLMContextAggregatorPair.__init__()` that
|
||||
could cause shared state across instances.
|
||||
(PR [#3782](https://github.com/pipecat-ai/pipecat/pull/3782))
|
||||
|
||||
- Fixed `DeepgramSageMakerSTTService` to properly track finalize lifecycle
|
||||
using `request_finalize()` / `confirm_finalize()` and use `is_final` (instead
|
||||
of `is_final and speech_final`) for final transcription detection, matching
|
||||
`DeepgramSTTService` behavior.
|
||||
(PR [#3784](https://github.com/pipecat-ai/pipecat/pull/3784))
|
||||
|
||||
- Fixed a race condition in `AudioContextTTSService` where the audio context
|
||||
could time out between consecutive TTS requests within the same turn, causing
|
||||
audio to be discarded.
|
||||
(PR [#3787](https://github.com/pipecat-ai/pipecat/pull/3787))
|
||||
|
||||
- Fixed `push_interruption_task_frame_and_wait()` hanging indefinitely when the
|
||||
`InterruptionFrame` does not reach the pipeline sink within the timeout.
|
||||
Added a `timeout` keyword argument to customize the wait duration.
|
||||
(PR [#3789](https://github.com/pipecat-ai/pipecat/pull/3789))
|
||||
|
||||
## [0.0.102] - 2026-02-10
|
||||
|
||||
### Added
|
||||
|
||||
33
CLAUDE.md
33
CLAUDE.md
@@ -25,7 +25,7 @@ uv run pytest tests/test_name.py
|
||||
uv run pytest tests/test_name.py::test_function_name
|
||||
|
||||
# Preview changelog
|
||||
towncrier build --draft --version Unreleased
|
||||
uv run towncrier build --draft --version Unreleased
|
||||
|
||||
# Lint and format check
|
||||
uv run ruff check
|
||||
@@ -74,7 +74,7 @@ All data flows as **Frame** objects through a pipeline of **FrameProcessors**:
|
||||
- **Context Aggregation**: `LLMContext` accumulates messages for LLM calls; `UserResponse` aggregates user input
|
||||
|
||||
- **Turn Management**: Turn management is done through `LLMUserAggregator` and
|
||||
`LLMAssistantAggregator`, created with `LLMContextAggregatorPair`
|
||||
`LLMAssistantAggregator`, created with `LLMContextAggregatorPair`
|
||||
|
||||
- **User turn strategies**: Detection of when the user starts and stops speaking is done via user turn start/stop strategies. They push `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame` respectively.
|
||||
|
||||
@@ -90,23 +90,26 @@ All data flows as **Frame** objects through a pipeline of **FrameProcessors**:
|
||||
|
||||
### Key Directories
|
||||
|
||||
| Directory | Purpose |
|
||||
|---------------------------|----------------------------------------------------|
|
||||
| `src/pipecat/frames/` | Frame definitions (100+ types) |
|
||||
| `src/pipecat/processors/` | FrameProcessor base + aggregators, filters, audio |
|
||||
| `src/pipecat/pipeline/` | Pipeline orchestration |
|
||||
| `src/pipecat/services/` | AI service integrations (60+ providers) |
|
||||
| `src/pipecat/transports/` | Transport layer (Daily, LiveKit, WebSocket, Local) |
|
||||
| `src/pipecat/serializers/`| Frame serialization for WebSocket protocols |
|
||||
| `src/pipecat/observers/` | Pipeline observers for monitoring frame flow |
|
||||
| `src/pipecat/audio/` | VAD, filters, mixers, turn detection, DTMF |
|
||||
| `src/pipecat/turns/` | User turn management |
|
||||
| Directory | Purpose |
|
||||
| -------------------------- | -------------------------------------------------- |
|
||||
| `src/pipecat/frames/` | Frame definitions (100+ types) |
|
||||
| `src/pipecat/processors/` | FrameProcessor base + aggregators, filters, audio |
|
||||
| `src/pipecat/pipeline/` | Pipeline orchestration |
|
||||
| `src/pipecat/services/` | AI service integrations (60+ providers) |
|
||||
| `src/pipecat/transports/` | Transport layer (Daily, LiveKit, WebSocket, Local) |
|
||||
| `src/pipecat/serializers/` | Frame serialization for WebSocket protocols |
|
||||
| `src/pipecat/observers/` | Pipeline observers for monitoring frame flow |
|
||||
| `src/pipecat/audio/` | VAD, filters, mixers, turn detection, DTMF |
|
||||
| `src/pipecat/turns/` | User turn management |
|
||||
|
||||
## Code Style
|
||||
|
||||
- **Docstrings**: Google-style. Classes describe purpose; `__init__` has `Args:` section; dataclasses use `Parameters:` section.
|
||||
- **Linting**: Ruff (line length 100). Pre-commit hooks enforce formatting.
|
||||
- **Type hints**: Required for complex async code.
|
||||
- **Dataclass vs Pydantic**: Use `@dataclass` for frames and internal pipeline data (high-frequency, no validation needed). Use Pydantic `BaseModel` for configuration, parameters, metrics, and external API data (benefits from validation and serialization). Specifically:
|
||||
- `@dataclass`: Frame types, context aggregator pairs, internal data containers
|
||||
- `BaseModel`: Service `InputParams`, transport/VAD/turn params, metrics data, API request/response models, serializer params
|
||||
|
||||
### Docstring Example
|
||||
|
||||
@@ -152,7 +155,3 @@ When adding a new service:
|
||||
## Testing
|
||||
|
||||
Test utilities live in `src/pipecat/tests/utils.py`. Use `run_test()` to send frames through a pipeline and assert expected output frames in each direction. Use `SleepFrame(sleep=N)` to add delays between frames.
|
||||
|
||||
## Pull Requests
|
||||
|
||||
After creating a PR, use `/changelog <pr_number>` to generate the changelog file and `/pr-description <pr_number>` to update the PR description.
|
||||
|
||||
@@ -25,7 +25,6 @@ Your repository must contain these components:
|
||||
- **Source code** - Complete implementation following Pipecat patterns
|
||||
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational))
|
||||
- **README.md** - Must include:
|
||||
|
||||
- Introduction and explanation of your integration
|
||||
- Installation instructions
|
||||
- Usage instructions with Pipecat Pipeline
|
||||
@@ -110,7 +109,6 @@ Once your PR is submitted, post in the `#community-integrations` Discord channel
|
||||
#### Key requirements:
|
||||
|
||||
- **Frame sequence:** Output must follow this frame sequence pattern:
|
||||
|
||||
- `LLMFullResponseStartFrame` - Signals the start of an LLM response
|
||||
- `LLMTextFrame` - Contains LLM content, typically streamed as tokens
|
||||
- `LLMFullResponseEndFrame` - Signals the end of an LLM response
|
||||
@@ -235,22 +233,79 @@ def can_generate_metrics(self) -> bool:
|
||||
|
||||
### Dynamic Settings Updates
|
||||
|
||||
STT, LLM, and TTS services support `ServiceUpdateSettingsFrame` for dynamic configuration changes. The base STTService has an `_update_settings()` method that handles settings, and the private `_settings` `Dict` is used to store settings and provide access to the subclass.
|
||||
STT, LLM, and TTS services support runtime configuration changes via `*UpdateSettingsFrame`s (e.g. `STTUpdateSettingsFrame`, `TTSUpdateSettingsFrame`, `LLMUpdateSettingsFrame`).
|
||||
|
||||
Each service declares a settings dataclass that extends the appropriate base (`STTSettings`, `TTSSettings`, `LLMSettings`). Fields default to `NOT_GIVEN` so that update objects can represent sparse deltas:
|
||||
|
||||
```python
|
||||
async def set_language(self, language: Language):
|
||||
"""Set the recognition language and reconnect.
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
Args:
|
||||
language: The language to use for speech recognition.
|
||||
from pipecat.services.settings import STTSettings, NOT_GIVEN
|
||||
|
||||
@dataclass
|
||||
class MySTTSettings(STTSettings):
|
||||
"""Settings for my STT service.
|
||||
|
||||
Parameters:
|
||||
region: Cloud region for the service.
|
||||
"""
|
||||
logger.info(f"Switching STT language to: [{language}]")
|
||||
self._settings["language"] = language
|
||||
await self._disconnect()
|
||||
await self._connect()
|
||||
|
||||
region: str = field(default_factory=lambda: NOT_GIVEN)
|
||||
```
|
||||
|
||||
Note that, in this example, Deepgram requires the websocket connection be disconnected and reconnected to reinitialize the service with the new value. Consider if your service requires reconnection.
|
||||
The service stores its current settings in `self._settings` and declares the type with a class-level annotation for editor support:
|
||||
|
||||
```python
|
||||
class MySTTService(STTService):
|
||||
_settings: MySTTSettings
|
||||
|
||||
def __init__(self, *, model: str, language: str, region: str, **kwargs):
|
||||
# An initial value should be provided for every settings field.
|
||||
# This will be validated at service start.
|
||||
# (If you track sample_rate, it can be a placeholder value like 0; see
|
||||
# "Sample Rate Handling").
|
||||
super().__init__(
|
||||
settings=MySTTSettings(model=model, language=language, region=region), **kwargs
|
||||
)
|
||||
```
|
||||
|
||||
To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns a `dict` mapping each changed field name to its **pre-update** value. Your override should call `super()` first, then act on the changed fields. A common implementation might look like:
|
||||
|
||||
```python
|
||||
async def _update_settings(self, update: STTSettings) -> dict[str, Any]:
|
||||
"""Apply a settings update, reconfiguring the recognizer if needed."""
|
||||
changed = await super()._update_settings(update)
|
||||
|
||||
if not changed:
|
||||
return changed
|
||||
|
||||
await self._disconnect()
|
||||
await self._connect()
|
||||
|
||||
return changed
|
||||
```
|
||||
|
||||
The dict keys work like a set for membership tests (`"language" in changed`) and truthiness (`if changed`). Use `changed.keys() - {"language"}` for set difference, or `changed["language"]` to inspect the previous value of a field.
|
||||
|
||||
Note that, in this example, the service requires a reconnect to apply the new language. Consider, for each setting, whether your service requires reconnection or can apply changes in-place.
|
||||
|
||||
If your service can't yet apply certain settings at runtime, call `self._warn_unhandled_updated_settings(changed)` with any unhandled field names so users get a clear log message:
|
||||
|
||||
```python
|
||||
async def _update_settings(self, update: STTSettings) -> dict[str, Any]:
|
||||
changed = await super()._update_settings(update)
|
||||
|
||||
if not changed:
|
||||
return changed
|
||||
|
||||
if "language" in changed:
|
||||
await self._update_language()
|
||||
else:
|
||||
# TODO: this should be temporary - handle changes to other settings soon!
|
||||
self._warn_unhandled_updated_settings(changed.keys() - {"language"})
|
||||
|
||||
return changed
|
||||
```
|
||||
|
||||
### Sample Rate Handling
|
||||
|
||||
@@ -260,7 +315,7 @@ Sample rates are set via PipelineParams and passed to each frame processor at in
|
||||
async def start(self, frame: StartFrame):
|
||||
"""Start the service."""
|
||||
await super().start(frame)
|
||||
self._settings["output_format"]["sample_rate"] = self.sample_rate
|
||||
self._settings.output_sample_rate = self.sample_rate
|
||||
await self._connect()
|
||||
```
|
||||
|
||||
|
||||
@@ -49,12 +49,12 @@ Every pull request that makes a user-facing change should include a changelog en
|
||||
```
|
||||
|
||||
2. Choose the appropriate type:
|
||||
|
||||
- `added.md` - New features
|
||||
- `changed.md` - Changes in existing functionality
|
||||
- `deprecated.md` - Soon-to-be removed features
|
||||
- `removed.md` - Removed features
|
||||
- `fixed.md` - Bug fixes
|
||||
- `performance.md` - Performance improvements
|
||||
- `security.md` - Security fixes
|
||||
- `other.md` - Other changes (documentation, dependencies, etc.)
|
||||
|
||||
@@ -80,7 +80,6 @@ Every pull request that makes a user-facing change should include a changelog en
|
||||
|
||||
```markdown
|
||||
- Updated service configuration:
|
||||
|
||||
- Changed default timeout to 30 seconds
|
||||
- Added retry logic for failed connections
|
||||
```
|
||||
@@ -105,7 +104,6 @@ changelog/1234.changed.2.md
|
||||
|
||||
```markdown
|
||||
- Updated service configuration:
|
||||
|
||||
- Changed default timeout to 30 seconds
|
||||
- Added retry logic for failed connections
|
||||
```
|
||||
|
||||
45
README.md
45
README.md
@@ -55,6 +55,16 @@ Looking for help debugging your pipeline and processors? Check out [Whisker](htt
|
||||
|
||||
Love terminal applications? Check out [Tail](https://github.com/pipecat-ai/tail), a terminal dashboard for Pipecat.
|
||||
|
||||
### 🤖 Claude Code Skills
|
||||
|
||||
Use [Pipecat Skills](https://github.com/pipecat-ai/skills) with [Claude Code](https://claude.ai/code) to scaffold projects, deploy to Pipecat Cloud, and more. Install the marketplace with:
|
||||
|
||||
```
|
||||
claude plugin marketplace add pipecat-ai/skills
|
||||
```
|
||||
|
||||
and install any of the available plugins.
|
||||
|
||||
### 📺️ Pipecat TV Channel
|
||||
|
||||
Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.youtube.com/playlist?list=PLzU2zoMTQIHjqC3v4q2XVSR3hGSzwKFwH) channel.
|
||||
@@ -71,19 +81,19 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Category | Services |
|
||||
| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
@@ -163,6 +173,15 @@ You can get started with Pipecat running on your local machine, then move your a
|
||||
|
||||
> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
|
||||
|
||||
### Claude Code Skills
|
||||
|
||||
Install development workflow skills for contributing to Pipecat with [Claude Code](https://claude.ai/code):
|
||||
|
||||
```
|
||||
claude plugin marketplace add pipecat-ai/pipecat
|
||||
claude plugin install pipecat-dev@pipecat-dev-skills
|
||||
```
|
||||
|
||||
### Running tests
|
||||
|
||||
To run all tests, from the root directory:
|
||||
|
||||
1
changelog/3696.added.md
Normal file
1
changelog/3696.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `TextAggregationMetricsData` metric measuring the time from the first LLM token to the first complete sentence, representing the latency cost of sentence aggregation in the TTS pipeline.
|
||||
1
changelog/3696.changed.md
Normal file
1
changelog/3696.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `text_aggregation_mode` parameter to `TTSService` and all TTS subclasses with a new `TextAggregationMode` enum (`SENTENCE`, `TOKEN`). All text now flows through text aggregators regardless of mode, enabling pattern detection and tag handling in TOKEN mode.
|
||||
1
changelog/3696.deprecated.md
Normal file
1
changelog/3696.deprecated.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Deprecated `aggregate_sentences` parameter on `TTSService` and all TTS subclasses. Use `text_aggregation_mode=TextAggregationMode.SENTENCE` or `text_aggregation_mode=TextAggregationMode.TOKEN` instead.
|
||||
19
changelog/3714.added.md
Normal file
19
changelog/3714.added.md
Normal file
@@ -0,0 +1,19 @@
|
||||
- Added support for using strongly-typed objects instead of dicts for updating service settings at runtime.
|
||||
|
||||
Instead of, say:
|
||||
|
||||
```python
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(settings={"language": Language.ES})
|
||||
)
|
||||
```
|
||||
|
||||
you'd do:
|
||||
|
||||
```python
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=DeepgramSTTSettings(language=Language.ES))
|
||||
)
|
||||
```
|
||||
|
||||
Each service now vends strongly-typed classes like `DeepgramSTTSettings` representing the service's runtime-updatable settings.
|
||||
1
changelog/3714.changed.md
Normal file
1
changelog/3714.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Refactored runtime-updatable service settings to use strongly-typed classes (`TTSSettings`, `STTSettings`, `LLMSettings`, and service-specific subclasses) instead of plain dicts. Each service's `_settings` now holds these strongly-typed objects. For service maintainers, see changes in COMMUNITY_INTEGRATIONS.md.
|
||||
1
changelog/3714.deprecated.2.md
Normal file
1
changelog/3714.deprecated.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Dict-based `*UpdateSettingsFrame(settings={...})` is deprecated in favor of passing typed settings delta objects with `*UpdateSettingsFrame(delta={...})`.
|
||||
3
changelog/3714.deprecated.md
Normal file
3
changelog/3714.deprecated.md
Normal file
@@ -0,0 +1,3 @@
|
||||
- Deprecated `set_model()`, `set_voice()`, and `set_language()` on AI services in favor of runtime updates via `TTSUpdateSettingsFrame`, `STTUpdateSettingsFrame`, and `LLMUpdateSettingsFrame`.
|
||||
|
||||
⚠️ Note, too, a subtle behavior change in these deprecated methods. Whereas previously only `set_language()` caused the service to actually react to the update (e.g. by reconnecting to a remote service so it an pick up the change), now all these methods do. This change was made as part of a refactor making them all work the same way under the hood.
|
||||
1
changelog/3759.performance.md
Normal file
1
changelog/3759.performance.md
Normal file
@@ -0,0 +1 @@
|
||||
- Switched `GradiumTTSService` from `InterruptibleWordTTSService` to `AudioContextWordTTSService`, eliminating websocket disconnect/reconnect on every interruption by using `client_req_id`-based multiplexing.
|
||||
1
changelog/3786.changed.md
Normal file
1
changelog/3786.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Word timestamp support has been moved from `WordTTSService` into `TTSService` via a new `supports_word_timestamps` parameter. Services that previously extended `WordTTSService`, `AudioContextWordTTSService`, or `WebsocketWordTTSService` now pass `supports_word_timestamps=True` to their parent `__init__` instead.
|
||||
5
changelog/3786.deprecated.md
Normal file
5
changelog/3786.deprecated.md
Normal file
@@ -0,0 +1,5 @@
|
||||
- Deprecated `WordTTSService`, `WebsocketWordTTSService`, `AudioContextWordTTSService`, and `InterruptibleWordTTSService`. Use their non-word counterparts with `supports_word_timestamps=True` instead:
|
||||
- `WordTTSService` → `TTSService(supports_word_timestamps=True)`
|
||||
- `WebsocketWordTTSService` → `WebsocketTTSService(supports_word_timestamps=True)`
|
||||
- `AudioContextWordTTSService` → `AudioContextTTSService(supports_word_timestamps=True)`
|
||||
- `InterruptibleWordTTSService` → `InterruptibleTTSService(supports_word_timestamps=True)`
|
||||
1
changelog/3803.fixed.md
Normal file
1
changelog/3803.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed Poetry compatibility by inlining `local-smart-turn-v3` dependencies (`transformers`, `onnxruntime`) into core dependencies instead of using a self-referential extra.
|
||||
1
changelog/3803.removed.md
Normal file
1
changelog/3803.removed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Removed `local-smart-turn-v3` optional extra from `pyproject.toml`. The `transformers` and `onnxruntime` packages are now always installed as core dependencies since they are required by the default turn stop strategy, `TurnAnalyzerUserTurnStopStrategy` which uses `LocalSmartTurnAnalyzerV3`.
|
||||
1
changelog/3806.added.md
Normal file
1
changelog/3806.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `output_medium` parameter to `AgentInputParams` and `OneShotInputParams` in Ultravox service to control initial output medium (text or voice) at call creation time.
|
||||
1
changelog/3806.changed.2.md
Normal file
1
changelog/3806.changed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Improved Ultravox TTFB measurement accuracy by using VAD speech end time instead of `UserStoppedSpeakingFrame` timing.
|
||||
1
changelog/3806.changed.md
Normal file
1
changelog/3806.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Aligned `UltravoxRealtimeLLMService` frame handling with OpenAI/Gemini realtime services: added `InterruptionFrame` handling with metrics cleanup, processing metrics at response boundaries, and improved agent transcript handling for both voice and text output modalities.
|
||||
1
changelog/3807.changed.md
Normal file
1
changelog/3807.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Updated `OpenAIRealtimeLLMService` default model to `gpt-realtime-1.5`.
|
||||
1
changelog/3808.fixed.md
Normal file
1
changelog/3808.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `SentryMetrics` method signatures to match updated `FrameProcessorMetrics` base class, resolving `TypeError` when using `start_time`/`end_time` keyword arguments.
|
||||
1
changelog/3809.added.md
Normal file
1
changelog/3809.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `TurnMetricsData` as a generic metrics class for turn detection, with e2e processing time measurement. `KrispVivaTurn` now emits `TurnMetricsData` with `e2e_processing_time_ms` tracking the interval from VAD speech-to-silence transition to turn completion.
|
||||
1
changelog/3809.changed.md
Normal file
1
changelog/3809.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `api_key` parameter to `KrispVivaSDKManager`, `KrispVivaTurn`, and `KrispVivaFilter` for Krisp SDK v1.6.1+ licensing. Falls back to `KRISP_VIVA_API_KEY` environment variable.
|
||||
1
changelog/3809.deprecated.md
Normal file
1
changelog/3809.deprecated.md
Normal file
@@ -0,0 +1 @@
|
||||
- Deprecated `SmartTurnMetricsData` in favor of `TurnMetricsData`. `BaseSmartTurn` now emits `TurnMetricsData` directly.
|
||||
1
changelog/3811.changed.md
Normal file
1
changelog/3811.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Bumped `nltk` minimum version from 3.9.1 to 3.9.3 to resolve a security vulnerability.
|
||||
1
changelog/3813.fixed.md
Normal file
1
changelog/3813.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed STT TTFB metrics not being reported for `SonioxSTTService` and `AWSTranscribeSTTService` due to missing `can_generate_metrics()` override.
|
||||
1
changelog/3814.added.md
Normal file
1
changelog/3814.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `on_audio_context_interrupted()` and `on_audio_context_completed()` callbacks to `AudioContextTTSService`. Subclasses can override these to perform provider-specific cleanup instead of overriding `_handle_interruption()`.
|
||||
1
changelog/3814.fixed.md
Normal file
1
changelog/3814.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed an issue where `AudioContextTTSService`-based providers (AsyncAI, ElevenLabs, Inworld, Rime) did not close or clean up their server-side audio contexts after normal speech completion, only on interruption.
|
||||
4
changelog/3819.changed.md
Normal file
4
changelog/3819.changed.md
Normal file
@@ -0,0 +1,4 @@
|
||||
- `ServiceSettingsUpdateFrame`s are now `UninterruptibleFrame`s. Generally speaking, you don't want a user interruption to prevent a service setting change from going into effect. Note that you usually don't use `ServiceSettingsUpdateFrame` directly, you use one of its subclasses:
|
||||
- `LLMUpdateSettingsFrame`
|
||||
- `TTSUpdateSettingsFrame`
|
||||
- `STTUpdateSettingsFrame`
|
||||
1
changelog/3822.fixed.md
Normal file
1
changelog/3822.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed STT TTFB metrics measuring timeout expiry time instead of actual transcript arrival time.
|
||||
1
changelog/3825.fixed.md
Normal file
1
changelog/3825.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `InterimTranscriptionFrame` and `TranslationFrame` being unintentionally pushed downstream in `LLMUserAggregator`. They are now consumed like `TranscriptionFrame`.
|
||||
1
changelog/3828.fixed.md
Normal file
1
changelog/3828.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed misleading "Empty audio frame received for STT service" warnings when using audio filters (e.g. `RNNoiseFilter`, `KrispVivaFilter`, `AICFilter`) that buffer audio internally.
|
||||
1
changelog/3837.fixed.md
Normal file
1
changelog/3837.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed issues with `RimeNonJsonTTSService` where trailing punctuation is sometimes vocalized
|
||||
1
changelog/3838.removed.md
Normal file
1
changelog/3838.removed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Removed `PlayHTTTSService` and `PlayHTHttpTTSService`. PlayHT has been shut down and is no longer available.
|
||||
1
changelog/3851.removed.md
Normal file
1
changelog/3851.removed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Removed `ProcessingMetricsData` and all `start_processing_metrics()`/`stop_processing_metrics()` methods from `FrameProcessor` and `FrameProcessorMetrics`. These metrics were inconsistently implemented across services and overlapped with the better-defined TTFB metric. TTFB, LLM token usage, TTS character usage, and text aggregation metrics are unaffected.
|
||||
@@ -42,7 +42,7 @@ This script:
|
||||
|
||||
- Creates a fresh virtual environment
|
||||
- Installs all dependencies as specified in requirements files
|
||||
- Handles conflicting dependencies (like grpcio versions for Riva and PlayHT)
|
||||
- Handles conflicting dependencies (like grpcio versions for Riva)
|
||||
- Builds the documentation in an isolated environment
|
||||
- Provides detailed logging of the build process
|
||||
|
||||
@@ -74,7 +74,6 @@ start _build/html/index.html
|
||||
├── index.rst # Main documentation entry point
|
||||
├── requirements-base.txt # Base documentation dependencies
|
||||
├── requirements-riva.txt # Riva-specific dependencies
|
||||
├── requirements-playht.txt # PlayHT-specific dependencies
|
||||
├── build-docs.sh # Local build script
|
||||
└── rtd-test.py # ReadTheDocs test build script
|
||||
```
|
||||
|
||||
@@ -47,7 +47,8 @@ DAILY_ROOM_URL=https://...
|
||||
|
||||
# Deepgram
|
||||
DEEPGRAM_API_KEY=...
|
||||
SAGEMAKER_ENDPOINT_NAME=...
|
||||
SAGEMAKER_STT_ENDPOINT_NAME=...
|
||||
SAGEMAKER_TTS_ENDPOINT_NAME=...
|
||||
|
||||
# DeepSeek
|
||||
DEEPSEEK_API_KEY=...
|
||||
@@ -103,6 +104,7 @@ INWORLD_API_KEY=...
|
||||
KRISP_MODEL_PATH=...
|
||||
|
||||
# Krisp Viva
|
||||
KRISP_VIVA_API_KEY=...
|
||||
KRISP_VIVA_FILTER_MODEL_PATH=...
|
||||
KRISP_VIVA_TURN_MODEL_PATH=...
|
||||
|
||||
@@ -145,10 +147,6 @@ KOALA_ACCESS_KEY=...
|
||||
# Piper
|
||||
PIPER_BASE_URL=...
|
||||
|
||||
# PlayHT
|
||||
PLAYHT_USER_ID=...
|
||||
PLAYHT_API_KEY=...
|
||||
|
||||
# Plivo
|
||||
PLIVO_AUTH_ID=...
|
||||
PLIVO_AUTH_TOKEN=...
|
||||
|
||||
@@ -13,7 +13,6 @@ from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, LLMRunFrame, MetricsFrame
|
||||
from pipecat.metrics.metrics import (
|
||||
LLMUsageMetricsData,
|
||||
ProcessingMetricsData,
|
||||
TTFBMetricsData,
|
||||
TTSUsageMetricsData,
|
||||
)
|
||||
@@ -46,8 +45,6 @@ class MetricsLogger(FrameProcessor):
|
||||
for d in frame.data:
|
||||
if isinstance(d, TTFBMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, ttfb: {d.value}")
|
||||
elif isinstance(d, ProcessingMetricsData):
|
||||
print(f"!!! MetricsFrame: {frame}, processing: {d.value}")
|
||||
elif isinstance(d, LLMUsageMetricsData):
|
||||
tokens = d.value
|
||||
print(
|
||||
|
||||
@@ -24,6 +24,7 @@ from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.tts_service import TextAggregationMode
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
@@ -56,6 +57,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
# Alternatively, you can use TextAggregationMode.TOKEN to stream tokens instead of
|
||||
# sentencesfor faster response times.
|
||||
# text_aggregation_mode=TextAggregationMode.TOKEN,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
@@ -24,7 +24,7 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.aws.llm import AWSBedrockLLMService
|
||||
from pipecat.services.deepgram.stt_sagemaker import DeepgramSageMakerSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.deepgram.tts_sagemaker import DeepgramSageMakerTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
@@ -58,11 +58,19 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram model
|
||||
stt = DeepgramSageMakerSTTService(
|
||||
endpoint_name=os.getenv("SAGEMAKER_ENDPOINT_NAME"),
|
||||
endpoint_name=os.getenv("SAGEMAKER_STT_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
)
|
||||
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-2-andromeda-en")
|
||||
# Initialize Deepgram SageMaker TTS Service
|
||||
# This requires:
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram TTS model
|
||||
tts = DeepgramSageMakerTTSService(
|
||||
endpoint_name=os.getenv("SAGEMAKER_TTS_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
voice="aura-2-andromeda-en",
|
||||
)
|
||||
|
||||
llm = AWSBedrockLLMService(
|
||||
aws_region=os.getenv("AWS_REGION"),
|
||||
|
||||
@@ -31,6 +31,8 @@ from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter
|
||||
from pipecat.audio.turn.krisp_viva_turn import KrispVivaTurn
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.metrics.metrics import TurnMetricsData
|
||||
from pipecat.observers.loggers.metrics_log_observer import MetricsLogObserver
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -41,32 +43,37 @@ from pipecat.processors.aggregators.llm_response_universal import (
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
|
||||
krisp_viva_filter = KrispVivaFilter()
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
audio_in_filter=krisp_viva_filter,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
audio_in_filter=krisp_viva_filter,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
audio_in_filter=krisp_viva_filter,
|
||||
),
|
||||
}
|
||||
|
||||
@@ -76,7 +83,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121"
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
@@ -117,6 +126,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
observers=[MetricsLogObserver(include_metrics={TurnMetricsData})],
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
|
||||
@@ -56,7 +56,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
tts = RimeTTSService(
|
||||
api_key=os.getenv("RIME_API_KEY", ""),
|
||||
voice_id="rex",
|
||||
voice_id="luna",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
@@ -96,7 +96,7 @@ class UserAudioCollector(FrameProcessor):
|
||||
self._user_speaking = True
|
||||
elif isinstance(frame, UserStoppedSpeakingFrame):
|
||||
self._user_speaking = False
|
||||
self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||
await self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||
await self._user_context_aggregator.push_frame(LLMRunFrame())
|
||||
|
||||
elif isinstance(frame, InputAudioRawFrame):
|
||||
|
||||
@@ -72,10 +72,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = AnthropicLLMService(
|
||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||
model="claude-3-7-sonnet-latest",
|
||||
)
|
||||
llm = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY"))
|
||||
llm.register_function("get_weather", get_weather)
|
||||
llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
|
||||
|
||||
|
||||
@@ -65,7 +65,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY"), model="sonar")
|
||||
llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
@@ -70,10 +70,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = SambaNovaLLMService(
|
||||
api_key=os.getenv("SAMBANOVA_API_KEY"),
|
||||
model="Llama-4-Maverick-17B-128E-Instruct",
|
||||
)
|
||||
llm = SambaNovaLLMService(api_key=os.getenv("SAMBANOVA_API_KEY"))
|
||||
# You can also register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||
|
||||
@@ -5,17 +5,21 @@
|
||||
#
|
||||
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
EndTaskFrame,
|
||||
LLMMessagesAppendFrame,
|
||||
LLMRunFrame,
|
||||
TTSSpeakFrame,
|
||||
UserIdleTimeoutUpdateFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -30,6 +34,7 @@ from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
@@ -74,6 +79,17 @@ class IdleHandler:
|
||||
await aggregator.push_frame(EndTaskFrame(), FrameDirection.UPSTREAM)
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
|
||||
# Simulate a slow API call, waiting longer than the user idle timeout.
|
||||
await asyncio.sleep(3)
|
||||
await params.result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def fetch_restaurant_recommendation(params: FunctionCallParams):
|
||||
await asyncio.sleep(6)
|
||||
await params.result_callback({"name": "The Golden Dragon"})
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
@@ -104,6 +120,42 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||
llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation)
|
||||
|
||||
@llm.event_handler("on_function_calls_started")
|
||||
async def on_function_calls_started(service, function_calls):
|
||||
await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
|
||||
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
restaurant_function = FunctionSchema(
|
||||
name="get_restaurant_recommendation",
|
||||
description="Get a restaurant recommendation",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -111,7 +163,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
context = LLMContext(messages, tools)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
@@ -146,6 +198,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
@user_aggregator.event_handler("on_user_turn_idle")
|
||||
async def on_user_turn_idle(aggregator):
|
||||
logger.info(f"User turn idle")
|
||||
await idle_handler.handle_idle(aggregator)
|
||||
|
||||
@user_aggregator.event_handler("on_user_turn_started")
|
||||
@@ -158,6 +211,12 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
await asyncio.sleep(30)
|
||||
logger.info(f"Disabling idle detection")
|
||||
await task.queue_frames([UserIdleTimeoutUpdateFrame(timeout=0)])
|
||||
await asyncio.sleep(30)
|
||||
logger.info(f"Enabling idle detection")
|
||||
await task.queue_frames([UserIdleTimeoutUpdateFrame(timeout=5)])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -98,7 +98,7 @@ class UserAudioCollector(FrameProcessor):
|
||||
self._user_speaking = True
|
||||
elif isinstance(frame, UserStoppedSpeakingFrame):
|
||||
self._user_speaking = False
|
||||
self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||
await self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||
await self._user_context_aggregator.push_frame(LLMContextFrame(context=self._context))
|
||||
elif isinstance(frame, InputAudioRawFrame):
|
||||
if self._user_speaking:
|
||||
|
||||
@@ -117,7 +117,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# First flush any existing audio to finish the current context
|
||||
await tts.flush_audio()
|
||||
# Then set the new voice
|
||||
tts.set_voice(VOICE_IDS[voice_name])
|
||||
await tts.set_voice(VOICE_IDS[voice_name])
|
||||
logger.info(f"Switched to {voice_name} voice")
|
||||
else:
|
||||
logger.warning(f"Unknown voice: {voice_name}")
|
||||
|
||||
@@ -12,6 +12,8 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.metrics.metrics import TurnMetricsData
|
||||
from pipecat.observers.loggers.metrics_log_observer import MetricsLogObserver
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -77,7 +79,6 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
rtvi,
|
||||
stt,
|
||||
user_aggregator, # User responses
|
||||
llm, # LLM
|
||||
@@ -94,17 +95,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
observers=[MetricsLogObserver(include_metrics={TurnMetricsData})],
|
||||
)
|
||||
|
||||
@task.rtvi.event_handler("on_client_ready")
|
||||
async def on_client_ready(rtvi):
|
||||
# Kick off the conversation
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
|
||||
@@ -12,11 +12,18 @@ from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
AssistantTurnStoppedMessage,
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
UserTurnStoppedMessage,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
@@ -24,6 +31,8 @@ from pipecat.services.ultravox.llm import OneShotInputParams, UltravoxRealtimeLL
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv(override=True)
|
||||
@@ -168,8 +177,21 @@ There is also a secret menu that changes daily. If the user asks about it, use t
|
||||
|
||||
llm.register_function("get_secret_menu", get_secret_menu)
|
||||
|
||||
# Necessary to complete the function call lifecycle in Pipecat.
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(LLMContext([]))
|
||||
context = LLMContext([])
|
||||
|
||||
# Necessary to complete the function call lifecycle in Pipecat and
|
||||
# to produce user and assistant turn stopped events.
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[SpeechTimeoutUserTurnStopStrategy()],
|
||||
),
|
||||
# Set the VAD analyzer to create reliable TTFB measurements and
|
||||
# user stop events.
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
# Build the pipeline
|
||||
pipeline = Pipeline(
|
||||
@@ -177,8 +199,8 @@ There is also a secret menu that changes daily. If the user asks about it, use t
|
||||
transport.input(),
|
||||
user_aggregator,
|
||||
llm,
|
||||
assistant_aggregator,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
@@ -203,6 +225,18 @@ There is also a secret menu that changes daily. If the user asks about it, use t
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
@user_aggregator.event_handler("on_user_turn_stopped")
|
||||
async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}user: {message.content}"
|
||||
logger.info(f"Transcript: {line}")
|
||||
|
||||
@assistant_aggregator.event_handler("on_assistant_turn_stopped")
|
||||
async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}assistant: {message.content}"
|
||||
logger.info(f"Transcript: {line}")
|
||||
|
||||
# Run the pipeline
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
await runner.run(task)
|
||||
|
||||
263
examples/foundational/50a-ultravox-realtime-text.py
Normal file
263
examples/foundational/50a-ultravox-realtime-text.py
Normal file
@@ -0,0 +1,263 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import datetime
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
AssistantTurnStoppedMessage,
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
UserTurnStoppedMessage,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.inworld.tts import InworldTTSService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.ultravox.llm import OneShotInputParams, UltravoxRealtimeLLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def get_secret_menu(params: FunctionCallParams):
|
||||
category = params.arguments.get("category", "both")
|
||||
logger.debug(f"Fetching secret menu with category: {category}")
|
||||
items = []
|
||||
if category in {"donuts", "both"}:
|
||||
items.append(
|
||||
{
|
||||
"name": "Butter Pecan Ice Cream (one scoop)",
|
||||
"price": "$2.99",
|
||||
}
|
||||
)
|
||||
if category in {"drinks", "both"}:
|
||||
items.append(
|
||||
{
|
||||
"name": "Banana Smoothie",
|
||||
"price": "$4.99",
|
||||
}
|
||||
)
|
||||
await params.result_callback(
|
||||
{
|
||||
"date": datetime.date.today().isoformat(),
|
||||
"items": items,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
system_prompt = f"""
|
||||
You are a drive-thru order taker for a donut shop called "Dr. Donut". Local time is currently: {datetime.datetime.now().isoformat()}
|
||||
The user is talking to you over voice on their phone, and your response will be read out loud with realistic text-to-speech (TTS) technology.
|
||||
|
||||
Follow every direction here when crafting your response:
|
||||
|
||||
1. Use natural, conversational language that is clear and easy to follow (short sentences, simple words).
|
||||
1a. Be concise and relevant: Most of your responses should be a sentence or two, unless you're asked to go deeper. Don't monopolize the conversation.
|
||||
1b. Use discourse markers to ease comprehension. Never use the list format.
|
||||
|
||||
2. Keep the conversation flowing.
|
||||
2a. Clarify: when there is ambiguity, ask clarifying questions, rather than make assumptions.
|
||||
2b. Don't implicitly or explicitly try to end the chat (i.e. do not end a response with "Talk soon!", or "Enjoy!").
|
||||
2c. Sometimes the user might just want to chat. Ask them relevant follow-up questions.
|
||||
2d. Don't ask them if there's anything else they need help with (e.g. don't say things like "How can I assist you further?").
|
||||
|
||||
3. Remember that this is a voice conversation:
|
||||
3a. Don't use lists, markdown, bullet points, or other formatting that's not typically spoken.
|
||||
3b. Type out numbers in words (e.g. 'twenty twelve' instead of the year 2012)
|
||||
3c. If something doesn't make sense, it's likely because you misheard them. There wasn't a typo, and the user didn't mispronounce anything.
|
||||
|
||||
Remember to follow these rules absolutely, and do not refer to these rules, even if you're asked about them.
|
||||
|
||||
When talking with the user, use the following script:
|
||||
1. Take their order, acknowledging each item as it is ordered. If it's not clear which menu item the user is ordering, ask them to clarify.
|
||||
DO NOT add an item to the order unless it's one of the items on the menu below.
|
||||
2. Once the order is complete, repeat back the order.
|
||||
2a. If the user only ordered a drink, ask them if they would like to add a donut to their order.
|
||||
2b. If the user only ordered donuts, ask them if they would like to add a drink to their order.
|
||||
2c. If the user ordered both drinks and donuts, don't suggest anything.
|
||||
3. Total up the price of all ordered items and inform the user.
|
||||
4. Ask the user to pull up to the drive thru window.
|
||||
If the user asks for something that's not on the menu, inform them of that fact, and suggest the most similar item on the menu.
|
||||
If the user says something unrelated to your role, responed with "Um... this is a Dr. Donut."
|
||||
If the user says "thank you", respond with "My pleasure."
|
||||
If the user asks about what's on the menu, DO NOT read the entire menu to them. Instead, give a couple suggestions.
|
||||
|
||||
The menu of available items is as follows:
|
||||
|
||||
# DONUTS
|
||||
|
||||
PUMPKIN SPICE ICED DOUGHNUT $1.29
|
||||
PUMPKIN SPICE CAKE DOUGHNUT $1.29
|
||||
OLD FASHIONED DOUGHNUT $1.29
|
||||
CHOCOLATE ICED DOUGHNUT $1.09
|
||||
CHOCOLATE ICED DOUGHNUT WITH SPRINKLES $1.09
|
||||
RASPBERRY FILLED DOUGHNUT $1.09
|
||||
BLUEBERRY CAKE DOUGHNUT $1.09
|
||||
STRAWBERRY ICED DOUGHNUT WITH SPRINKLES $1.09
|
||||
LEMON FILLED DOUGHNUT $1.09
|
||||
DOUGHNUT HOLES $3.99
|
||||
|
||||
# COFFEE & DRINKS
|
||||
|
||||
PUMPKIN SPICE COFFEE $2.59
|
||||
PUMPKIN SPICE LATTE $4.59
|
||||
REGULAR BREWED COFFEE $1.79
|
||||
DECAF BREWED COFFEE $1.79
|
||||
LATTE $3.49
|
||||
CAPPUCINO $3.49
|
||||
CARAMEL MACCHIATO $3.49
|
||||
MOCHA LATTE $3.49
|
||||
CARAMEL MOCHA LATTE $3.49
|
||||
|
||||
There is also a secret menu that changes daily. If the user asks about it, use the get_secret_menu tool to look up today's secret menu items.
|
||||
"""
|
||||
|
||||
secret_menu_function = FunctionSchema(
|
||||
name="get_secret_menu",
|
||||
description="Get today's secret menu items",
|
||||
properties={
|
||||
"category": {
|
||||
"type": "string",
|
||||
"enum": ["donuts", "drinks", "both"],
|
||||
"description": "The category of secret menu items to retrieve. Defaults to both.",
|
||||
},
|
||||
},
|
||||
required=[],
|
||||
)
|
||||
|
||||
llm = UltravoxRealtimeLLMService(
|
||||
params=OneShotInputParams(
|
||||
api_key=os.getenv("ULTRAVOX_API_KEY"),
|
||||
system_prompt=system_prompt,
|
||||
temperature=0.3,
|
||||
max_duration=datetime.timedelta(minutes=3),
|
||||
output_medium="text",
|
||||
),
|
||||
one_shot_selected_tools=ToolsSchema(standard_tools=[secret_menu_function]),
|
||||
)
|
||||
|
||||
llm.register_function("get_secret_menu", get_secret_menu)
|
||||
|
||||
tts = InworldTTSService(
|
||||
api_key=os.getenv("INWORLD_API_KEY", ""),
|
||||
voice_id="Ashley",
|
||||
model="inworld-tts-1",
|
||||
temperature=1.1,
|
||||
)
|
||||
|
||||
context = LLMContext([])
|
||||
|
||||
# Necessary to complete the function call lifecycle in Pipecat and
|
||||
# to produce user and assistant turn stopped events.
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[SpeechTimeoutUserTurnStopStrategy()],
|
||||
),
|
||||
# Set the VAD analyzer to emulate timing of the model.
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
|
||||
),
|
||||
)
|
||||
|
||||
# Build the pipeline
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
# Configure the pipeline task
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
# Handle client connection event
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
|
||||
# Handle client disconnection events
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
@user_aggregator.event_handler("on_user_turn_stopped")
|
||||
async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}user: {message.content}"
|
||||
logger.info(f"Transcript: {line}")
|
||||
|
||||
@assistant_aggregator.event_handler("on_assistant_turn_stopped")
|
||||
async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage):
|
||||
timestamp = f"[{message.timestamp}] " if message.timestamp else ""
|
||||
line = f"{timestamp}assistant: {message.content}"
|
||||
logger.info(f"Transcript: {line}")
|
||||
|
||||
# Run the pipeline
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
191
examples/foundational/53-concurrent-llm-rtvi-ignored-sources.py
Normal file
191
examples/foundational/53-concurrent-llm-rtvi-ignored-sources.py
Normal file
@@ -0,0 +1,191 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""RTVIObserver ignored sources example.
|
||||
|
||||
This example shows how to suppress RTVI messages from a specific pipeline
|
||||
processor so that secondary branches don't leak events to the client.
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.audio.vad_processor import VADProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIObserverParams
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_turn_processor import UserTurnProcessor
|
||||
from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info("Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
# Main LLM — drives the conversation. Its RTVI events reach the client.
|
||||
main_llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
main_messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
# Evaluator LLM — silently grades the user's message in the background.
|
||||
# Its RTVI events will be suppressed so the client is unaware of this branch.
|
||||
evaluator_llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
name="EvaluatorLLM",
|
||||
)
|
||||
|
||||
evaluator_messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a silent quality evaluator. When given a user message, "
|
||||
"respond with a single JSON object: "
|
||||
'{"score": <1-5>, "reason": "<brief reason>"}. '
|
||||
"Do not respond conversationally."
|
||||
),
|
||||
},
|
||||
]
|
||||
|
||||
main_context = LLMContext(main_messages)
|
||||
evaluator_context = LLMContext(evaluator_messages)
|
||||
|
||||
# We use an external VADProcessor because the UserTurnProcessor is shared
|
||||
# across multiple parallel aggregators. The VADProcessor emits
|
||||
# VADUserStartedSpeakingFrame and VADUserStoppedSpeakingFrame which the
|
||||
# UserTurnProcessor needs to manage turn lifecycle.
|
||||
vad_processor = VADProcessor(vad_analyzer=SileroVADAnalyzer())
|
||||
|
||||
# We use this external user turn processor. This processor will push
|
||||
# UserStartedSpeakingFrame and UserStoppedSpeakingFrame as well as
|
||||
# interruptions. This can be used in advanced cases when there are multiple
|
||||
# aggregators in the pipeline.
|
||||
user_turn_processor = UserTurnProcessor()
|
||||
|
||||
# We use external user turn strategies for both aggregators since the turn
|
||||
# management is done by the common UserTurnProcessor.
|
||||
main_context_aggregator = LLMContextAggregatorPair(
|
||||
main_context,
|
||||
user_params=LLMUserAggregatorParams(user_turn_strategies=ExternalUserTurnStrategies()),
|
||||
)
|
||||
evaluator_context_aggregator = LLMContextAggregatorPair(
|
||||
evaluator_context,
|
||||
user_params=LLMUserAggregatorParams(user_turn_strategies=ExternalUserTurnStrategies()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
vad_processor,
|
||||
user_turn_processor,
|
||||
ParallelPipeline(
|
||||
# Main branch: speaks to the user.
|
||||
[
|
||||
main_context_aggregator.user(),
|
||||
main_llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
main_context_aggregator.assistant(),
|
||||
],
|
||||
# Evaluator branch: silent background scoring, no audio output.
|
||||
[
|
||||
evaluator_context_aggregator.user(),
|
||||
evaluator_llm,
|
||||
evaluator_context_aggregator.assistant(),
|
||||
],
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
rtvi_observer_params=RTVIObserverParams(ignored_sources=[evaluator_llm]),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info("Client connected")
|
||||
main_messages.append(
|
||||
{"role": "system", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
evaluator_messages.append({"role": "system", "content": "Ready to evaluate user messages."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info("Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
128
examples/foundational/55a-update-settings-deepgram-flux-stt.py
Normal file
128
examples/foundational/55a-update-settings-deepgram-flux-stt.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService, DeepgramFluxSTTSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramFluxSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Deepgram Flux STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=DeepgramFluxSTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -0,0 +1,148 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from deepgram import LiveOptions
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt_sagemaker import (
|
||||
DeepgramSageMakerSTTService,
|
||||
DeepgramSageMakerSTTSettings,
|
||||
)
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSageMakerSTTService(
|
||||
endpoint_name=os.getenv("SAGEMAKER_STT_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
# NOTE: after this change, the bot will only respond if you speak Spanish
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Deepgram SageMaker STT settings: language=es, punctuate=False")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(
|
||||
delta=DeepgramSageMakerSTTSettings(
|
||||
language=Language.ES,
|
||||
live_options=LiveOptions(punctuate=False),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Old-style dict update (for backward-compat testing):
|
||||
# await asyncio.sleep(10)
|
||||
# logger.info("Updating Deepgram SageMaker STT settings via dict: punctuate=False, filler_words=True")
|
||||
# await task.queue_frame(
|
||||
# STTUpdateSettingsFrame(settings={"punctuate": False, "filler_words": True})
|
||||
# )
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
142
examples/foundational/55a-update-settings-deepgram-stt.py
Normal file
142
examples/foundational/55a-update-settings-deepgram-stt.py
Normal file
@@ -0,0 +1,142 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from deepgram import LiveOptions
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService, DeepgramSTTSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
# NOTE: after this change, the bot will only respond if you speak Spanish
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Deepgram STT settings: language=es, punctuate=False")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(
|
||||
delta=DeepgramSTTSettings(
|
||||
language=Language.ES,
|
||||
live_options=LiveOptions(punctuate=False),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Old-style dict update (for backward-compat testing):
|
||||
# await asyncio.sleep(10)
|
||||
# logger.info("Updating Deepgram STT settings via dict: punctuate=False, filler_words=True")
|
||||
# await task.queue_frame(
|
||||
# STTUpdateSettingsFrame(settings={"punctuate": False, "filler_words": True})
|
||||
# )
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
129
examples/foundational/55b-update-settings-azure-stt.py
Normal file
129
examples/foundational/55b-update-settings-azure-stt.py
Normal file
@@ -0,0 +1,129 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.azure.stt import AzureSTTService, AzureSTTSettings
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = AzureSTTService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Azure STT settings: language=es")
|
||||
await task.queue_frame(STTUpdateSettingsFrame(delta=AzureSTTSettings(language=Language.ES)))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
128
examples/foundational/55c-update-settings-google-stt.py
Normal file
128
examples/foundational/55c-update-settings-google-stt.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.google.stt import GoogleSTTService, GoogleSTTSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = GoogleSTTService(credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Google STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=GoogleSTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
128
examples/foundational/55d-update-settings-assemblyai-stt.py
Normal file
128
examples/foundational/55d-update-settings-assemblyai-stt.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = AssemblyAISTTService(api_key=os.getenv("ASSEMBLYAI_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating AssemblyAI STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=AssemblyAISTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
128
examples/foundational/55e-update-settings-gladia-stt.py
Normal file
128
examples/foundational/55e-update-settings-gladia-stt.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.gladia.stt import GladiaSTTService, GladiaSTTSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = GladiaSTTService(api_key=os.getenv("GLADIA_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Gladia STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=GladiaSTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -0,0 +1,131 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.elevenlabs.stt import (
|
||||
ElevenLabsRealtimeSTTService,
|
||||
ElevenLabsRealtimeSTTSettings,
|
||||
)
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = ElevenLabsRealtimeSTTService(api_key=os.getenv("ELEVENLABS_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating ElevenLabs Realtime STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=ElevenLabsRealtimeSTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
133
examples/foundational/55g-update-settings-elevenlabs-stt.py
Normal file
133
examples/foundational/55g-update-settings-elevenlabs-stt.py
Normal file
@@ -0,0 +1,133 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.elevenlabs.stt import ElevenLabsSTTService, ElevenLabsSTTSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = ElevenLabsSTTService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating ElevenLabs STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=ElevenLabsSTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
153
examples/foundational/55h-update-settings-speechmatics-stt.py
Normal file
153
examples/foundational/55h-update-settings-speechmatics-stt.py
Normal file
@@ -0,0 +1,153 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService, SpeechmaticsSTTSettings
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SpeechmaticsSTTService(
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
params=SpeechmaticsSTTService.InputParams(
|
||||
enable_diarization=True,
|
||||
speaker_active_format="<{speaker_id}>{text}</{speaker_id}>",
|
||||
speaker_passive_format="<PASSIVE><{speaker_id}>{text}</{speaker_id}></PASSIVE>",
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Speechmatics STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=SpeechmaticsSTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Speechmatics STT settings: focus_speakers=['S1']")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=SpeechmaticsSTTSettings(focus_speakers=["S1"]))
|
||||
)
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info(
|
||||
"Updating Speechmatics STT settings: speaker_active_format=<SPEAKER_{speaker_id}>{text}</SPEAKER_{speaker_id}>"
|
||||
)
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(
|
||||
delta=SpeechmaticsSTTSettings(
|
||||
speaker_active_format="<SPEAKER_{speaker_id}>{text}</SPEAKER_{speaker_id}>"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
132
examples/foundational/55i-update-settings-whisper-api-stt.py
Normal file
132
examples/foundational/55i-update-settings-whisper-api-stt.py
Normal file
@@ -0,0 +1,132 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import OpenAISTTService
|
||||
from pipecat.services.whisper.base_stt import BaseWhisperSTTSettings
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
# This file is meant to exercise Whisper API-based STT services, so we use
|
||||
# OpenAI's Whisper STT as an example here. Here we could've also used:
|
||||
# - SambaNova
|
||||
# - Groq
|
||||
stt = OpenAISTTService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating OpenAI STT settings: language="es"')
|
||||
await task.queue_frame(STTUpdateSettingsFrame(delta=BaseWhisperSTTSettings(language="es")))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
128
examples/foundational/55j-update-settings-sarvam-stt.py
Normal file
128
examples/foundational/55j-update-settings-sarvam-stt.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SarvamSTTService(api_key=os.getenv("SARVAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Sarvam STT settings: language=en-IN")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=SarvamSTTSettings(language=Language.EN_IN))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
128
examples/foundational/55k-update-settings-soniox-stt.py
Normal file
128
examples/foundational/55k-update-settings-soniox-stt.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.soniox.stt import SonioxSTTService, SonioxSTTSettings
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SonioxSTTService(api_key=os.getenv("SONIOX_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Soniox STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=SonioxSTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
128
examples/foundational/55l-update-settings-aws-transcribe-stt.py
Normal file
128
examples/foundational/55l-update-settings-aws-transcribe-stt.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.aws.stt import AWSTranscribeSTTService, AWSTranscribeSTTSettings
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = AWSTranscribeSTTService()
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating AWS Transcribe STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=AWSTranscribeSTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
128
examples/foundational/55m-update-settings-cartesia-stt.py
Normal file
128
examples/foundational/55m-update-settings-cartesia-stt.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.stt import CartesiaSTTService, CartesiaSTTSettings
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Cartesia STT settings: language=es")
|
||||
await task.queue_frame(
|
||||
STTUpdateSettingsFrame(delta=CartesiaSTTSettings(language=Language.ES))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
133
examples/foundational/55n-update-settings-cartesia-http-tts.py
Normal file
133
examples/foundational/55n-update-settings-cartesia-http-tts.py
Normal file
@@ -0,0 +1,133 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import (
|
||||
CartesiaHttpTTSService,
|
||||
CartesiaTTSSettings,
|
||||
GenerationConfig,
|
||||
)
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Cartesia HTTP TTS settings: speed increased to 1.5")
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(
|
||||
delta=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5))
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -4,14 +4,14 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -22,9 +22,9 @@ from pipecat.processors.aggregators.llm_response_universal import (
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService, CartesiaTTSSettings, GenerationConfig
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.playht.tts import PlayHTHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
@@ -54,10 +54,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = PlayHTHttpTTSService(
|
||||
user_id=os.getenv("PLAYHT_USER_ID"),
|
||||
api_key=os.getenv("PLAYHT_API_KEY"),
|
||||
voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
@@ -103,6 +102,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Cartesia TTS settings: speed increased to 1.5")
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(
|
||||
delta=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5))
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
132
examples/foundational/55o-update-settings-elevenlabs-http-tts.py
Normal file
132
examples/foundational/55o-update-settings-elevenlabs-http-tts.py
Normal file
@@ -0,0 +1,132 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService, ElevenLabsHttpTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = ElevenLabsHttpTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating ElevenLabs TTS settings: speed=0.7")
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=ElevenLabsHttpTTSSettings(speed=0.7))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
134
examples/foundational/55o-update-settings-elevenlabs-tts.py
Normal file
134
examples/foundational/55o-update-settings-elevenlabs-tts.py
Normal file
@@ -0,0 +1,134 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService, ElevenLabsTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating ElevenLabs TTS settings: speed=0.7")
|
||||
await task.queue_frame(TTSUpdateSettingsFrame(delta=ElevenLabsTTSSettings(speed=0.7)))
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating ElevenLabs TTS settings: switching to a different voice")
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(
|
||||
delta=ElevenLabsTTSSettings(voice=os.getenv("ELEVENLABS_VOICE_ID_ALT"))
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
123
examples/foundational/55p-update-settings-openai-tts.py
Normal file
123
examples/foundational/55p-update-settings-openai-tts.py
Normal file
@@ -0,0 +1,123 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_out_sample_rate=24000,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating OpenAI TTS settings: speed=2.0")
|
||||
await task.queue_frame(TTSUpdateSettingsFrame(delta=OpenAITTSSettings(speed=2.0)))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
137
examples/foundational/55q-update-settings-deepgram-http-tts.py
Normal file
137
examples/foundational/55q-update-settings-deepgram-http-tts.py
Normal file
@@ -0,0 +1,137 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramHttpTTSService, DeepgramTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramHttpTTSService(
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating Deepgram TTS settings: voice="aura-2-aries-en"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-aries-en"))
|
||||
)
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-luna-en"))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -0,0 +1,137 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts_sagemaker import (
|
||||
DeepgramSageMakerTTSService,
|
||||
DeepgramSageMakerTTSSettings,
|
||||
)
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramSageMakerTTSService(
|
||||
endpoint_name=os.getenv("SAGEMAKER_TTS_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
voice="aura-2-helena-en",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating Deepgram SageMaker TTS settings: voice="aura-2-aries-en"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=DeepgramSageMakerTTSSettings(voice="aura-2-aries-en"))
|
||||
)
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating Deepgram SageMaker TTS settings: voice="aura-2-luna-en"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=DeepgramSageMakerTTSSettings(voice="aura-2-luna-en"))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
130
examples/foundational/55q-update-settings-deepgram-tts.py
Normal file
130
examples/foundational/55q-update-settings-deepgram-tts.py
Normal file
@@ -0,0 +1,130 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService, DeepgramTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating Deepgram TTS settings: voice="aura-2-aries-en"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-aries-en"))
|
||||
)
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-luna-en"))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
127
examples/foundational/55r-update-settings-azure-http-tts.py
Normal file
127
examples/foundational/55r-update-settings-azure-http-tts.py
Normal file
@@ -0,0 +1,127 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.azure.tts import AzureHttpTTSService, AzureTTSSettings
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = AzureHttpTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating Azure TTS settings: rate="0.7", style="sad"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=AzureTTSSettings(rate="0.7", style="sad"))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
127
examples/foundational/55r-update-settings-azure-tts.py
Normal file
127
examples/foundational/55r-update-settings-azure-tts.py
Normal file
@@ -0,0 +1,127 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.azure.tts import AzureTTSService, AzureTTSSettings
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = AzureTTSService(
|
||||
api_key=os.getenv("AZURE_SPEECH_API_KEY"),
|
||||
region=os.getenv("AZURE_SPEECH_REGION"),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating Azure TTS settings: rate="0.7", style="sad"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=AzureTTSSettings(rate="0.7", style="sad"))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
124
examples/foundational/55s-update-settings-google-http-tts.py
Normal file
124
examples/foundational/55s-update-settings-google-http-tts.py
Normal file
@@ -0,0 +1,124 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.tts import GoogleHttpTTSService, GoogleHttpTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = GoogleHttpTTSService(credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"))
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Google HTTP TTS settings: speaking_rate=1.4")
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=GoogleHttpTTSSettings(speaking_rate=1.4))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
124
examples/foundational/55s-update-settings-google-stream-tts.py
Normal file
124
examples/foundational/55s-update-settings-google-stream-tts.py
Normal file
@@ -0,0 +1,124 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.tts import GoogleStreamTTSSettings, GoogleTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = GoogleTTSService(credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"))
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Google Stream TTS settings: speaking_rate=1.4")
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=GoogleStreamTTSSettings(speaking_rate=1.4))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
128
examples/foundational/55u-update-settings-rime-http-tts.py
Normal file
128
examples/foundational/55u-update-settings-rime-http-tts.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.rime.tts import RimeHttpTTSService, RimeTTSSettings
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = RimeHttpTTSService(
|
||||
api_key=os.getenv("RIME_API_KEY"), voice_id="eva", aiohttp_session=session
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Rime TTS settings: voice=rex")
|
||||
await task.queue_frame(TTSUpdateSettingsFrame(delta=RimeTTSSettings(voice="rex")))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
125
examples/foundational/55u-update-settings-rime-tts.py
Normal file
125
examples/foundational/55u-update-settings-rime-tts.py
Normal file
@@ -0,0 +1,125 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = RimeTTSService(
|
||||
api_key=os.getenv("RIME_API_KEY"),
|
||||
voice_id="luna",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Rime TTS settings: voice=bond")
|
||||
await task.queue_frame(TTSUpdateSettingsFrame(delta=RimeTTSSettings(voice="bond")))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
125
examples/foundational/55v-update-settings-lmnt-tts.py
Normal file
125
examples/foundational/55v-update-settings-lmnt-tts.py
Normal file
@@ -0,0 +1,125 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.lmnt.tts import LmntTTSService, LmntTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = LmntTTSService(
|
||||
api_key=os.getenv("LMNT_API_KEY"),
|
||||
voice_id="lily",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating LMNT TTS settings: voice="tyler"')
|
||||
await task.queue_frame(TTSUpdateSettingsFrame(delta=LmntTTSSettings(voice="tyler")))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
127
examples/foundational/55w-update-settings-fish-tts.py
Normal file
127
examples/foundational/55w-update-settings-fish-tts.py
Normal file
@@ -0,0 +1,127 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.fish.tts import FishAudioTTSService, FishAudioTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = FishAudioTTSService(
|
||||
api_key=os.getenv("FISH_API_KEY"),
|
||||
model="4ce7e917cedd4bc2bb2e6ff3a46acaa1", # Barack Obama
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Fish Audio TTS settings: prosody_speed=1.5")
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=FishAudioTTSSettings(prosody_speed=1.5))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
130
examples/foundational/55x-update-settings-minimax-tts.py
Normal file
130
examples/foundational/55x-update-settings-minimax-tts.py
Normal file
@@ -0,0 +1,130 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.minimax.tts import MiniMaxHttpTTSService, MiniMaxTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = MiniMaxHttpTTSService(
|
||||
api_key=os.getenv("MINIMAX_API_KEY", ""),
|
||||
group_id=os.getenv("MINIMAX_GROUP_ID", ""),
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating MiniMax TTS settings: speed=1.5, emotion="happy"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(delta=MiniMaxTTSSettings(speed=1.5, emotion="happy"))
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
122
examples/foundational/55y-update-settings-groq-tts.py
Normal file
122
examples/foundational/55y-update-settings-groq-tts.py
Normal file
@@ -0,0 +1,122 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.groq.tts import GroqTTSService, GroqTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = GroqTTSService(api_key=os.getenv("GROQ_API_KEY"))
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Groq TTS settings: voice=troy")
|
||||
await task.queue_frame(TTSUpdateSettingsFrame(delta=GroqTTSSettings(voice="troy")))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
129
examples/foundational/55z-update-settings-hume-tts.py
Normal file
129
examples/foundational/55z-update-settings-hume-tts.py
Normal file
@@ -0,0 +1,129 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.hume.tts import HumeTTSService, HumeTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = HumeTTSService(
|
||||
api_key=os.getenv("HUME_API_KEY"),
|
||||
voice_id="f898a92e-685f-43fa-985b-a46920f0650b",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info('Updating Hume TTS settings: speed=2.0, description="Speak with excitement"')
|
||||
await task.queue_frame(
|
||||
TTSUpdateSettingsFrame(
|
||||
delta=HumeTTSSettings(speed=2.0, description="Speak with excitement")
|
||||
)
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
127
examples/foundational/55za-update-settings-neuphonic-http-tts.py
Normal file
127
examples/foundational/55za-update-settings-neuphonic-http-tts.py
Normal file
@@ -0,0 +1,127 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService, NeuphonicTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tts = NeuphonicHttpTTSService(
|
||||
api_key=os.getenv("NEUPHONIC_API_KEY"),
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Neuphonic HTTP TTS settings: speed=1.4")
|
||||
await task.queue_frame(TTSUpdateSettingsFrame(delta=NeuphonicTTSSettings(speed=1.4)))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
122
examples/foundational/55za-update-settings-neuphonic-tts.py
Normal file
122
examples/foundational/55za-update-settings-neuphonic-tts.py
Normal file
@@ -0,0 +1,122 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.neuphonic.tts import NeuphonicTTSService, NeuphonicTTSSettings
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = NeuphonicTTSService(api_key=os.getenv("NEUPHONIC_API_KEY"))
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
user_aggregator,
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
assistant_aggregator,
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
await asyncio.sleep(10)
|
||||
logger.info("Updating Neuphonic TTS settings: speed=1.4")
|
||||
await task.queue_frame(TTSUpdateSettingsFrame(delta=NeuphonicTTSSettings(speed=1.4)))
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user