Compare commits
273 Commits
hush/claud
...
mb/static-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5a6cc4d35c | ||
|
|
28be775740 | ||
|
|
bc730e4069 | ||
|
|
104d06551a | ||
|
|
90ad2a4e81 | ||
|
|
570f2d7fc0 | ||
|
|
f3d99adf8f | ||
|
|
d34f416281 | ||
|
|
5a1deb7cb4 | ||
|
|
a5fc2b1650 | ||
|
|
5cb8d91431 | ||
|
|
ce690848c0 | ||
|
|
30f51edfcd | ||
|
|
cd03d449cb | ||
|
|
57df03aade | ||
|
|
4945cfbd8f | ||
|
|
8d37d3bae7 | ||
|
|
d7b1624d3c | ||
|
|
7f65204c3b | ||
|
|
97eff414c3 | ||
|
|
5b67e76de7 | ||
|
|
b9e79bd06a | ||
|
|
d5105a78e6 | ||
|
|
a352b2d7a0 | ||
|
|
2345090b10 | ||
|
|
af562bf9a8 | ||
|
|
d4993f0dcf | ||
|
|
1790a84bfd | ||
|
|
29c53b99a4 | ||
|
|
aa5a855eab | ||
|
|
e66d6f8ffe | ||
|
|
b8ac2ba713 | ||
|
|
6eea40858e | ||
|
|
90700d10aa | ||
|
|
fa85f7bbc7 | ||
|
|
669f013970 | ||
|
|
76f63e54e2 | ||
|
|
cce5a13444 | ||
|
|
d11e1cd631 | ||
|
|
8b9da632d1 | ||
|
|
b36f7892a4 | ||
|
|
9b43cde128 | ||
|
|
6af4d872a8 | ||
|
|
22398e1410 | ||
|
|
d10467e043 | ||
|
|
cbe131636d | ||
|
|
fef9e3ea32 | ||
|
|
56d8ef2bf4 | ||
|
|
8791559351 | ||
|
|
f6c919354f | ||
|
|
93138466d6 | ||
|
|
5a5a98b497 | ||
|
|
2b4f507d37 | ||
|
|
d6f3a90662 | ||
|
|
8fb0e37965 | ||
|
|
0d45b48f7b | ||
|
|
6af4520b1f | ||
|
|
ba469e5645 | ||
|
|
bd12b60b5c | ||
|
|
54db37ea47 | ||
|
|
752e16f553 | ||
|
|
7c7408a048 | ||
|
|
8f42343927 | ||
|
|
46da6cd91b | ||
|
|
ecb02d9049 | ||
|
|
cc68e00125 | ||
|
|
e0e3b5250b | ||
|
|
55a3b10e70 | ||
|
|
e6b06414b3 | ||
|
|
6bcfb40d12 | ||
|
|
65b1a8ce36 | ||
|
|
2db3d94d06 | ||
|
|
2a26b9f7a3 | ||
|
|
4f77c532fb | ||
|
|
c3a4da4a29 | ||
|
|
84ca0b6d58 | ||
|
|
c1857d255d | ||
|
|
d50ec33079 | ||
|
|
40c84faff5 | ||
|
|
84cd9346f9 | ||
|
|
5d5b19e1d2 | ||
|
|
8d3e10f054 | ||
|
|
1665ce181a | ||
|
|
803a20cc00 | ||
|
|
90bead06ab | ||
|
|
b427d534ae | ||
|
|
b030f1178d | ||
|
|
a627597bca | ||
|
|
4c10ddb7bb | ||
|
|
a4e499dc80 | ||
|
|
ca49acfaa6 | ||
|
|
86147f15f3 | ||
|
|
5cda72d138 | ||
|
|
54e62a8177 | ||
|
|
a592b7fdf0 | ||
|
|
ba2b7c05d6 | ||
|
|
774041e9a1 | ||
|
|
763002f2bc | ||
|
|
50dedf350d | ||
|
|
d3ecbb11c1 | ||
|
|
f453227ba3 | ||
|
|
52cc64019a | ||
|
|
95689cc81c | ||
|
|
675c7c43e3 | ||
|
|
bfd19e867c | ||
|
|
acc9923c0a | ||
|
|
bdc9e7e2e4 | ||
|
|
a587e1b99a | ||
|
|
7853e5ca93 | ||
|
|
614b8e1a62 | ||
|
|
ef51c2a5c6 | ||
|
|
f42dc0d38e | ||
|
|
d87f3543c7 | ||
|
|
fee633cb92 | ||
|
|
607af91153 | ||
|
|
e779233918 | ||
|
|
604d5d0b14 | ||
|
|
342ae7af41 | ||
|
|
c92ec1552e | ||
|
|
93160f1455 | ||
|
|
e3158e1131 | ||
|
|
63a23246d5 | ||
|
|
569ea9849a | ||
|
|
a98ca9b65b | ||
|
|
c9310789dc | ||
|
|
b93e12d701 | ||
|
|
3f77da627d | ||
|
|
35d265770d | ||
|
|
9632efec8c | ||
|
|
27dbfa1eda | ||
|
|
183c0aa4ef | ||
|
|
a69a037ffa | ||
|
|
c46e7f5da0 | ||
|
|
307aeaeda0 | ||
|
|
305ab44132 | ||
|
|
b486f35c70 | ||
|
|
c92080b0d2 | ||
|
|
ddfedaf478 | ||
|
|
b1ad4d5ab0 | ||
|
|
0857aa87be | ||
|
|
fd3c5f69b7 | ||
|
|
72ab329513 | ||
|
|
7999d08b7e | ||
|
|
57821cf709 | ||
|
|
18045582a9 | ||
|
|
7be2b8cc34 | ||
|
|
671cc8eb74 | ||
|
|
b4dce656f0 | ||
|
|
253a1d1114 | ||
|
|
ca613bcb79 | ||
|
|
0423acd8a0 | ||
|
|
7eabaaa0ef | ||
|
|
bbb8b53d03 | ||
|
|
f3b72e9263 | ||
|
|
31c7fbc5ba | ||
|
|
6ab12626d6 | ||
|
|
b77a50de73 | ||
|
|
433c1b9b92 | ||
|
|
bd00587092 | ||
|
|
5a85e27cc5 | ||
|
|
11daa43b1b | ||
|
|
875614ff7a | ||
|
|
eb1bf1e446 | ||
|
|
7456a0a55f | ||
|
|
27277ed3d9 | ||
|
|
5543bc56f3 | ||
|
|
c8496dfb8e | ||
|
|
d3f4cbb620 | ||
|
|
c9f922c479 | ||
|
|
49bd3da26b | ||
|
|
f3ef488925 | ||
|
|
4f08098917 | ||
|
|
a7cd5b0322 | ||
|
|
55dadc9118 | ||
|
|
01bbf61e0d | ||
|
|
10fb77c0e2 | ||
|
|
2612fae527 | ||
|
|
c5be67f293 | ||
|
|
312caaba86 | ||
|
|
ff0eb6d286 | ||
|
|
ef6bbace98 | ||
|
|
06ec21387f | ||
|
|
bdae177125 | ||
|
|
468e159f9b | ||
|
|
a4acafd3be | ||
|
|
105824a372 | ||
|
|
55e0d4ecc4 | ||
|
|
9102e81cb8 | ||
|
|
d7d8e93a3d | ||
|
|
bf9b166464 | ||
|
|
e80e0eab29 | ||
|
|
61242e6575 | ||
|
|
8841387121 | ||
|
|
ee695ae9fe | ||
|
|
52012b0fb2 | ||
|
|
f7a1c6b719 | ||
|
|
6aa77ccc13 | ||
|
|
45b7ec4e2c | ||
|
|
1c434c6ad5 | ||
|
|
4591affba9 | ||
|
|
91346f5f37 | ||
|
|
6a66ebe332 | ||
|
|
c1d4180042 | ||
|
|
81a53c699c | ||
|
|
60168f7f69 | ||
|
|
23d7608e5f | ||
|
|
99242c0a93 | ||
|
|
3a71865cf4 | ||
|
|
febd52274d | ||
|
|
98fcfd7c91 | ||
|
|
2f23f2e39c | ||
|
|
fc1444c9d6 | ||
|
|
ea94939add | ||
|
|
0c69ae6371 | ||
|
|
8b88280bb1 | ||
|
|
960d0faea5 | ||
|
|
b9390ccb1b | ||
|
|
ca2eb1904f | ||
|
|
4bce58f270 | ||
|
|
7572d63f8f | ||
|
|
3c463c9416 | ||
|
|
bd618d64e3 | ||
|
|
a824660df7 | ||
|
|
58b9019852 | ||
|
|
afcdef8c81 | ||
|
|
bd92104fb3 | ||
|
|
34e9f224a8 | ||
|
|
dca7f3b5b0 | ||
|
|
70a85cd192 | ||
|
|
91e86658b7 | ||
|
|
0a8588669c | ||
|
|
0e99400148 | ||
|
|
648f20db6d | ||
|
|
09b5b6b12d | ||
|
|
0e6a423955 | ||
|
|
dc8972cd94 | ||
|
|
e4e2231958 | ||
|
|
18b3ee743b | ||
|
|
65b8e0e89c | ||
|
|
b77f8b065f | ||
|
|
5fd43faec3 | ||
|
|
abebcf37bd | ||
|
|
ca4e3c79f9 | ||
|
|
e8d1bec03b | ||
|
|
f0cc54589e | ||
|
|
22b9aac2ff | ||
|
|
7f86f4ac27 | ||
|
|
dcab79753b | ||
|
|
bdded9b026 | ||
|
|
1e1e275fea | ||
|
|
effb6aa8f4 | ||
|
|
a4a9bae79e | ||
|
|
c943ef9261 | ||
|
|
f05809520b | ||
|
|
ec17dc6626 | ||
|
|
4e85e81d9b | ||
|
|
a1cc88a233 | ||
|
|
61a230ec53 | ||
|
|
a13380b574 | ||
|
|
2a927189d9 | ||
|
|
a90c15362c | ||
|
|
d3bdd2d246 | ||
|
|
465ae4f706 | ||
|
|
a0d801b658 | ||
|
|
35919a84e3 | ||
|
|
a446bca72d | ||
|
|
8ae834366b | ||
|
|
f128cdd19a | ||
|
|
7921bce4af | ||
|
|
cadced3f79 | ||
|
|
fa5da3b0be | ||
|
|
7e82a0cf49 | ||
|
|
8dfc59be13 |
8
.claude/.gitignore
vendored
8
.claude/.gitignore
vendored
@@ -1,8 +0,0 @@
|
||||
# Claude Code temporary files
|
||||
*.tmp
|
||||
*.log
|
||||
.claude-cache/
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
@@ -1,200 +0,0 @@
|
||||
# Claude Code Quick Start for Pipecat
|
||||
|
||||
This guide helps you get started using Claude Code with the Pipecat project.
|
||||
|
||||
## Initial Setup
|
||||
|
||||
1. **Install Claude Code** (if not already installed):
|
||||
```bash
|
||||
# Follow instructions at https://claude.ai/claude-code
|
||||
```
|
||||
|
||||
2. **Install project dependencies**:
|
||||
```bash
|
||||
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local
|
||||
```
|
||||
|
||||
3. **Install pre-commit hooks**:
|
||||
```bash
|
||||
uv run pre-commit install
|
||||
```
|
||||
|
||||
## Common Commands
|
||||
|
||||
### Testing
|
||||
- "Run all tests"
|
||||
- "Run tests for [specific file]"
|
||||
- "Run tests and show coverage"
|
||||
|
||||
### Code Quality
|
||||
- "Format the code"
|
||||
- "Fix linting issues"
|
||||
- "Run type checking"
|
||||
- "Run pre-commit hooks"
|
||||
|
||||
### Development
|
||||
- "Add a new TTS service for [provider]"
|
||||
- "Create a new processor that [does something]"
|
||||
- "Add a new frame type for [purpose]"
|
||||
- "Document the [ClassName] class" (uses `/docstring` skill)
|
||||
|
||||
### Documentation
|
||||
- "Document this module using Google style"
|
||||
- "Add docstrings to [file/class]"
|
||||
- Use `/docstring ClassName` for comprehensive class documentation
|
||||
|
||||
### Git Operations
|
||||
- "Create a commit for these changes"
|
||||
- "Create a pull request"
|
||||
- Use `/pr-description` skill for detailed PR descriptions
|
||||
- Use `/changelog` skill for changelog entries
|
||||
|
||||
## Custom Skills
|
||||
|
||||
### `/docstring [ClassName]`
|
||||
Automatically documents a Python class and its methods following Google-style conventions.
|
||||
|
||||
**Example:**
|
||||
```
|
||||
/docstring AudioProcessor
|
||||
```
|
||||
|
||||
This will:
|
||||
- Find the class in the codebase
|
||||
- Add module docstring if missing
|
||||
- Add class docstring with purpose and event handlers
|
||||
- Document all public methods
|
||||
- Document constructor parameters
|
||||
- Skip private methods and already-documented code
|
||||
|
||||
### `/changelog`
|
||||
Generates changelog entries using towncrier.
|
||||
|
||||
### `/pr-description`
|
||||
Creates comprehensive pull request descriptions based on your changes.
|
||||
|
||||
## Project-Specific Tips
|
||||
|
||||
### Understanding Pipecat Architecture
|
||||
|
||||
When asking Claude Code to help with development:
|
||||
|
||||
1. **Frame-Based System**: All data flows through frames
|
||||
- Ask: "Explain how frames work in this pipeline"
|
||||
- Reference: `src/pipecat/frames/frames.py`
|
||||
|
||||
2. **Processor Pattern**: Everything is a processor
|
||||
- Ask: "Show me how to create a custom processor"
|
||||
- Reference: `src/pipecat/processors/frame_processor.py`
|
||||
|
||||
3. **Service Integrations**: Many AI service integrations
|
||||
- Ask: "How do I add a new TTS service?"
|
||||
- Reference: `src/pipecat/services/tts/`
|
||||
|
||||
### Working with Examples
|
||||
|
||||
- "Show me examples of [feature]"
|
||||
- "Create a simple example that [does something]"
|
||||
- Examples are in `examples/foundational/` (building blocks) and `examples/` (complete apps)
|
||||
|
||||
### Debugging
|
||||
|
||||
- "Help me debug this pipeline"
|
||||
- "Why isn't my processor receiving frames?"
|
||||
- "Trace the flow of this frame type through the pipeline"
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Be Specific**: Instead of "fix this", say "fix the audio dropouts in the TTS processor"
|
||||
|
||||
2. **Context**: Provide context about what you're building
|
||||
- "I'm building a voice assistant that needs to interrupt TTS"
|
||||
- "I want to add vision capabilities to this chatbot"
|
||||
|
||||
3. **Reference Examples**: Point to existing patterns
|
||||
- "Similar to how DeepgramTTS works"
|
||||
- "Following the pattern in OpenAILLMService"
|
||||
|
||||
4. **Test-Driven**: Ask for tests
|
||||
- "Create tests for this processor"
|
||||
- "Add test coverage for the error handling"
|
||||
|
||||
5. **Documentation**: Keep docs updated
|
||||
- "Update the docstrings for these changes"
|
||||
- "Add a usage example to the class docstring"
|
||||
|
||||
## Example Conversations
|
||||
|
||||
### Adding a New Feature
|
||||
```
|
||||
You: "I need to add a processor that detects when the user says 'hello' and triggers an event"
|
||||
|
||||
Claude Code will:
|
||||
1. Create the processor class
|
||||
2. Implement frame processing logic
|
||||
3. Add event emission
|
||||
4. Create tests
|
||||
5. Add documentation
|
||||
```
|
||||
|
||||
### Debugging an Issue
|
||||
```
|
||||
You: "The audio is cutting out in my pipeline. Here's the code: [paste code]"
|
||||
|
||||
Claude Code will:
|
||||
1. Analyze the pipeline structure
|
||||
2. Check for common issues (buffer sizes, async handling, etc.)
|
||||
3. Suggest fixes
|
||||
4. Explain the root cause
|
||||
```
|
||||
|
||||
### Refactoring
|
||||
```
|
||||
You: "Refactor the XYZ service to use the new WebSocket pattern from ABC service"
|
||||
|
||||
Claude Code will:
|
||||
1. Analyze both services
|
||||
2. Identify the pattern differences
|
||||
3. Apply the refactoring
|
||||
4. Update tests
|
||||
5. Maintain backward compatibility if needed
|
||||
```
|
||||
|
||||
## Useful Prompts
|
||||
|
||||
- "Explain how [feature] works in this codebase"
|
||||
- "Add error handling for [scenario]"
|
||||
- "Create an example that demonstrates [feature]"
|
||||
- "Optimize this processor for [use case]"
|
||||
- "Add logging to help debug [issue]"
|
||||
- "Make this code more maintainable"
|
||||
- "Add type hints to this file"
|
||||
- "Create a comprehensive test suite for [component]"
|
||||
|
||||
## Configuration Reference
|
||||
|
||||
All Claude Code settings are in [.claude/settings.json](.claude/settings.json):
|
||||
- Project commands (test, lint, format, etc.)
|
||||
- Coding standards
|
||||
- File patterns
|
||||
- Important files and directories
|
||||
|
||||
For detailed architecture info, see [.claude/README.md](.claude/README.md).
|
||||
|
||||
## Getting Help
|
||||
|
||||
- **Project docs**: https://docs.pipecat.ai
|
||||
- **Discord**: https://discord.gg/pipecat
|
||||
- **GitHub Issues**: https://github.com/pipecat-ai/pipecat/issues
|
||||
- **Examples**: https://github.com/pipecat-ai/pipecat-examples
|
||||
|
||||
## Tips for Success
|
||||
|
||||
1. Start with small, specific tasks
|
||||
2. Use the custom skills (`/docstring`, `/pr-description`, etc.)
|
||||
3. Reference existing code patterns
|
||||
4. Ask for explanations when confused
|
||||
5. Request tests and documentation
|
||||
6. Run pre-commit hooks before committing
|
||||
|
||||
Happy coding with Claude! 🎙️🤖
|
||||
@@ -1,177 +0,0 @@
|
||||
# Claude Code Setup for Pipecat
|
||||
|
||||
This directory contains configuration and custom skills for working with the Pipecat project using Claude Code.
|
||||
|
||||
## Project Overview
|
||||
|
||||
Pipecat is an open-source Python framework for building real-time voice and multimodal conversational agents. It provides a composable, frame-based architecture for orchestrating audio, video, AI services, and conversation pipelines.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Core Concepts
|
||||
|
||||
1. **Frames** - The fundamental data units in Pipecat (audio, text, images, system messages, etc.)
|
||||
- Located in: `src/pipecat/frames/frames.py`
|
||||
- Different frame types for different data: `AudioRawFrame`, `TextFrame`, `ImageRawFrame`, etc.
|
||||
|
||||
2. **Processors** - Processing units that receive, transform, and emit frames
|
||||
- Base class: `src/pipecat/processors/frame_processor.py`
|
||||
- Can be chained to form pipelines
|
||||
- Examples: STT services, LLMs, TTS services, aggregators, etc.
|
||||
|
||||
3. **Pipelines** - Chains of processors that define data flow
|
||||
- Created using the `Pipeline` class
|
||||
- Processors linked using `link()` method or `|` operator
|
||||
|
||||
4. **Transports** - Handle input/output for audio/video streams
|
||||
- WebRTC (Daily), WebSocket, Local audio, etc.
|
||||
- Located in: `src/pipecat/transports/`
|
||||
|
||||
### Key Directories
|
||||
|
||||
- `src/pipecat/` - Main source code
|
||||
- `frames/` - Frame definitions and utilities
|
||||
- `processors/` - Base processors and common processors
|
||||
- `services/` - AI service integrations (STT, TTS, LLM, etc.)
|
||||
- `transports/` - Transport implementations
|
||||
- `audio/` - Audio processing utilities
|
||||
- `examples/` - Example applications and foundational examples
|
||||
- `tests/` - Test suite
|
||||
- `docs/` - Documentation source
|
||||
|
||||
## Development Workflow
|
||||
|
||||
### Setup
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local
|
||||
|
||||
# Install pre-commit hooks
|
||||
uv run pre-commit install
|
||||
```
|
||||
|
||||
### Running Tests
|
||||
|
||||
```bash
|
||||
# All tests
|
||||
uv run pytest
|
||||
|
||||
# Specific test file
|
||||
uv run pytest tests/test_name.py
|
||||
|
||||
# With coverage
|
||||
uv run coverage run --module pytest
|
||||
uv run coverage report
|
||||
```
|
||||
|
||||
### Code Quality
|
||||
|
||||
```bash
|
||||
# Format code
|
||||
uv run ruff format .
|
||||
|
||||
# Lint code
|
||||
uv run ruff check .
|
||||
|
||||
# Fix linting issues
|
||||
uv run ruff check --fix .
|
||||
|
||||
# Type checking
|
||||
uv run pyright
|
||||
|
||||
# Run all pre-commit hooks
|
||||
uv run pre-commit run --all-files
|
||||
```
|
||||
|
||||
### Building
|
||||
|
||||
```bash
|
||||
# Build package
|
||||
uv build
|
||||
```
|
||||
|
||||
## Custom Skills
|
||||
|
||||
This project includes custom Claude Code skills:
|
||||
|
||||
### `/docstring`
|
||||
Document Python modules and classes using Google-style docstrings.
|
||||
|
||||
Usage: `/docstring ClassName`
|
||||
|
||||
### `/changelog`
|
||||
Generate changelog entries using towncrier.
|
||||
|
||||
### `/pr-description`
|
||||
Generate comprehensive PR descriptions based on changes.
|
||||
|
||||
## Coding Standards
|
||||
|
||||
1. **Docstrings** - Use Google-style docstrings for all public APIs
|
||||
- Module docstrings required
|
||||
- Class docstrings with purpose and event handlers
|
||||
- Method docstrings with Args/Returns/Raises
|
||||
- Constructor (`__init__`) must document all parameters
|
||||
|
||||
2. **Type Hints** - Required for all function signatures
|
||||
- Use `from typing import ...` for complex types
|
||||
- Dataclasses should have field type annotations
|
||||
|
||||
3. **Async/Await** - Consistent use of async patterns
|
||||
- Most processors use async methods
|
||||
- Tests use pytest-asyncio
|
||||
|
||||
4. **Code Style**
|
||||
- Line length: 100 characters max
|
||||
- Ruff for linting and formatting
|
||||
- Follow existing patterns in the codebase
|
||||
|
||||
5. **Testing**
|
||||
- Write tests for new features
|
||||
- Use pytest fixtures for common setups
|
||||
- Mock external services when appropriate
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make changes following coding standards
|
||||
4. Add tests for new functionality
|
||||
5. Run pre-commit hooks: `uv run pre-commit run --all-files`
|
||||
6. Submit a pull request
|
||||
|
||||
## Common Tasks
|
||||
|
||||
### Adding a New Service Integration
|
||||
|
||||
1. Create service file in `src/pipecat/services/<category>/`
|
||||
2. Inherit from appropriate base class (e.g., `TTSService`, `LLMService`)
|
||||
3. Implement required abstract methods
|
||||
4. Add service to `pyproject.toml` optional dependencies
|
||||
5. Add documentation
|
||||
6. Add tests in `tests/`
|
||||
|
||||
### Adding a New Processor
|
||||
|
||||
1. Create processor in `src/pipecat/processors/`
|
||||
2. Inherit from `FrameProcessor` or appropriate subclass
|
||||
3. Override `process_frame()` method
|
||||
4. Handle relevant frame types
|
||||
5. Emit frames using `await self.push_frame()`
|
||||
6. Add tests
|
||||
|
||||
### Adding a New Frame Type
|
||||
|
||||
1. Add frame definition to `src/pipecat/frames/frames.py`
|
||||
2. Inherit from appropriate base frame class
|
||||
3. Use `@dataclass` decorator for data frames
|
||||
4. Document the frame type and its fields
|
||||
5. Update processors that should handle this frame type
|
||||
|
||||
## Resources
|
||||
|
||||
- [Documentation](https://docs.pipecat.ai)
|
||||
- [GitHub Repository](https://github.com/pipecat-ai/pipecat)
|
||||
- [Examples](https://github.com/pipecat-ai/pipecat-examples)
|
||||
- [Discord Community](https://discord.gg/pipecat)
|
||||
@@ -1,81 +0,0 @@
|
||||
{
|
||||
"description": "Pipecat - Open-source Python framework for real-time voice and multimodal AI agents",
|
||||
"conventions": {
|
||||
"language": "Python",
|
||||
"version": ">=3.10",
|
||||
"package_manager": "uv",
|
||||
"code_style": "Google docstrings, Ruff formatting",
|
||||
"test_framework": "pytest",
|
||||
"async": true
|
||||
},
|
||||
"project_info": {
|
||||
"type": "python_library",
|
||||
"framework": "pipecat",
|
||||
"main_source": "src/pipecat",
|
||||
"examples": "examples/",
|
||||
"tests": "tests/",
|
||||
"docs": "docs/"
|
||||
},
|
||||
"commands": {
|
||||
"install": "uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local",
|
||||
"test": "uv run pytest",
|
||||
"test_file": "uv run pytest {file}",
|
||||
"lint": "uv run ruff check .",
|
||||
"lint_fix": "uv run ruff check --fix .",
|
||||
"format": "uv run ruff format .",
|
||||
"format_check": "uv run ruff format --check .",
|
||||
"type_check": "uv run pyright",
|
||||
"pre_commit": "uv run pre-commit run --all-files",
|
||||
"build": "uv build",
|
||||
"changelog": "uv run towncrier build --version {version}"
|
||||
},
|
||||
"coding_standards": [
|
||||
"Use Google-style docstrings for all public classes and methods",
|
||||
"Follow Ruff linting rules (see pyproject.toml)",
|
||||
"Maintain type hints for all function signatures",
|
||||
"Use async/await patterns consistently",
|
||||
"Keep line length at 100 characters maximum",
|
||||
"Use dataclasses with type annotations for configuration classes",
|
||||
"Prefer composition over inheritance where appropriate",
|
||||
"Write comprehensive pytest tests with asyncio support",
|
||||
"Document event handlers in class docstrings with Example:: sections"
|
||||
],
|
||||
"file_patterns": {
|
||||
"source_files": "src/pipecat/**/*.py",
|
||||
"test_files": "tests/**/*.py",
|
||||
"example_files": "examples/**/*.py",
|
||||
"config_files": "pyproject.toml"
|
||||
},
|
||||
"important_files": [
|
||||
"pyproject.toml - Project configuration and dependencies",
|
||||
"CONTRIBUTING.md - Contributing guidelines",
|
||||
"README.md - Project overview and quick start",
|
||||
"src/pipecat/__init__.py - Main package exports",
|
||||
"src/pipecat/frames/frames.py - Core frame definitions",
|
||||
"src/pipecat/processors/frame_processor.py - Base processor class"
|
||||
],
|
||||
"documentation": {
|
||||
"style": "Google",
|
||||
"build_command": "cd docs && make html",
|
||||
"skip_private_methods": true,
|
||||
"skip_simple_dunders": true,
|
||||
"require_module_docstrings": true,
|
||||
"require_class_docstrings": true,
|
||||
"require_init_docstrings": true
|
||||
},
|
||||
"git": {
|
||||
"main_branch": "main",
|
||||
"commit_style": "Conventional Commits",
|
||||
"pre_commit_hooks": true
|
||||
},
|
||||
"ai_assistance_notes": [
|
||||
"This project is a real-time voice and multimodal AI framework",
|
||||
"Core concepts: Frames (data units), Processors (processing units), Pipelines (chains of processors)",
|
||||
"Heavy use of async/await for real-time processing",
|
||||
"WebRTC and WebSocket transports for audio/video streaming",
|
||||
"Integration with many AI services (OpenAI, Anthropic, Deepgram, ElevenLabs, etc.)",
|
||||
"Frame-based architecture allows composable, modular pipeline construction",
|
||||
"Tests use pytest-asyncio for async test support",
|
||||
"Pre-commit hooks enforce code quality (run 'uv run pre-commit install')"
|
||||
]
|
||||
}
|
||||
@@ -7,23 +7,30 @@ Create changelog files for the important commits in this PR. The PR number is pr
|
||||
|
||||
## Instructions
|
||||
|
||||
1. First, check what commits are on the current branch compared to main:
|
||||
1. Skip the changelog for documentation-only, internal refactoring, test-only, and CI changes.
|
||||
|
||||
2. Check which commits are on the current branch compared to main:
|
||||
```
|
||||
git log main..HEAD --oneline
|
||||
```
|
||||
|
||||
2. For each significant change, create a changelog file in the `changelog/` folder using the format:
|
||||
3. For each significant change, create a changelog file in the `changelog/` folder using the format:
|
||||
Allowed types: `added`, `changed`, `deprecated`, `removed`, `fixed`, `security`, `performance`, `other`
|
||||
- `{PR_NUMBER}.added.md` - for new features
|
||||
- `{PR_NUMBER}.added.2.md`, `{PR_NUMBER}.added.3.md` - for additional new features
|
||||
- `{PR_NUMBER}.added.2.md`, `{PR_NUMBER}.added.3.md` - for additional entries of the same type
|
||||
- `{PR_NUMBER}.changed.md` - for changes to existing functionality
|
||||
- `{PR_NUMBER}.fixed.md` - for bug fixes
|
||||
- `{PR_NUMBER}.deprecated.md` - for deprecations
|
||||
- `{PR_NUMBER}.removed.md` - for removed features
|
||||
- `{PR_NUMBER}.security.md` - for security fixes
|
||||
- `{PR_NUMBER}.performance.md` - for performance improvements
|
||||
- `{PR_NUMBER}.other.md` - for other changes
|
||||
|
||||
3. Each changelog file should at least contain a main single line starting with `- ` followed by a clear description of the change.
|
||||
4. Each changelog file should contain at least one main line starting with `- ` followed by a clear description of the change.
|
||||
|
||||
4. If the change is complicated, changelog files can have indented lines after the main line with additional details or code samples.
|
||||
5. If the change is complicated, changelog files can have indented lines after the main line with additional details or code samples.
|
||||
|
||||
5. Use ⚠️ emoji prefix for breaking changes.
|
||||
6. Use ⚠️ emoji prefix for breaking changes.
|
||||
|
||||
## Example
|
||||
|
||||
|
||||
9
.github/workflows/coverage.yaml
vendored
9
.github/workflows/coverage.yaml
vendored
@@ -33,7 +33,14 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain --extra livekit --extra websocket
|
||||
uv sync --group dev \
|
||||
--extra anthropic \
|
||||
--extra aws \
|
||||
--extra google \
|
||||
--extra langchain \
|
||||
--extra livekit \
|
||||
--extra piper \
|
||||
--extra websocket
|
||||
|
||||
- name: Run tests with coverage
|
||||
run: |
|
||||
|
||||
9
.github/workflows/tests.yaml
vendored
9
.github/workflows/tests.yaml
vendored
@@ -37,7 +37,14 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain --extra livekit --extra websocket
|
||||
uv sync --group dev \
|
||||
--extra anthropic \
|
||||
--extra aws \
|
||||
--extra google \
|
||||
--extra langchain \
|
||||
--extra livekit \
|
||||
--extra piper \
|
||||
--extra websocket
|
||||
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -61,9 +61,4 @@ docs/api/api
|
||||
.python-version
|
||||
|
||||
# Pipecat
|
||||
whisker_setup.py
|
||||
|
||||
# Claude Code - exclude temporary files but keep configuration
|
||||
.claude/.claude-cache/
|
||||
.claude/**/*.tmp
|
||||
.claude/**/*.log
|
||||
whisker_setup.py
|
||||
252
CHANGELOG.md
252
CHANGELOG.md
@@ -7,6 +7,258 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
<!-- towncrier release notes start -->
|
||||
|
||||
## [0.0.101] - 2026-01-30
|
||||
|
||||
### Added
|
||||
|
||||
- Additions for `AICFilter` and `AICVADAnalyzer`:
|
||||
- Added model downloading support to `AICFilter` with `model_id` and
|
||||
`model_download_dir` parameters.
|
||||
- Added `model_path` parameter to `AICFilter` for loading local `.aicmodel`
|
||||
files.
|
||||
- Added unit tests for `AICFilter` and `AICVADAnalyzer`.
|
||||
(PR [#3408](https://github.com/pipecat-ai/pipecat/pull/3408))
|
||||
|
||||
- Added handling for `server_content.interrupted` signal in the Gemini Live
|
||||
service for faster interruption response in the case where there isn't
|
||||
already turn tracking in the pipeline, e.g. local VAD + context aggregators.
|
||||
When there is already turn tracking in the pipeline, the additional
|
||||
interruption does no harm.
|
||||
(PR [#3429](https://github.com/pipecat-ai/pipecat/pull/3429))
|
||||
|
||||
- Added new `GenesysFrameSerializer` for the Genesys AudioHook WebSocket
|
||||
protocol, enabling bidirectional audio streaming between Pipecat pipelines
|
||||
and Genesys Cloud contact center.
|
||||
(PR [#3500](https://github.com/pipecat-ai/pipecat/pull/3500))
|
||||
|
||||
- Added `reached_upstream_types` and `reached_downstream_types` read-only
|
||||
properties to `PipelineTask` for inspecting current frame filters.
|
||||
(PR [#3510](https://github.com/pipecat-ai/pipecat/pull/3510))
|
||||
|
||||
- Added `add_reached_upstream_filter()` and `add_reached_downstream_filter()`
|
||||
methods to `PipelineTask` for appending frame types.
|
||||
(PR [#3510](https://github.com/pipecat-ai/pipecat/pull/3510))
|
||||
|
||||
- Added `UserTurnCompletionLLMServiceMixin` for LLM services to detect and
|
||||
filter incomplete user turns. When enabled via `filter_incomplete_user_turns`
|
||||
in `LLMUserAggregatorParams`, the LLM outputs a turn completion marker at the
|
||||
start of each response: ✓ (complete), ○ (incomplete short), or ◐ (incomplete
|
||||
long). Incomplete turns are suppressed, and configurable timeouts
|
||||
automatically re-prompt the user.
|
||||
(PR [#3518](https://github.com/pipecat-ai/pipecat/pull/3518))
|
||||
|
||||
- Added `FrameProcessor.broadcast_frame_instance(frame)` method to broadcast a
|
||||
frame instance by extracting its fields and creating new instances for each
|
||||
direction.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- `PipelineTask` now automatically adds `RTVIProcessor` and registers
|
||||
`RTVIObserver` when `enable_rtvi=True` (default), simplifying pipeline setup.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- Added `RTVIProcessor.create_rtvi_observer()` factory method for creating RTVI
|
||||
observers.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- Added `video_out_codec` parameter to `TransportParams` allowing configuration
|
||||
of the preferred video codec (e.g., `"VP8"`, `"H264"`, `"H265"`) for video
|
||||
output in `DailyTransport`.
|
||||
(PR [#3520](https://github.com/pipecat-ai/pipecat/pull/3520))
|
||||
|
||||
- Added `location` parameter to Google TTS services (`GoogleHttpTTSService`,
|
||||
`GoogleTTSService`, `GeminiTTSService`) for regional endpoint support.
|
||||
(PR [#3523](https://github.com/pipecat-ai/pipecat/pull/3523))
|
||||
|
||||
- Added new `PIPECAT_SMART_TURN_LOG_DATA` environment variable, which causes
|
||||
Smart Turn input data to be saved to disk.
|
||||
(PR [#3525](https://github.com/pipecat-ai/pipecat/pull/3525))
|
||||
|
||||
- Added `result_callback` parameter to `UserImageRequestFrame` to support
|
||||
deferred function call results.
|
||||
(PR [#3571](https://github.com/pipecat-ai/pipecat/pull/3571))
|
||||
|
||||
- Added `function_call_timeout_secs` parameter to `LLMService` to configure
|
||||
timeout for deferred function calls (defaults to 10.0 seconds).
|
||||
(PR [#3571](https://github.com/pipecat-ai/pipecat/pull/3571))
|
||||
|
||||
- Added `vad_analyzer` parameter to `LLMUserAggregatorParams`. VAD analysis is
|
||||
now handled inside the `LLMUserAggregator` rather than in the transport,
|
||||
keeping voice activity detection closer to where it is consumed. The
|
||||
`vad_analyzer` on `BaseInputTransport` is now deprecated.
|
||||
|
||||
```python
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
```
|
||||
(PR [#3583](https://github.com/pipecat-ai/pipecat/pull/3583))
|
||||
|
||||
- Added `VADProcessor` for detecting speech in audio streams within a pipeline.
|
||||
Pushes `VADUserStartedSpeakingFrame`, `VADUserStoppedSpeakingFrame`, and
|
||||
`UserSpeakingFrame` downstream based on VAD state changes.
|
||||
(PR [#3583](https://github.com/pipecat-ai/pipecat/pull/3583))
|
||||
|
||||
- Added `VADController` for managing voice activity detection state and
|
||||
emitting speech events independently of transport or pipeline processors.
|
||||
(PR [#3583](https://github.com/pipecat-ai/pipecat/pull/3583))
|
||||
|
||||
- Added local `PiperTTSService` for offline text-to-speech using Piper voice
|
||||
models. The existing HTTP-based service has been renamed to
|
||||
`PiperHttpTTSService`.
|
||||
(PR [#3585](https://github.com/pipecat-ai/pipecat/pull/3585))
|
||||
|
||||
- `main()` in `pipecat.runner.run` now accepts an optional
|
||||
`argparse.ArgumentParser`, allowing bots to define custom CLI arguments
|
||||
accessible via `runner_args.cli_args`.
|
||||
(PR [#3590](https://github.com/pipecat-ai/pipecat/pull/3590))
|
||||
|
||||
- Added `KokoroTTSService` for local text-to-speech synthesis using the
|
||||
Kokoro-82M model.
|
||||
(PR [#3595](https://github.com/pipecat-ai/pipecat/pull/3595))
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `AICFilter` and `AICVADAnalyzer` to use aic-sdk ~= 2.0.1.
|
||||
(PR [#3408](https://github.com/pipecat-ai/pipecat/pull/3408))
|
||||
|
||||
- Improved the STT TTFB (Time To First Byte) measurement, reporting the delay
|
||||
between when the user stops speaking and when the final transcription is
|
||||
received. Note: Unlike traditional TTFB which measures from a discrete
|
||||
request, STT services receive continuous audio input—so we measure from
|
||||
speech end to final transcript, which captures the latency that matters for
|
||||
voice AI applications. In support of this change, added `finalized` field to
|
||||
`TranscriptionFrame` to indicate when a transcript is the final result for an
|
||||
utterance.
|
||||
(PR [#3495](https://github.com/pipecat-ai/pipecat/pull/3495))
|
||||
|
||||
- `SarvamSTTService` now defaults `vad_signals` and `high_vad_sensitivity` to
|
||||
`None` (omitted from connection parameters), improving latency by ~300ms
|
||||
compared to the previous defaults.
|
||||
(PR [#3495](https://github.com/pipecat-ai/pipecat/pull/3495))
|
||||
|
||||
- Changed frame filter storage from tuples to sets in `PipelineTask`.
|
||||
(PR [#3510](https://github.com/pipecat-ai/pipecat/pull/3510))
|
||||
|
||||
- Changed default Inworld TTS model from `inworld-tts-1` to
|
||||
`inworld-tts-1.5-max`.
|
||||
(PR [#3531](https://github.com/pipecat-ai/pipecat/pull/3531))
|
||||
|
||||
- `FrameSerializer` now subclasses from `BaseObject` to enable event support.
|
||||
(PR [#3560](https://github.com/pipecat-ai/pipecat/pull/3560))
|
||||
|
||||
- Added support for TTFS in `SpeechmaticsSTTService` and set the default mode
|
||||
to `EXTERNAL` to support Pipecat-controlled VAD.
|
||||
- Changed dependency to `speechmatics-voice[smart]>=0.2.8`
|
||||
(PR [#3562](https://github.com/pipecat-ai/pipecat/pull/3562))
|
||||
|
||||
- ⚠️ Changed function call handling to use timeout-based completion instead of
|
||||
immediate callback execution.
|
||||
- Function calls that defer their results (e.g., `UserImageRequestFrame`)
|
||||
now use a timeout mechanism
|
||||
- The `result_callback` is invoked automatically when the deferred
|
||||
operation completes or after timeout
|
||||
- This change affects examples using `UserImageRequestFrame` - the
|
||||
`result_callback` should now be passed to the frame instead of being called
|
||||
immediately
|
||||
(PR [#3571](https://github.com/pipecat-ai/pipecat/pull/3571))
|
||||
|
||||
- Pipecat runner now uses `DAILY_ROOM_URL` instead of `DAILY_SAMPLE_ROOM_URL`.
|
||||
(PR [#3582](https://github.com/pipecat-ai/pipecat/pull/3582))
|
||||
|
||||
- Updates to `GradiumSTTService`:
|
||||
- Now flushes pending transcriptions when VAD detects the user stopped
|
||||
speaking, improving response latency.
|
||||
- `GradiumSTTService` now supports `InputParams` for configuring `language`
|
||||
and `delay_in_frames` settings.
|
||||
(PR [#3587](https://github.com/pipecat-ai/pipecat/pull/3587))
|
||||
|
||||
### Deprecated
|
||||
|
||||
- ⚠️ Deprecated `vad_analyzer` parameter on `BaseInputTransport`. Pass
|
||||
`vad_analyzer` to `LLMUserAggregatorParams` instead or use `VADProcessor` in
|
||||
the pipeline.
|
||||
(PR [#3583](https://github.com/pipecat-ai/pipecat/pull/3583))
|
||||
|
||||
### Removed
|
||||
|
||||
- Removed deprecated `AICFilter` parameters: `enhancement_level`, `voice_gain`,
|
||||
`noise_gate_enable`.
|
||||
(PR [#3408](https://github.com/pipecat-ai/pipecat/pull/3408))
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where if you were using `OpenRouterLLMService` with a Gemini
|
||||
model, it wouldn't handle multiple `"system"` messages as expected (and as we
|
||||
do in `GoogleLLMService`), which is to convert subsequent ones into `"user"`
|
||||
messages. Instead, the latest `"system"` message would overwrite the previous
|
||||
ones.
|
||||
(PR [#3406](https://github.com/pipecat-ai/pipecat/pull/3406))
|
||||
|
||||
- Transports now properly broadcast `InputTransportMessageFrame` frames both
|
||||
upstream and downstream instead of only pushing downstream.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- Fixed `FrameProcessor.broadcast_frame()` to deep copy kwargs, preventing
|
||||
shared mutable references between the downstream and upstream frame
|
||||
instances.
|
||||
(PR [#3519](https://github.com/pipecat-ai/pipecat/pull/3519))
|
||||
|
||||
- Fixed OpenAI LLM services to emit `ErrorFrame` on completion timeout,
|
||||
enabling proper error handling and LLMSwitcher failover.
|
||||
(PR [#3529](https://github.com/pipecat-ai/pipecat/pull/3529))
|
||||
|
||||
- Fixed a logging issue where non-ASCII characters (e.g., Japanese, Chinese,
|
||||
etc.) were being unnecessarily escaped to Unicode sequences when a function
|
||||
call occurred.
|
||||
(PR [#3536](https://github.com/pipecat-ai/pipecat/pull/3536))
|
||||
|
||||
- Fixed how audio tracks are synchronized inside the `AudioBufferProcessor` to
|
||||
fix timing issues where silence and audio were misaligned between user and
|
||||
bot buffers.
|
||||
(PR [#3541](https://github.com/pipecat-ai/pipecat/pull/3541))
|
||||
|
||||
- Fixed race condition in `OpenAIRealtimeBetaLLMService` that could cause an
|
||||
error when truncating the conversation.
|
||||
(PR [#3567](https://github.com/pipecat-ai/pipecat/pull/3567))
|
||||
|
||||
- Fixed an infinite loop in `WebsocketService` that blocked the event loop when
|
||||
a remote server closed the connection gracefully.
|
||||
(PR [#3574](https://github.com/pipecat-ai/pipecat/pull/3574))
|
||||
|
||||
- Fixed `LLMUserAggregator` and `LLMAssistantAggregator` not emitting pending
|
||||
transcripts via `on_user_turn_stopped` and `on_assistant_turn_stopped` events
|
||||
when the conversation ends (`EndFrame`) or is cancelled (`CancelFrame`).
|
||||
(PR [#3575](https://github.com/pipecat-ai/pipecat/pull/3575))
|
||||
|
||||
- Added missing `LiveKitRunnerArguments` and `LiveKitTransport` support in
|
||||
runner utilities to enable LiveKit transport configuration.
|
||||
(PR [#3580](https://github.com/pipecat-ai/pipecat/pull/3580))
|
||||
|
||||
- Fixed race condition in `OpenAIRealtimeLLMService` that could cause an error
|
||||
when truncating the conversation.
|
||||
(PR [#3581](https://github.com/pipecat-ai/pipecat/pull/3581))
|
||||
|
||||
- Fixed `PiperHttpTTSService` (old `PiperTTSService`) to resample audio output
|
||||
based on the model's sample rate parsed from the WAV header.
|
||||
(PR [#3585](https://github.com/pipecat-ai/pipecat/pull/3585))
|
||||
|
||||
- Fixed `UserTurnController` to reset user turn timeout when interim
|
||||
transcriptions are received.
|
||||
(PR [#3594](https://github.com/pipecat-ai/pipecat/pull/3594))
|
||||
|
||||
- Fixed an issue in the `IVRNavigator` where the `TextFrame`s pushed had
|
||||
incorrect spacing. Now, the internal `IVRProcessor` pushes
|
||||
`AggregatedTextFrame`s when in conversation mode. This allows for controlling
|
||||
spacing of the outputted, aggregated text.
|
||||
(PR [#3604](https://github.com/pipecat-ai/pipecat/pull/3604))
|
||||
|
||||
- Fixed `GeminiLiveLLMService` transcription timeout handler not being
|
||||
scheduled by yielding to the event loop after task creation.
|
||||
(PR [#3605](https://github.com/pipecat-ai/pipecat/pull/3605))
|
||||
|
||||
## [0.0.100] - 2026-01-20
|
||||
|
||||
### Added
|
||||
|
||||
143
CLAUDE.md
Normal file
143
CLAUDE.md
Normal file
@@ -0,0 +1,143 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Project Overview
|
||||
|
||||
Pipecat is an open-source Python framework for building real-time voice and multimodal conversational AI agents. It orchestrates audio/video, AI services, transports, and conversation pipelines using a frame-based architecture.
|
||||
|
||||
## Common Commands
|
||||
|
||||
```bash
|
||||
# Setup development environment
|
||||
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp
|
||||
|
||||
# Install pre-commit hooks
|
||||
uv run pre-commit install
|
||||
|
||||
# Run all tests
|
||||
uv run pytest
|
||||
|
||||
# Run a single test file
|
||||
uv run pytest tests/test_name.py
|
||||
|
||||
# Run a specific test
|
||||
uv run pytest tests/test_name.py::test_function_name
|
||||
|
||||
# Preview changelog
|
||||
towncrier build --draft --version Unreleased
|
||||
|
||||
# Lint and format check
|
||||
uv run ruff check
|
||||
uv run ruff format --check
|
||||
|
||||
# Update dependencies (after editing pyproject.toml)
|
||||
uv lock && uv sync
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
### Frame-Based Pipeline Processing
|
||||
|
||||
All data flows as **Frame** objects through a pipeline of **FrameProcessors**:
|
||||
|
||||
```
|
||||
Transport Input → Pipeline Source → [Processor1] → [Processor2] → ... → Pipeline Sink → Transport Output
|
||||
```
|
||||
|
||||
**Key components:**
|
||||
|
||||
- **Frames** (`src/pipecat/frames/frames.py`): Data units (audio, text, video) and control signals. Flow DOWNSTREAM (input→output) or UPSTREAM (acknowledgments/errors).
|
||||
|
||||
- **FrameProcessor** (`src/pipecat/processors/frame_processor.py`): Base processing unit. Each processor receives frames, processes them, and pushes results downstream.
|
||||
|
||||
- **Pipeline** (`src/pipecat/pipeline/pipeline.py`): Chains processors together.
|
||||
|
||||
- **ParallelPipeline** (`src/pipecat/pipeline/parallel_pipeline.py`): Runs multiple pipelines in parallel.
|
||||
|
||||
- **Transports** (`src/pipecat/transports/`): External I/O layer (Daily WebRTC, LiveKit WebRTC, WebSocket, Local). Abstract interface via `BaseTransport`.
|
||||
|
||||
- **Services** (`src/pipecat/services/`): 60+ AI provider integrations (STT, TTS, LLM, etc.). Extend base classes: `AIService`, `LLMService`, `STTService`, `TTSService`, `VisionService`.
|
||||
|
||||
- **Serializers** (`src/pipecat/serializers/`): Convert frames to/from wire formats for WebSocket transports. `FrameSerializer` base class defines `serialize()` and `deserialize()`. Telephony serializers (Twilio, Plivo, Vonage, Telnyx, Exotel, Genesys) handle provider-specific protocols and audio encoding (e.g., μ-law).
|
||||
|
||||
- **RTVI** (`src/pipecat/processors/frameworks/rtvi.py`): Real-Time Voice Interface protocol bridging clients and the pipeline. `RTVIProcessor` handles incoming client messages (text input, audio, function call results). `RTVIObserver` converts pipeline frames to outgoing messages: user/bot speaking events, transcriptions, LLM/TTS lifecycle, function calls, metrics, and audio levels.
|
||||
|
||||
### Important Patterns
|
||||
|
||||
- **Context Aggregation**: `LLMContext` accumulates messages for LLM calls; `UserResponse` aggregates user input
|
||||
|
||||
- **Turn Management**: Turn management is done through `LLMUserAggregator` and
|
||||
`LLMAssistantAggregator`, created with `LLMContextAggregatorPair`
|
||||
|
||||
- **User turn strategies**: Detection of when the user starts and stops speaking is done via user turn start/stop strategies. They push `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame` respectively.
|
||||
|
||||
- **Interruptions**: Interruptions are usually triggered by a user turn start strategy (e.g. `VADUserTurnStartStrategy`) but they can be triggered by other processors as well, in which case the user turn start strategies don't need to. An `InterruptionFrame` carries an optional `asyncio.Event` that is set when the frame reaches the pipeline sink. If a processor stops an `InterruptionFrame` from propagating downstream (i.e., doesn't push it), it **must** call `frame.complete()` to avoid stalling `push_interruption_task_frame_and_wait()` callers.
|
||||
|
||||
- **Uninterruptible Frames**: These are frames that will not be removed from internal queues even if there's an interruption. For example, `EndFrame` and `StopFrame`.
|
||||
|
||||
- **Events**: Most classes in Pipecat have `BaseObject` as the very base class. `BaseObject` has support for events. Events can run in the background in an async task (default) or synchronously (`sync=True`) if we want immediate action. Synchronous event handlers need to execute quickly.
|
||||
|
||||
### Key Directories
|
||||
|
||||
| Directory | Purpose |
|
||||
|---------------------------|----------------------------------------------------|
|
||||
| `src/pipecat/frames/` | Frame definitions (100+ types) |
|
||||
| `src/pipecat/processors/` | FrameProcessor base + aggregators, filters, audio |
|
||||
| `src/pipecat/pipeline/` | Pipeline orchestration |
|
||||
| `src/pipecat/services/` | AI service integrations (60+ providers) |
|
||||
| `src/pipecat/transports/` | Transport layer (Daily, LiveKit, WebSocket, Local) |
|
||||
| `src/pipecat/serializers/`| Frame serialization for WebSocket protocols |
|
||||
| `src/pipecat/audio/` | VAD, filters, mixers, turn detection, DTMF |
|
||||
| `src/pipecat/turns/` | User turn management |
|
||||
|
||||
## Code Style
|
||||
|
||||
- **Docstrings**: Google-style. Classes describe purpose; `__init__` has `Args:` section; dataclasses use `Parameters:` section.
|
||||
- **Linting**: Ruff (line length 100). Pre-commit hooks enforce formatting.
|
||||
- **Type hints**: Required for complex async code.
|
||||
|
||||
### Docstring Example
|
||||
|
||||
```python
|
||||
class MyService(LLMService):
|
||||
"""Description of what the service does.
|
||||
|
||||
More detailed description.
|
||||
|
||||
Event handlers available:
|
||||
|
||||
- on_connected: Called when we are connected
|
||||
|
||||
Example::
|
||||
|
||||
@service.event_handler("on_connected")
|
||||
async def on_connected(service, frame):
|
||||
...
|
||||
"""
|
||||
|
||||
def __init__(self, param1: str, **kwargs):
|
||||
"""Initialize the service.
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
**kwargs: Additional arguments passed to parent.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
```
|
||||
|
||||
## Service Implementation
|
||||
|
||||
When adding a new service:
|
||||
|
||||
1. Extend the appropriate base class (`STTService`, `TTSService`, `LLMService`, etc.)
|
||||
2. Implement required abstract methods
|
||||
3. Handle necessary frames
|
||||
4. By default, all frames should be pushed in the direction they came
|
||||
5. Push `ErrorFrame` on failures
|
||||
6. Add metrics tracking via `MetricsData` if relevant
|
||||
7. Follow the pattern of existing services in `src/pipecat/services/`
|
||||
|
||||
## Pull Requests
|
||||
|
||||
After creating a PR, use `/changelog <pr_number>` to generate the changelog file and `/pr-description <pr_number>` to update the PR description.
|
||||
@@ -75,7 +75,7 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
||||
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
|
||||
|
||||
1
changelog/3134.added.md
Normal file
1
changelog/3134.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `ResembleAITTSService` for text-to-speech using Resemble AI's streaming WebSocket API with word-level timestamps and jitter buffering for smooth audio playback.
|
||||
1
changelog/3355.added.md
Normal file
1
changelog/3355.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `UserBotLatencyObserver` for tracking user-to-bot response latency. When tracing is enabled, latency measurements are automatically recorded as `turn.user_bot_latency_seconds` attributes on OpenTelemetry turn spans.
|
||||
1
changelog/3355.deprecated.md
Normal file
1
changelog/3355.deprecated.md
Normal file
@@ -0,0 +1 @@
|
||||
- Deprecated `UserBotLatencyLogObserver`. Use `UserBotLatencyObserver` directly with its `on_latency_measured` event handler instead.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed an issue where if you were using `OpenRouterLLMService` with a Gemini model, it wouldn't handle multiple `"system"` messages as expected (and as we do in `GoogleLLMService`), which is to convert subsequent ones into `"user"` messages. Instead, the latest `"system"` message would overwrite the previous ones.
|
||||
@@ -1 +0,0 @@
|
||||
- `SarvamSTTService` now defaults `vad_signals` and `high_vad_sensitivity` to `None` (omitted from connection parameters), improving latency by ~300ms compared to the previous defaults.
|
||||
@@ -1 +0,0 @@
|
||||
- Improved the STT TTFB (Time To First Byte) measurement, reporting the delay between when the user stops speaking and when the final transcription is received. Note: Unlike traditional TTFB which measures from a discrete request, STT services receive continuous audio input—so we measure from speech end to final transcript, which captures the latency that matters for voice AI applications. In support of this change, added `finalized` field to `TranscriptionFrame` to indicate when a transcript is the final result for an utterance.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `add_reached_upstream_filter()` and `add_reached_downstream_filter()` methods to `PipelineTask` for appending frame types.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `reached_upstream_types` and `reached_downstream_types` read-only properties to `PipelineTask` for inspecting current frame filters.
|
||||
@@ -1 +0,0 @@
|
||||
- Changed frame filter storage from tuples to sets in `PipelineTask`.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `RTVIProcessor.create_rtvi_observer()` factory method for creating RTVI observers.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `FrameProcessor.broadcast_frame_instance(frame)` method to broadcast a frame instance by extracting its fields and creating new instances for each direction.
|
||||
@@ -1 +0,0 @@
|
||||
- `PipelineTask` now automatically adds `RTVIProcessor` and registers `RTVIObserver` when `enable_rtvi=True` (default), simplifying pipeline setup.
|
||||
@@ -1 +0,0 @@
|
||||
- Fixed `FrameProcessor.broadcast_frame()` to deep copy kwargs, preventing shared mutable references between the downstream and upstream frame instances.
|
||||
@@ -1 +0,0 @@
|
||||
- Transports now properly broadcast `InputTransportMessageFrame` frames both upstream and downstream instead of only pushing downstream.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `video_out_codec` parameter to `TransportParams` allowing configuration of the preferred video codec (e.g., `"VP8"`, `"H264"`, `"H265"`) for video output in `DailyTransport`.
|
||||
@@ -1 +0,0 @@
|
||||
- Added `location` parameter to Google TTS services (`GoogleHttpTTSService`, `GoogleTTSService`, `GeminiTTSService`) for regional endpoint support.
|
||||
@@ -1 +0,0 @@
|
||||
- Added a new `PIPECAT_SMART_TURN_LOG_DATA` environment variable, which causes Smart Turn input data to be saved to disk.
|
||||
@@ -1,2 +0,0 @@
|
||||
- Changed default Inworld TTS model from `inworld-tts-1` to
|
||||
`inworld-tts-1.5-max`.
|
||||
1
changelog/3542.fixed.md
Normal file
1
changelog/3542.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed pipeline freeze when `InterruptionFrame` discards `EndFrame` or `StopFrame` by making terminal frames uninterruptible.
|
||||
1
changelog/3589.fixed.md
Normal file
1
changelog/3589.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed OpenAI LLM stream not being closed on cancellation/exception, which could leak sockets.
|
||||
1
changelog/3593.added.md
Normal file
1
changelog/3593.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added support for Inworld TTS WebSocket Auto Mode for improved latency.
|
||||
1
changelog/3593.changed.md
Normal file
1
changelog/3593.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Updated Inworld TTS timestamps to be cumulative within an agent turn, using the `flushCompleted` message as the indication that timestamps from the server have been reset to 0.
|
||||
1
changelog/3610.fixed.md
Normal file
1
changelog/3610.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `PipelineTask` adding duplicate `RTVIProcessor` and `RTVIObserver` when they were already provided in the pipeline or observers list. They are now detected and skipped, with appropriate warnings and errors logged for mismatched configurations.
|
||||
1
changelog/3612.changed.md
Normal file
1
changelog/3612.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Changed `KokoroTTSService` to use `kokoro-onnx` instead of `kokoro` as the underlying TTS engine.
|
||||
1
changelog/3616.fixed.md
Normal file
1
changelog/3616.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed function call timeout task not being cancelled when the handler completes without calling `result_callback` or is cancelled externally, which caused `RuntimeWarning: coroutine was never awaited`.
|
||||
5
changelog/3617.fixed.md
Normal file
5
changelog/3617.fixed.md
Normal file
@@ -0,0 +1,5 @@
|
||||
- Fixed sentence splitting for Japanese, Chinese, Korean, and other non-Latin
|
||||
languages in TTS pipeline. NLTK's sentence tokenizer does not support CJK
|
||||
languages, causing text to accumulate until flush instead of being split at
|
||||
sentence boundaries. Added fallback detection for unambiguous non-Latin
|
||||
sentence-ending punctuation (e.g., `。`, `?`, `!`).
|
||||
1
changelog/3623.fixed.md
Normal file
1
changelog/3623.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `PipelineTask` to also call `set_bot_ready()` when an external `RTVIProcessor` is provided.
|
||||
1
changelog/3628.fixed.md
Normal file
1
changelog/3628.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `VADController` not broadcasting `SpeechControlParamsFrame` on startup, which prevented STT services from receiving VAD params needed for TTFB measurement.
|
||||
1
changelog/3629.fixed.md
Normal file
1
changelog/3629.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `StopAsyncIteration` exceptions in `parse_telephony_websocket()` when WebSocket connections close before sending expected messages.
|
||||
1
changelog/3630.added.md
Normal file
1
changelog/3630.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added RTVI function call lifecycle events (`llm-function-call-started`, `llm-function-call-in-progress`, `llm-function-call-stopped`) with configurable security levels via `RTVIObserverParams.function_call_report_level`. Supports per-function control over what information is exposed (`DISABLED`, `NONE`, `NAME`, or `FULL`).
|
||||
1
changelog/3630.deprecated.md
Normal file
1
changelog/3630.deprecated.md
Normal file
@@ -0,0 +1 @@
|
||||
- Deprecated `RTVILLMFunctionCallMessage`, `RTVILLMFunctionCallMessageData`, and `RTVIProcessor.handle_function_call()`. Use the new `llm-function-call-in-progress` event sent automatically by `RTVIObserver` instead.
|
||||
1
changelog/3635.fixed.md
Normal file
1
changelog/3635.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed WebSocket transport error when broadcasting `InputTransportMessageFrame` by correctly instantiating the frame with its message parameter.
|
||||
1
changelog/3649.fixed.md
Normal file
1
changelog/3649.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed orphan OpenTelemetry spans during flow initialization and transitions in tracing.
|
||||
1
changelog/3652.changed.md
Normal file
1
changelog/3652.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Upgraded the `pipecat-ai-small-webrtc-prebuilt` package to v2.1.0.
|
||||
1
changelog/3656.added.md
Normal file
1
changelog/3656.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `OpenAIRealtimeSTTService` for real-time streaming speech-to-text using OpenAI's Realtime API WebSocket transcription sessions. Supports local VAD and server-side VAD modes, noise reduction, and automatic reconnection.
|
||||
10
changelog/3659.changed.md
Normal file
10
changelog/3659.changed.md
Normal file
@@ -0,0 +1,10 @@
|
||||
- ⚠️ The `VADParams` `stop_secs` default is changing from `0.8` seconds
|
||||
to `0.2` seconds. This change both simplifies the developer experience and
|
||||
improves the performance of STT services. With a shorter `stop_secs` value,
|
||||
STT services using a local VAD can finalize sooner, resulting in faster
|
||||
transcription.
|
||||
|
||||
- `SpeechTimeoutUserTurnStopStrategy`: control how long to wait for
|
||||
additional user speech using `user_speech_timeout` (default: 0.6 sec).
|
||||
- `TurnAnalyzerUserTurnStopStrategy`: the turn analyzer automatically adjusts
|
||||
the user wait time based on the audio input.
|
||||
1
changelog/3660.changed.md
Normal file
1
changelog/3660.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Moved interruption wait event from per-processor instance state to `InterruptionFrame` itself. Added `InterruptionFrame.complete()` to signal when the interruption has fully traversed the pipeline. Custom processors that block or consume an `InterruptionFrame` before it reaches the pipeline sink must call `frame.complete()` to avoid stalling `push_interruption_task_frame_and_wait()`. A warning is logged if completion does not happen within 2 seconds.
|
||||
1
changelog/3663.fixed.md
Normal file
1
changelog/3663.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `SambaNovaLLMService` and `GoogleLLMOpenAIBetaService` streams not being closed on cancellation/exception, which could leak sockets.
|
||||
1
changelog/3664.changed.md
Normal file
1
changelog/3664.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Updated the default model to `scribe_v2` for `ElevenLabsSTTService`.
|
||||
1
changelog/3666.changed.md
Normal file
1
changelog/3666.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Changed the `DeepgramSTTService` default setting for `smart_format` to `False`, as agents don't need smart formatting. Disabling this setting also provides a small performance improvement.
|
||||
1
changelog/3667.fixed.md
Normal file
1
changelog/3667.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed an issue in `InworldTTSService` where punctuation was pronounced. Now, the `InworldTTSService` ensures proper spacing between sentences, resolving pronunciation issues.
|
||||
1
changelog/3668.fixed.md
Normal file
1
changelog/3668.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `ParallelPipeline` allowing frames pushed by internal processors to escape during lifecycle frame (`StartFrame`/`EndFrame`/`CancelFrame`) synchronization. These frames are now buffered and flushed after all branches complete.
|
||||
1
changelog/3678.added.md
Normal file
1
changelog/3678.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added pyright basic type checking configuration for the core framework.
|
||||
@@ -43,7 +43,7 @@ CEREBRAS_API_KEY=...
|
||||
|
||||
# Daily
|
||||
DAILY_API_KEY=...
|
||||
DAILY_SAMPLE_ROOM_URL=https://...
|
||||
DAILY_ROOM_URL=https://...
|
||||
|
||||
# Deepgram
|
||||
DEEPGRAM_API_KEY=...
|
||||
@@ -156,6 +156,10 @@ PLIVO_AUTH_TOKEN=...
|
||||
# Qwen
|
||||
QWEN_API_KEY=...
|
||||
|
||||
# Resemble AI
|
||||
RESEMBLE_API_KEY=
|
||||
RESEMBLE_VOICE_UUID=
|
||||
|
||||
# Rime
|
||||
RIME_API_KEY=...
|
||||
RIME_VOICE_ID=...
|
||||
|
||||
@@ -16,7 +16,7 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.piper.tts import PiperTTSService
|
||||
from pipecat.services.piper.tts import PiperHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
@@ -24,9 +24,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
@@ -39,7 +38,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
tts = PiperTTSService(
|
||||
tts = PiperHttpTTSService(
|
||||
base_url=os.getenv("PIPER_BASE_URL"), aiohttp_session=session, sample_rate=24000
|
||||
)
|
||||
|
||||
|
||||
@@ -23,9 +23,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
|
||||
@@ -23,9 +23,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
|
||||
@@ -23,9 +23,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
|
||||
@@ -25,9 +25,8 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_out_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_out_enabled=True),
|
||||
|
||||
@@ -23,9 +23,8 @@ from pipecat.transports.daily.transport import DailyParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
video_out_enabled=True,
|
||||
|
||||
@@ -22,9 +22,8 @@ from pipecat.transports.daily.transport import DailyParams
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
video_out_enabled=True,
|
||||
|
||||
@@ -19,7 +19,6 @@ from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -64,7 +63,6 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
params=TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -91,6 +89,7 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -49,7 +48,6 @@ async def main():
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -76,6 +74,7 @@ async def main():
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
InterruptionFrame,
|
||||
TranscriptionFrame,
|
||||
@@ -54,7 +53,6 @@ async def main():
|
||||
params=LiveKitParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -84,6 +82,7 @@ async def main():
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -65,9 +65,8 @@ class MonthPrepender(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_out_enabled=True,
|
||||
|
||||
@@ -11,7 +11,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import Frame, LLMRunFrame, MetricsFrame
|
||||
from pipecat.metrics.metrics import (
|
||||
LLMUsageMetricsData,
|
||||
@@ -62,24 +61,20 @@ class MetricsLogger(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -112,6 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from PIL import Image
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
@@ -77,9 +76,8 @@ class ImageSyncAggregator(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
@@ -87,7 +85,6 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
@@ -95,7 +92,6 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -126,6 +122,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,24 +34,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -83,6 +78,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -34,24 +33,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -82,6 +77,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -33,9 +33,8 @@ from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -37,24 +36,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -125,6 +120,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMMessagesUpdateFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -50,24 +49,20 @@ def get_session_history(session_id: str) -> BaseChatMessageHistory:
|
||||
return message_store[session_id]
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -109,6 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -32,9 +32,8 @@ from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
|
||||
@@ -13,7 +13,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -37,24 +36,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -89,6 +84,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -92,6 +87,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -33,9 +33,8 @@ from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -81,6 +76,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -37,24 +36,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -93,6 +88,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -84,6 +79,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,24 +34,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -84,6 +79,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -86,6 +81,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,24 +34,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -90,6 +85,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,24 +34,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -90,6 +85,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
135
examples/foundational/07g-interruptible-openai-http.py
Normal file
135
examples/foundational/07g-interruptible-openai-http.py
Normal file
@@ -0,0 +1,135 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import OpenAISTTService
|
||||
from pipecat.services.openai.tts import OpenAITTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy
|
||||
from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Each entry is a zero-argument factory, so the parameters for a transport are
# only constructed once that transport type has been chosen at runtime.
transport_params = {
    "daily": lambda: DailyParams(audio_in_enabled=True, audio_out_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True, audio_out_enabled=True),
    "webrtc": lambda: TransportParams(audio_in_enabled=True, audio_out_enabled=True),
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble the OpenAI STT -> LLM -> TTS pipeline and run it on ``transport``.

    Args:
        transport: Transport whose ``input()`` / ``output()`` processors bracket
            the pipeline and on which the connect/disconnect handlers are
            registered.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info(f"Starting bot")

    # All three OpenAI services share the same key, so read it once.
    openai_key = os.getenv("OPENAI_API_KEY")

    stt = OpenAISTTService(
        api_key=openai_key,
        model="gpt-4o-transcribe",
        prompt="Expect words related to dogs, such as breed names.",
    )
    tts = OpenAITTSService(api_key=openai_key, voice="ballad")
    llm = OpenAILLMService(api_key=openai_key)

    # Conversation history; the connect handler below appends to this same list.
    messages = [
        {
            "role": "system",
            "content": "You are very knowledgable about dogs. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]
    context = LLMContext(messages)

    # User-turn handling: the smart-turn analyzer decides when a turn stops,
    # and the Silero VAD is configured with a 0.2 second stop window.
    turn_strategies = UserTurnStrategies(
        stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
    )
    vad = SileroVADAnalyzer(params=VADParams(stop_secs=0.2))
    aggregator_params = LLMUserAggregatorParams(
        user_turn_strategies=turn_strategies,
        vad_analyzer=vad,
    )
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=aggregator_params,
    )

    processors = [
        transport.input(),  # Transport user input
        stt,  # STT
        user_aggregator,  # User responses
        llm,  # LLM
        tts,  # TTS
        transport.output(),  # Transport bot output
        assistant_aggregator,  # Assistant spoken responses
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        audio_out_sample_rate=24000,
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(
        pipeline,
        params=task_params,
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
        # Kick off the conversation: add an intro instruction, then trigger the LLM.
        intro_request = {"role": "system", "content": "Please introduce yourself to the user."}
        messages.append(intro_request)
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info(f"Client disconnected")
        # Cancel the pipeline task once the client has gone.
        await task.cancel()

    await PipelineRunner(handle_sigint=runner_args.handle_sigint).run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
    """Main bot entry point compatible with Pipecat Cloud.

    Creates the transport from ``runner_args`` and the module-level
    ``transport_params`` factories, then hands it to ``run_bot``.

    Args:
        runner_args: Runner arguments forwarded to both ``create_transport``
            and ``run_bot``.
    """
    selected_transport = await create_transport(runner_args, transport_params)
    await run_bot(selected_transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -25,8 +24,9 @@ from pipecat.processors.aggregators.llm_response_universal import (
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import OpenAISTTService
|
||||
from pipecat.services.openai.stt import OpenAIRealtimeSTTService
|
||||
from pipecat.services.openai.tts import OpenAITTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
@@ -35,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -60,10 +56,15 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = OpenAISTTService(
|
||||
stt = OpenAIRealtimeSTTService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
model="gpt-4o-transcribe",
|
||||
prompt="Expect words related to dogs, such as breed names.",
|
||||
language=Language.EN,
|
||||
# Uses local VAD by default.
|
||||
# To enable server-side VAD, set turn_detection=None or
|
||||
# a dict with server_vad settings.
|
||||
# turn_detection={"type": "server_vad", "threshold": 0.5},
|
||||
)
|
||||
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="ballad")
|
||||
@@ -84,6 +85,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -89,6 +84,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -89,6 +84,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())
|
||||
]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,24 +34,20 @@ from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -88,7 +83,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
context = LLMContext(messages)
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(user_turn_strategies=ExternalUserTurnStrategies()),
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=ExternalUserTurnStrategies(),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -37,24 +36,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -93,6 +88,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,24 +34,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -80,6 +75,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -35,24 +34,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -82,6 +77,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -41,24 +41,20 @@ except ImportError:
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -126,6 +122,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -33,24 +32,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -86,6 +81,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -27,7 +27,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -50,9 +49,8 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
@@ -60,7 +58,6 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
@@ -68,7 +65,6 @@ transport_params = {
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=1024,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -107,6 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -113,6 +108,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -96,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -96,6 +91,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@ from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -36,24 +35,20 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
),
|
||||
}
|
||||
|
||||
@@ -86,6 +81,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -30,7 +30,6 @@ from loguru import logger
|
||||
from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter
|
||||
from pipecat.audio.turn.krisp_viva_turn import KrispVivaTurn
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -53,26 +52,22 @@ from pipecat.turns.user_turn_strategies import UserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
audio_in_filter=KrispVivaFilter(),
|
||||
),
|
||||
}
|
||||
@@ -101,6 +96,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=KrispVivaTurn())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user