Compare commits
117 Commits
v0.0.72
...
aleix/pipe
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed83248a6b | ||
|
|
64c8230960 | ||
|
|
274a04e535 | ||
|
|
cb81f3d50e | ||
|
|
30a3b24287 | ||
|
|
8aacf71956 | ||
|
|
72d503d3a3 | ||
|
|
453a904290 | ||
|
|
368bff4fb4 | ||
|
|
8c71939425 | ||
|
|
a437c2d365 | ||
|
|
a1784e3237 | ||
|
|
abee0f853c | ||
|
|
e9d358ed17 | ||
|
|
c5d54d06bb | ||
|
|
c16eed7ca2 | ||
|
|
76388a10b5 | ||
|
|
38bcc033a2 | ||
|
|
5af563cd91 | ||
|
|
3de271161c | ||
|
|
c19f9bc43a | ||
|
|
ef85d245ed | ||
|
|
25749bd4c0 | ||
|
|
e19c5464fe | ||
|
|
5c2ea3b804 | ||
|
|
c27348d470 | ||
|
|
de5f9c9217 | ||
|
|
f9086ee3a2 | ||
|
|
43298a9026 | ||
|
|
d80e228c6f | ||
|
|
2902362886 | ||
|
|
1cd303ad7f | ||
|
|
f590a476e7 | ||
|
|
e71cb3ba68 | ||
|
|
510a9af2e5 | ||
|
|
5328f84df4 | ||
|
|
18817fd81b | ||
|
|
4bcc536fd2 | ||
|
|
1ab2ddd317 | ||
|
|
09aa168840 | ||
|
|
05753fb207 | ||
|
|
715e3f8543 | ||
|
|
9c9d4b35a4 | ||
|
|
2ee935f784 | ||
|
|
58aedc88a4 | ||
|
|
0e60385871 | ||
|
|
a4188f7986 | ||
|
|
c7cbfe7a4f | ||
|
|
f1c9f5040b | ||
|
|
79e51051c7 | ||
|
|
a63d0da528 | ||
|
|
4fd8df208f | ||
|
|
44d3bd30fa | ||
|
|
6e6e932370 | ||
|
|
baccf50417 | ||
|
|
7b1071b30d | ||
|
|
bd7ca94196 | ||
|
|
1ec1aa76e9 | ||
|
|
77c369c3c7 | ||
|
|
9171d4b040 | ||
|
|
e02b95fca5 | ||
|
|
d45a07b5e5 | ||
|
|
0cdcfcee8d | ||
|
|
324546b4e7 | ||
|
|
c8ee67a636 | ||
|
|
b87c57c951 | ||
|
|
721f662bbe | ||
|
|
fccd48bfff | ||
|
|
5310d903ec | ||
|
|
8cbce555e4 | ||
|
|
f6112713e8 | ||
|
|
cc637f4dea | ||
|
|
7f76a14c54 | ||
|
|
58675f4d5a | ||
|
|
d50e6db312 | ||
|
|
de74284a8e | ||
|
|
4c9a295b28 | ||
|
|
0968f36d3e | ||
|
|
fd570b0377 | ||
|
|
68ea5ee570 | ||
|
|
f891140a74 | ||
|
|
5ed2d7ac2b | ||
|
|
b713527da0 | ||
|
|
224d2cedc8 | ||
|
|
55cfea776f | ||
|
|
d7a2078e0b | ||
|
|
a3e540eb32 | ||
|
|
e01c20be84 | ||
|
|
ce3ca418c2 | ||
|
|
15b9a5faf6 | ||
|
|
3afa30894f | ||
|
|
0ecfa827e6 | ||
|
|
e1b0db75eb | ||
|
|
b0c773189f | ||
|
|
3064326834 | ||
|
|
c67e50fe34 | ||
|
|
9d45e3eca1 | ||
|
|
43a24d15f6 | ||
|
|
cafbda1668 | ||
|
|
86c26fd64c | ||
|
|
0c20668008 | ||
|
|
92df8dc43c | ||
|
|
9d5f5844b8 | ||
|
|
2cf31884d0 | ||
|
|
19354c6f2d | ||
|
|
0b2079ad41 | ||
|
|
5f18c3af70 | ||
|
|
0a40285d43 | ||
|
|
5b1c328541 | ||
|
|
37929533af | ||
|
|
3b92113680 | ||
|
|
46b52cb9bb | ||
|
|
f0bcc9d9ba | ||
|
|
1cac028bfe | ||
|
|
4956886819 | ||
|
|
c720cfc7c7 | ||
|
|
8fcef5628f |
6
.github/workflows/format.yaml
vendored
6
.github/workflows/format.yaml
vendored
@@ -17,7 +17,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
ruff-format:
|
||||
name: "Formatting checker"
|
||||
name: "Code quality checks"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -39,8 +39,8 @@ jobs:
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff format --diff
|
||||
- name: Ruff import linter
|
||||
- name: Ruff linter (all rules)
|
||||
id: ruff-check
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff check --select I
|
||||
ruff check
|
||||
|
||||
83
CHANGELOG.md
83
CHANGELOG.md
@@ -5,6 +5,89 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Added `run_llm` field to `LLMMessagesAppendFrame` and `LLMMessagesUpdateFrame`
|
||||
frames. If true, a context frame will be pushed triggering the LLM to respond.
|
||||
|
||||
- Added a new `SOXRStreamAudioResampler` for processing audio in chunks or
|
||||
streams. If you write your own processor and need to use an audio resampler,
|
||||
use the new `create_stream_resampler()`.
|
||||
|
||||
- Added new `DailyParams.audio_in_user_tracks` to allow receiving one track per
|
||||
user (default) or a single track from the room (all participants mixed).
|
||||
|
||||
- Added support for providing "direct" functions, which don't need an
|
||||
accompanying `FunctionSchema` or function definition dict. Instead, metadata
|
||||
(i.e. `name`, `description`, `properties`, and `required`) are automatically
|
||||
extracted from a combination of the function signature and docstring.
|
||||
|
||||
Usage:
|
||||
|
||||
```python
|
||||
# "Direct" function
|
||||
# `params` must be the first parameter
|
||||
async def do_something(params: FunctionCallParams, foo: int, bar: str = ""):
|
||||
"""
|
||||
Do something interesting.
|
||||
|
||||
Args:
|
||||
foo (int): The foo to do something interesting with.
|
||||
bar (string): The bar to do something interesting with.
|
||||
"""
|
||||
|
||||
result = await process(foo, bar)
|
||||
await params.result_callback({"result": result})
|
||||
|
||||
# ...
|
||||
|
||||
llm.register_direct_function(do_something)
|
||||
|
||||
# ...
|
||||
|
||||
tools = ToolsSchema(standard_tools=[do_something])
|
||||
```
|
||||
|
||||
- `user_id` is now populated in the `TranscriptionFrame` and
|
||||
`InterimTranscriptionFrame` when using a transport that provides a
|
||||
`user_id`, like `DailyTransport` or `LiveKitTransport`.
|
||||
|
||||
- Added `watchdog_coroutine()`. This is a watchdog helper for coroutines. So,
|
||||
if you have a coroutine that is waiting for a result and that takes a long
|
||||
time, you will need to wrap it with `watchdog_coroutine()` so the watchdog
|
||||
timers are reset regularly.
|
||||
|
||||
- Added `session_token` parameter to `AWSNovaSonicLLMService`.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated all the services to use the new `SOXRStreamAudioResampler`, ensuring smooth
|
||||
transitions and eliminating clicks.
|
||||
|
||||
- Upgraded `daily-python` to 0.19.4.
|
||||
|
||||
- Updated `google` optional dependency to use `google-genai` version `1.24.0`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where audio would get stuck in the queue when an interrupt occurs
|
||||
during Azure TTS synthesis.
|
||||
|
||||
- Fixed a race condition that occurs in Python 3.10+ where the task could miss
|
||||
the `CancelledError` and continue running indefinitely, freezing the pipeline.
|
||||
|
||||
- Fixed an `AWSNovaSonicLLMService` issue introduced in 0.0.72.
|
||||
|
||||
## [0.0.73] - 2025-06-26
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue introduced in 0.0.72 that would cause `ElevenLabsTTSService`,
|
||||
`GladiaSTTService`, `NeuphonicTTSService` and `OpenAIRealtimeBetaLLMService`
|
||||
to throw an error.
|
||||
|
||||
## [0.0.72] - 2025-06-26
|
||||
|
||||
### Added
|
||||
|
||||
108
CONTRIBUTING.md
108
CONTRIBUTING.md
@@ -43,8 +43,8 @@ We follow Google-style docstrings with these specific conventions:
|
||||
|
||||
**Regular Classes:**
|
||||
|
||||
- Class docstring describes the class purpose and documents all `__init__` parameters in an `Args:` section
|
||||
- No separate `__init__` docstring needed
|
||||
- Class docstring describes the class purpose and key functionality
|
||||
- `__init__` method has its own docstring with complete `Args:` section documenting all parameters
|
||||
- All public methods must have docstrings with `Args:` and `Returns:` sections as appropriate
|
||||
|
||||
**Dataclasses:**
|
||||
@@ -60,6 +60,39 @@ We follow Google-style docstrings with these specific conventions:
|
||||
|
||||
- Must have docstrings explaining what subclasses should implement
|
||||
|
||||
**`__init__.py` Files:**
|
||||
|
||||
- **Skip docstrings** for pure import/re-export modules
|
||||
- **Add brief docstrings** for top-level packages or those with initialization logic
|
||||
|
||||
**Enums:**
|
||||
|
||||
- Class docstring describes the enumeration purpose
|
||||
- Use `Parameters:` section to document each enum value and its meaning
|
||||
- No `__init__` docstring (Enums don't have custom constructors)
|
||||
|
||||
**Code Examples in Docstrings:**
|
||||
|
||||
- Use `Examples:` as a section header for multiple examples
|
||||
- Use descriptive text followed by double colons (`::`) for each example
|
||||
- **Always include a blank line after the `::`**
|
||||
- Indent all code consistently within each block
|
||||
- Separate multiple examples with blank lines for readability
|
||||
|
||||
**Lists and Bullets in Docstrings:**
|
||||
|
||||
- Use dashes (`-`) for bullet points, not asterisks (`*`)
|
||||
- **Add a blank line before bullet lists** when they follow a colon
|
||||
- Use section headers like "Supported features:" or "Behavior:" before lists
|
||||
- For complex nested information, consider using paragraph format instead
|
||||
|
||||
**Deprecations:**
|
||||
|
||||
- Use `warnings.warn()` in code for runtime deprecation warnings
|
||||
- Add `.. deprecated::` directive in docstrings for documentation visibility
|
||||
- Include version information and describe current status
|
||||
- Describe parameters in present tense, use directive to indicate deprecation status
|
||||
|
||||
#### Examples:
|
||||
|
||||
```python
|
||||
@@ -67,14 +100,34 @@ We follow Google-style docstrings with these specific conventions:
|
||||
class MyService(BaseService):
|
||||
"""Description of what the service does.
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
param2: Description of param2. Defaults to True.
|
||||
**kwargs: Additional arguments passed to parent.
|
||||
Provides detailed explanation of the service's functionality,
|
||||
key features, and usage patterns.
|
||||
|
||||
Supported features:
|
||||
|
||||
- Feature one with detailed explanation
|
||||
- Feature two with additional context
|
||||
- Feature three for advanced use cases
|
||||
"""
|
||||
|
||||
def __init__(self, param1: str, param2: bool = True, **kwargs):
|
||||
# No docstring - parameters documented above
|
||||
def __init__(self, param1: str, old_param: str = None, **kwargs):
|
||||
"""Initialize the service.
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
old_param: Controls legacy behavior.
|
||||
|
||||
.. deprecated:: 1.2.0
|
||||
This parameter no longer has any effect and will be removed in version 2.0.
|
||||
|
||||
**kwargs: Additional arguments passed to parent.
|
||||
"""
|
||||
if old_param is not None:
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"Parameter 'old_param' is deprecated and will be removed in version 2.0.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@property
|
||||
@@ -97,20 +150,41 @@ class MyService(BaseService):
|
||||
"""
|
||||
pass
|
||||
|
||||
# Dataclass
|
||||
# Dataclass with code examples
|
||||
@dataclass
|
||||
class ConfigParams:
|
||||
"""Configuration parameters for the service.
|
||||
class MessageFrame:
|
||||
"""Frame containing messages in OpenAI format.
|
||||
|
||||
Supports both simple and content list message formats.
|
||||
|
||||
Example::
|
||||
|
||||
[
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"}
|
||||
]
|
||||
|
||||
Parameters:
|
||||
host: The host address.
|
||||
port: The port number. Defaults to 8080.
|
||||
timeout: Connection timeout in seconds.
|
||||
messages: List of messages in OpenAI format.
|
||||
"""
|
||||
|
||||
host: str
|
||||
port: int = 8080
|
||||
timeout: float = 30.0
|
||||
messages: List[dict]
|
||||
|
||||
# Enum class
|
||||
class Status(Enum):
|
||||
"""Status codes for processing operations.
|
||||
|
||||
Parameters:
|
||||
PENDING: Operation is queued but not started.
|
||||
RUNNING: Operation is currently in progress.
|
||||
COMPLETED: Operation finished successfully.
|
||||
FAILED: Operation encountered an error.
|
||||
"""
|
||||
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
```
|
||||
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
build~=1.2.2
|
||||
coverage~=7.6.12
|
||||
coverage~=7.9.1
|
||||
grpcio-tools~=1.67.1
|
||||
pip-tools~=7.4.1
|
||||
pre-commit~=4.0.1
|
||||
pyright~=1.1.400
|
||||
pytest~=8.3.4
|
||||
pytest-asyncio~=0.25.3
|
||||
pre-commit~=4.2.0
|
||||
pyright~=1.1.402
|
||||
pytest~=8.4.1
|
||||
pytest-asyncio~=1.0.0
|
||||
pytest-aiohttp==1.1.0
|
||||
ruff~=0.11.13
|
||||
setuptools~=70.0.0
|
||||
setuptools_scm~=8.1.0
|
||||
python-dotenv~=1.0.1
|
||||
ruff~=0.12.1
|
||||
setuptools~=78.1.1
|
||||
setuptools_scm~=8.3.1
|
||||
python-dotenv~=1.1.1
|
||||
|
||||
168
docs/api/conf.py
168
docs/api/conf.py
@@ -26,18 +26,20 @@ extensions = [
|
||||
"sphinx.ext.intersphinx",
|
||||
]
|
||||
|
||||
suppress_warnings = [
|
||||
"autodoc.mocked_object",
|
||||
]
|
||||
|
||||
# Napoleon settings
|
||||
napoleon_google_docstring = True
|
||||
napoleon_numpy_docstring = False
|
||||
napoleon_include_init_with_doc = False
|
||||
napoleon_include_init_with_doc = True
|
||||
|
||||
# AutoDoc settings
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"member-order": "bysource",
|
||||
"undoc-members": True,
|
||||
"exclude-members": "__weakref__,__init__",
|
||||
"no-index": True,
|
||||
"undoc-members": False,
|
||||
"exclude-members": "__weakref__,model_config",
|
||||
"show-inheritance": True,
|
||||
}
|
||||
|
||||
@@ -72,7 +74,6 @@ autodoc_mock_imports = [
|
||||
"langchain",
|
||||
"lmnt",
|
||||
"noisereduce",
|
||||
"openai",
|
||||
"openpipe",
|
||||
"simli",
|
||||
"soundfile",
|
||||
@@ -82,10 +83,6 @@ autodoc_mock_imports = [
|
||||
"tkinter",
|
||||
"daily",
|
||||
"daily_python",
|
||||
"pydantic.BaseModel",
|
||||
"pydantic.Field",
|
||||
"pydantic._internal._model_construction",
|
||||
"pydantic._internal._fields",
|
||||
# Moondream dependencies
|
||||
"torch",
|
||||
"transformers",
|
||||
@@ -168,85 +165,54 @@ autodoc_mock_imports = [
|
||||
"mcp.client.stdio",
|
||||
"mcp.ClientSession",
|
||||
"mcp.StdioServerParameters",
|
||||
# gstreamer
|
||||
"gi",
|
||||
"gi.require_version",
|
||||
"gi.repository",
|
||||
# Protobuf mocks
|
||||
"pipecat.frames.protobufs.frames_pb2",
|
||||
"pipecat.serializers.protobuf",
|
||||
"google.protobuf",
|
||||
"google.protobuf.descriptor",
|
||||
"google.protobuf.descriptor_pool",
|
||||
"google.protobuf.runtime_version",
|
||||
"google.protobuf.symbol_database",
|
||||
"google.protobuf.internal.builder",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
autodoc_typehints = "description"
|
||||
autodoc_typehints = "signature" # Show type hints in the signature only, not in the docstring
|
||||
html_show_sphinx = False
|
||||
|
||||
|
||||
def verify_modules():
|
||||
"""Verify that required modules are available."""
|
||||
required_modules = {
|
||||
"services": [
|
||||
"assemblyai",
|
||||
"aws",
|
||||
"cartesia",
|
||||
"deepgram",
|
||||
"google",
|
||||
"lmnt",
|
||||
"riva",
|
||||
"simli",
|
||||
],
|
||||
"serializers": ["livekit"],
|
||||
"vad": ["silero", "vad_analyzer"],
|
||||
"transports": {
|
||||
"services": ["daily", "livekit"],
|
||||
"local": ["audio", "tk"],
|
||||
"network": ["fastapi_websocket", "websocket_server"],
|
||||
},
|
||||
}
|
||||
def import_core_modules():
|
||||
"""Import core pipecat modules for autodoc to discover."""
|
||||
core_modules = [
|
||||
"pipecat",
|
||||
"pipecat.frames",
|
||||
"pipecat.pipeline",
|
||||
"pipecat.processors",
|
||||
"pipecat.services",
|
||||
"pipecat.transports",
|
||||
"pipecat.audio",
|
||||
"pipecat.adapters",
|
||||
"pipecat.clocks",
|
||||
"pipecat.metrics",
|
||||
"pipecat.observers",
|
||||
"pipecat.serializers",
|
||||
"pipecat.sync",
|
||||
"pipecat.transcriptions",
|
||||
"pipecat.utils",
|
||||
]
|
||||
|
||||
# Skip importing modules that are in autodoc_mock_imports
|
||||
skipped_modules = set(autodoc_mock_imports)
|
||||
|
||||
missing = []
|
||||
for category, modules in required_modules.items():
|
||||
if isinstance(modules, dict):
|
||||
# Handle nested structure
|
||||
for subcategory, submodules in modules.items():
|
||||
for module in submodules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if (
|
||||
f"pipecat.{category}.{subcategory}.{module}" in skipped_modules
|
||||
or module in skipped_modules
|
||||
):
|
||||
logger.info(
|
||||
f"Skipping import of mocked module: pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.info(
|
||||
f"Successfully imported pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{subcategory}.{module} - {str(e)}"
|
||||
)
|
||||
else:
|
||||
# Handle flat structure
|
||||
for module in modules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if f"pipecat.{category}.{module}" in skipped_modules or module in skipped_modules:
|
||||
logger.info(f"Skipping import of mocked module: pipecat.{category}.{module}")
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{module}")
|
||||
logger.info(f"Successfully imported pipecat.{category}.{module}")
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{module} - {str(e)}"
|
||||
)
|
||||
|
||||
if missing:
|
||||
logger.warning(f"Some optional modules are not available: {missing}")
|
||||
for module_name in core_modules:
|
||||
try:
|
||||
__import__(module_name)
|
||||
logger.info(f"Successfully imported {module_name}")
|
||||
except ImportError as e:
|
||||
logger.warning(f"Failed to import {module_name}: {e}")
|
||||
|
||||
|
||||
def clean_title(title: str) -> str:
|
||||
@@ -258,39 +224,7 @@ def clean_title(title: str) -> str:
|
||||
parts = title.split(".")
|
||||
title = parts[-1]
|
||||
|
||||
# Special cases for service names and common acronyms
|
||||
special_cases = {
|
||||
"ai": "AI",
|
||||
"aws": "AWS",
|
||||
"api": "API",
|
||||
"vad": "VAD",
|
||||
"assemblyai": "AssemblyAI",
|
||||
"deepgram": "Deepgram",
|
||||
"elevenlabs": "ElevenLabs",
|
||||
"openai": "OpenAI",
|
||||
"openpipe": "OpenPipe",
|
||||
"playht": "PlayHT",
|
||||
"xtts": "XTTS",
|
||||
"lmnt": "LMNT",
|
||||
"stt": "STT",
|
||||
"tts": "TTS",
|
||||
"llm": "LLM",
|
||||
}
|
||||
|
||||
# Check if the entire title is a special case
|
||||
if title.lower() in special_cases:
|
||||
return special_cases[title.lower()]
|
||||
|
||||
# Otherwise, capitalize each word
|
||||
words = title.split("_")
|
||||
cleaned_words = []
|
||||
for word in words:
|
||||
if word.lower() in special_cases:
|
||||
cleaned_words.append(special_cases[word.lower()])
|
||||
else:
|
||||
cleaned_words.append(word.capitalize())
|
||||
|
||||
return " ".join(cleaned_words)
|
||||
return title
|
||||
|
||||
|
||||
def setup(app):
|
||||
@@ -315,9 +249,8 @@ def setup(app):
|
||||
|
||||
excludes = [
|
||||
str(project_root / "src/pipecat/pipeline/to_be_updated"),
|
||||
str(project_root / "src/pipecat/processors/gstreamer"),
|
||||
str(project_root / "src/pipecat/services/to_be_updated"),
|
||||
str(project_root / "src/pipecat/vad"), # deprecated
|
||||
str(project_root / "src/pipecat/examples"),
|
||||
str(project_root / "src/pipecat/tests"),
|
||||
"**/test_*.py",
|
||||
"**/tests/*.py",
|
||||
]
|
||||
@@ -358,5 +291,4 @@ def setup(app):
|
||||
logger.error(f"Error generating API documentation: {e}", exc_info=True)
|
||||
|
||||
|
||||
# Run module verification
|
||||
verify_modules()
|
||||
import_core_modules()
|
||||
|
||||
@@ -1,57 +1,17 @@
|
||||
Pipecat API Reference Docs
|
||||
==========================
|
||||
Pipecat API Reference
|
||||
=====================
|
||||
|
||||
Welcome to Pipecat's API reference documentation!
|
||||
Welcome to the Pipecat API reference.
|
||||
|
||||
Pipecat is an open source framework for building voice and multimodal assistants.
|
||||
It provides a flexible pipeline architecture for connecting various AI services,
|
||||
audio processing, and transport layers.
|
||||
Use the navigation on the left to browse modules, or search using the search box.
|
||||
|
||||
**New to Pipecat?** Check out the `main documentation <https://docs.pipecat.ai>`_ for tutorials, guides, and client SDK information.
|
||||
|
||||
Quick Links
|
||||
-----------
|
||||
|
||||
* `GitHub Repository <https://github.com/pipecat-ai/pipecat>`_
|
||||
* `Website <https://pipecat.ai>`_
|
||||
|
||||
API Reference
|
||||
-------------
|
||||
|
||||
Core Components
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Frames <pipecat.frames>`
|
||||
* :mod:`Processors <pipecat.processors>`
|
||||
* :mod:`Pipeline <pipecat.pipeline>`
|
||||
|
||||
Audio Processing
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Audio <pipecat.audio>`
|
||||
|
||||
Services
|
||||
~~~~~~~~
|
||||
|
||||
* :mod:`Services <pipecat.services>`
|
||||
|
||||
Transport & Serialization
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Transports <pipecat.transports>`
|
||||
* :mod:`Local <pipecat.transports.local>`
|
||||
* :mod:`Network <pipecat.transports.network>`
|
||||
* :mod:`Services <pipecat.transports.services>`
|
||||
* :mod:`Serializers <pipecat.serializers>`
|
||||
|
||||
Utilities
|
||||
~~~~~~~~~
|
||||
|
||||
* :mod:`Adapters <pipecat.adapters>`
|
||||
* :mod:`Clocks <pipecat.clocks>`
|
||||
* :mod:`Metrics <pipecat.metrics>`
|
||||
* :mod:`Observers <pipecat.observers>`
|
||||
* :mod:`Sync <pipecat.sync>`
|
||||
* :mod:`Transcriptions <pipecat.transcriptions>`
|
||||
* :mod:`Utils <pipecat.utils>`
|
||||
* `Join our Community <https://discord.gg/pipecat>`_
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
@@ -71,11 +31,4 @@ Utilities
|
||||
Sync <api/pipecat.sync>
|
||||
Transcriptions <api/pipecat.transcriptions>
|
||||
Transports <api/pipecat.transports>
|
||||
Utils <api/pipecat.utils>
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
Utils <api/pipecat.utils>
|
||||
@@ -4364,9 +4364,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -6081,9 +6081,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
|
||||
@@ -2,4 +2,4 @@ aiofiles
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia]
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia,soundfile]
|
||||
|
||||
@@ -215,10 +215,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.26.tgz",
|
||||
"integrity": "sha512-vO//GJ/YBco+H7xdQhzJxF7ub3SUwft76jwaeOyVVQFHCi5DCnkP16WHB+JBylo4vOKPoZBlR94Z8xBxNBdNJA==",
|
||||
"license": "MIT"
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.30.tgz",
|
||||
"integrity": "sha512-KBiBKrDY6kxTQWGzKjQB7QirL3PiiOkV7KW98leHFjtVRKtft76Ra5qSA/SL75xT44dp6hOcqiiJ6iievLOYug=="
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
"version": "14.2.25",
|
||||
@@ -231,13 +230,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.26.tgz",
|
||||
"integrity": "sha512-zDJY8gsKEseGAxG+C2hTMT0w9Nk9N1Sk1qV7vXYz9MEiyRoF5ogQX2+vplyUMIfygnjn9/A04I6yrUTRTuRiyQ==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.30.tgz",
|
||||
"integrity": "sha512-EAqfOTb3bTGh9+ewpO/jC59uACadRHM6TSA9DdxJB/6gxOpyV+zrbqeXiFTDy9uV6bmipFDkfpAskeaDcO+7/g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -247,13 +245,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.26.tgz",
|
||||
"integrity": "sha512-U0adH5ryLfmTDkahLwG9sUQG2L0a9rYux8crQeC92rPhi3jGQEY47nByQHrVrt3prZigadwj/2HZ1LUUimuSbg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.30.tgz",
|
||||
"integrity": "sha512-TyO7Wz1IKE2kGv8dwQ0bmPL3s44EKVencOqwIY69myoS3rdpO1NPg5xPM5ymKu7nfX4oYJrpMxv8G9iqLsnL4A==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -263,13 +260,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-SINMl1I7UhfHGM7SoRiw0AbwnLEMUnJ/3XXVmhyptzriHbWvPPbbm0OEVG24uUKhuS1t0nvN/DBvm5kz6ZIqpg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-I5lg1fgPJ7I5dk6mr3qCH1hJYKJu1FsfKSiTKoYwcuUf53HWTrEkwmMI0t5ojFKeA6Vu+SfT2zVy5NS0QLXV4Q==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -279,13 +275,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-s6JaezoyJK2DxrwHWxLWtJKlqKqTdi/zaYigDXUJ/gmx/72CrzdVZfMvUc6VqnZ7YEvRijvYo+0o4Z9DencduA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-8GkNA+sLclQyxgzCDs2/2GSwBc92QLMrmYAmoP2xehe5MUKBLB2cgo34Yu242L1siSkwQkiV4YLdCnjwc/Micw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -295,13 +290,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-FEXeUQi8/pLr/XI0hKbe0tgbLmHFRhgXOUiPScz2hk0hSmbGiU8aUqVslj/6C6KA38RzXnWoJXo4FMo6aBxjzg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-8Ly7okjssLuBoe8qaRCcjGtcMsv79hwzn/63wNeIkzJVFVX06h5S737XNr7DZwlsbTBDOyI6qbL2BJB5n6TV/w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -311,13 +305,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-BUsomaO4d2DuXhXhgQCVt2jjX4B4/Thts8nDoIruEJkhE5ifeQFtvW5c9JkdOtYvE5p2G0hcwQ0UbRaQmQwaVg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-dBmV1lLNeX4mR7uI7KNVHsGQU+OgTG5RGFPi3tBJpsKPvOPtg9poyav/BYWrB3GPQL4dW5YGGgalwZ79WukbKQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -327,13 +320,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-5auwsMVzT7wbB2CZXQxDctpWbdEnEW/e66DyXO1DcgHxIyhP06awu+rHKshZE+lPLIGiwtjo7bsyeuubewwxMw==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-6MMHi2Qc1Gkq+4YLXAgbYslE1f9zMGBikKMdmQRHXjkGPot1JY3n5/Qrbg40Uvbi8//wYnydPnyvNhI1DMUW1g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -343,13 +335,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-ia32-msvc": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-GQWg/Vbz9zUGi9X80lOeGsz1rMH/MtFO/XqigDznhhhTfDlDoynCM6982mPCbSlxJ/aveZcKtTlwfAjwhyxDpg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-pVZMnFok5qEX4RT59mK2hEVtJX+XFfak+/rjHpyFh7juiT52r177bfFKhnlafm0UOSldhXjj32b+LZIOdswGTg==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -359,13 +350,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-2rdB3T1/Gp7bv1eQTTm9d1Y1sv9UuJ2LAwOE0Pe2prHKe32UNscj7YS13fRB37d0GAiGNR+Y7ZcW8YjDI8Ns0w==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-4KCo8hMZXMjpTzs3HOqOGYYwAXymXIy7PEPAXNEcEOyKqkjiDlECumrWziy+JEF0Oi4ILHGxzgQ3YiMGG2t/Lg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -620,11 +610,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -1224,11 +1213,10 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -2614,11 +2602,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -3613,12 +3600,11 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.26.tgz",
|
||||
"integrity": "sha512-b81XSLihMwCfwiUVRRja3LphLo4uBBMZEzBBWMaISbKTwOmq3wPknIETy/8000tr7Gq4WmbuFYPS7jOYIf+ZJw==",
|
||||
"license": "MIT",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.30.tgz",
|
||||
"integrity": "sha512-+COdu6HQrHHFQ1S/8BBsCag61jZacmvbuL2avHvQFbWa2Ox7bE+d8FyNgxRLjXQ5wtPyQwEmk85js/AuaG2Sbg==",
|
||||
"dependencies": {
|
||||
"@next/env": "14.2.26",
|
||||
"@next/env": "14.2.30",
|
||||
"@swc/helpers": "0.5.5",
|
||||
"busboy": "1.6.0",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -3633,15 +3619,15 @@
|
||||
"node": ">=18.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "14.2.26",
|
||||
"@next/swc-darwin-x64": "14.2.26",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.26",
|
||||
"@next/swc-linux-arm64-musl": "14.2.26",
|
||||
"@next/swc-linux-x64-gnu": "14.2.26",
|
||||
"@next/swc-linux-x64-musl": "14.2.26",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.26",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.26",
|
||||
"@next/swc-win32-x64-msvc": "14.2.26"
|
||||
"@next/swc-darwin-arm64": "14.2.30",
|
||||
"@next/swc-darwin-x64": "14.2.30",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.30",
|
||||
"@next/swc-linux-arm64-musl": "14.2.30",
|
||||
"@next/swc-linux-x64-gnu": "14.2.30",
|
||||
"@next/swc-linux-x64-musl": "14.2.30",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.30",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.30",
|
||||
"@next/swc-win32-x64-msvc": "14.2.30"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
|
||||
@@ -61,7 +61,12 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash",
|
||||
# turn on thinking if you want it
|
||||
# params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
@@ -214,7 +214,12 @@ transport_params = {
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash",
|
||||
# turn on thinking if you want it
|
||||
# params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),
|
||||
)
|
||||
|
||||
tts = GoogleTTSService(
|
||||
voice_id="en-US-Chirp3-HD-Charon",
|
||||
|
||||
146
examples/foundational/14t-function-calling-direct.py
Normal file
146
examples/foundational/14t-function-calling-direct.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def get_current_weather(params: FunctionCallParams, location: str, format: str):
    """
    Get the current weather.

    Args:
        location (str): The city and state, e.g. "San Francisco, CA".
        format (str): The temperature unit to use. Must be either "celsius" or "fahrenheit". Infer this from the user's location.
    """
    # NOTE(review): the docstring above is presumably what the tool description
    # is derived from when this function is registered directly -- confirm
    # before rewording it.
    # The reply is canned so the example needs no weather backend; `location`
    # and `format` are accepted but never consulted.
    canned_weather = {"conditions": "nice", "temperature": "75"}
    await params.result_callback(canned_weather)
|
||||
|
||||
|
||||
async def get_restaurant_recommendation(params: FunctionCallParams, location: str):
    """
    Get a restaurant recommendation.

    Args:
        location (str): The city and state, e.g. "San Francisco, CA".
    """
    # NOTE(review): the docstring above is presumably what the tool description
    # is derived from when this function is registered directly -- confirm
    # before rewording it.
    # Fixed answer for demo purposes; `location` is accepted but not used.
    recommendation = {"name": "The Golden Dragon"}
    await params.result_callback(recommendation)
|
||||
|
||||
|
||||
def _audio_with_vad():
    """Build the keyword arguments shared by every transport in this example."""
    return {
        "audio_in_enabled": True,
        "audio_out_enabled": True,
        "vad_analyzer": SileroVADAnalyzer(),
    }


# Each entry is a zero-argument factory rather than a ready-made params object,
# so nothing (e.g. SileroVADAnalyzer) is instantiated until the factory for the
# selected transport is actually called.
transport_params = {
    "daily": lambda: DailyParams(**_audio_with_vad()),
    "twilio": lambda: FastAPIWebsocketParams(**_audio_with_vad()),
    "webrtc": lambda: TransportParams(**_audio_with_vad()),
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run the direct-function-calling voice bot on the given transport.

    Wires Deepgram STT, an OpenAI LLM and Cartesia TTS into a pipeline,
    registers the two example tool functions on the LLM, and runs the pipeline
    until it finishes or the task is cancelled on client disconnect.

    Args:
        transport: Transport that supplies the pipeline's input and output.
        _: Parsed command-line arguments; not used by this example.
        handle_sigint: Passed through to ``PipelineRunner``.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )
    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    # The tool functions are handed to the LLM service as plain callables and
    # the same callables are listed in the tools schema below.
    direct_functions = (get_current_weather, get_restaurant_recommendation)
    for direct_function in direct_functions:
        llm.register_direct_function(direct_function)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Fill the silence while the tool call is in flight.
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    tools = ToolsSchema(standard_tools=list(direct_functions))

    system_prompt = {
        "role": "system",
        "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
    }
    messages = [system_prompt]

    context = OpenAILLMContext(messages, tools)
    context_aggregator = llm.create_context_aggregator(context)

    processors = [
        transport.input(),
        stt,
        context_aggregator.user(),
        llm,
        tts,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)
    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
242
examples/foundational/26f-gemini-multimodal-live-files-api.py
Normal file
242
examples/foundational/26f-gemini-multimodal-live-files-api.py
Normal file
@@ -0,0 +1,242 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.gemini_multimodal_live.gemini import (
|
||||
GeminiMultimodalLiveContext,
|
||||
GeminiMultimodalLiveLLMService,
|
||||
)
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
def _twilio_params():
    """Build the FastAPI-websocket (Twilio) transport parameters.

    ``FastAPIWebsocketParams`` is imported here because this example's
    top-of-file imports do not bring it in; referencing it bare raised
    ``NameError`` as soon as the "twilio" transport was selected.
    """
    from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams

    return FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_in_enabled=False,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
    )


# We store functions so objects (e.g. SileroVADAnalyzer) don't get
# instantiated. The function will be called when the desired transport gets
# selected.
transport_params = {
    "daily": lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_in_enabled=False,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
    ),
    "twilio": _twilio_params,
    "webrtc": lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_in_enabled=False,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
    ),
}
|
||||
|
||||
|
||||
# Path of a user-supplied file to upload. Empty means "create a temporary
# sample file instead"; the ``__main__`` block overwrites this from user input.
sample_file_path = ""


async def create_sample_file():
    """Return the path of the text file to upload to the File API.

    Returns:
        The module-level ``sample_file_path`` when it is non-empty; otherwise
        the path of a newly written temporary ``.txt`` sample document. The
        temporary file is created with ``delete=False``, so removing it is the
        caller's responsibility.
    """
    if sample_file_path:
        return sample_file_path

    content = """# Sample Document for Gemini File API Test

This is a test document to demonstrate the Gemini File API functionality.

## Key Information:
- This document was created for testing purposes
- It contains information about AI assistants
- The document should be analyzed by Gemini
- The secret phrase for the test is "Pineapple Pizza"

## AI Assistant Capabilities:
1. Natural language processing
2. File analysis and understanding
3. Context-aware conversations
4. Multi-modal interactions

## Conclusion:
This document serves as a test case for the Gemini File API integration with Pipecat.
The AI should be able to reference and discuss the contents of this file.
"""

    # Create a temporary file that outlives this function so it can be uploaded.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
        f.write(content)
    return f.name
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run the Gemini Live File API demo bot on the given transport.

    Uploads a text file through ``llm.file_api``, seeds the conversation with
    a reference to it (or with an apology prompt when the upload fails), runs
    the pipeline, and cleans up afterwards.

    Args:
        transport: Transport that supplies the pipeline's input and output.
        _: Parsed command-line arguments; not used by this example.
        handle_sigint: Passed through to ``PipelineRunner``.
    """
    logger.info("Starting File API bot")

    # The module-level ``sample_file_path`` is non-empty only when the user
    # supplied their own file. Such a file must never be deleted during
    # cleanup; only the temporary sample created here is ours to remove.
    using_temp_file = not sample_file_path
    file_path = await create_sample_file()
    logger.info(f"Created sample file: {file_path}")

    system_instruction = """
    You are a helpful AI assistant with access to a document that has been uploaded for analysis.

    The document contains test information.
    You should be able to:
    - Reference and discuss the contents of the uploaded document
    - Answer questions about what's in the document
    - Use the information from the document in our conversation

    Your output will be converted to audio so don't include special characters in your answers.
    Be friendly and demonstrate your ability to work with the uploaded file.
    """

    # Initialize Gemini service with File API support
    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=system_instruction,
        voice_id="Charon",  # Aoede, Charon, Fenrir, Kore, Puck
        transcribe_user_audio=True,
    )

    # Upload the sample file to Gemini File API
    logger.info("Uploading file to Gemini File API...")
    file_info = None
    try:
        file_info = await llm.file_api.upload_file(file_path, display_name="Sample Test Document")
        logger.info(f"File uploaded successfully: {file_info['file']['name']}")

        # Get file URI and mime type
        file_uri = file_info["file"]["uri"]
        mime_type = "text/plain"

        # Create context with file reference
        context = OpenAILLMContext(
            [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Greet the user and let them know you have access to a document they can ask you about. Mention that you can discuss its contents.",
                        },
                        {
                            "type": "file_data",
                            "file_data": {"mime_type": mime_type, "file_uri": file_uri},
                        },
                    ],
                }
            ]
        )

        logger.info("File reference added to conversation context")

    except Exception as e:
        logger.error(f"Error uploading file: {e}")
        # Continue with a basic context if file upload fails
        context = OpenAILLMContext(
            [
                {
                    "role": "user",
                    "content": "Greet the user and explain that there was an issue with file upload, but you're ready to help with other tasks.",
                }
            ]
        )

    # Create context aggregator
    context_aggregator = llm.create_context_aggregator(context)

    # Build the pipeline
    pipeline = Pipeline(
        [
            transport.input(),
            context_aggregator.user(),
            llm,
            transport.output(),
            context_aggregator.assistant(),
        ]
    )

    # Configure the pipeline task
    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    # Handle client connection event
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation using standard context frame
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    # Handle client disconnection events
    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")

    @transport.event_handler("on_client_closed")
    async def on_client_closed(transport, client):
        logger.info("Client closed connection")
        await task.cancel()

    # Run the pipeline, honouring the caller's SIGINT preference like the
    # sibling examples do.
    runner = PipelineRunner(handle_sigint=handle_sigint)
    try:
        await runner.run(task)
    finally:
        # Clean up even when the run fails, so neither the remote upload nor
        # the local temporary file is leaked.
        if file_info:
            try:
                await llm.file_api.delete_file(file_info["file"]["name"])
                logger.info("Cleaned up uploaded file from Gemini")
            except Exception as e:
                logger.error(f"Error cleaning up file: {e}")

        # Remove the temporary file, but never a file the user passed in.
        if using_temp_file:
            try:
                os.unlink(file_path)
                logger.info("Cleaned up temporary file")
            except OSError as e:
                logger.error(f"Error removing temporary file: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    from pipecat.examples.run import main

    # Ask up front which file to upload; an empty answer selects the built-in
    # temporary sample document.
    upload_example_file = input("""

Please pass in a TEXT filepath to test upload.
NOTE: Files are stored on Google's servers for 48 hours.

Press Enter to use a default test file.

text filepath : """).strip()

    # Strip before testing, so whitespace-only input counts as "use the
    # default" instead of announcing an upload of an empty path.
    if upload_example_file:
        print(f"Uploading file: {upload_example_file}")
        sample_file_path = upload_example_file
    else:
        print("Using default file")

    main(run_example, transport_params=transport_params)
|
||||
133
examples/foundational/39c-mcp-run-http.py
Normal file
133
examples/foundational/39c-mcp-run-http.py
Normal file
@@ -0,0 +1,133 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from mcp.client.session_group import StreamableHttpParameters
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.mcp_service import MCPClient
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
def _audio_with_vad():
    """Build the keyword arguments shared by every transport in this example."""
    return {
        "audio_in_enabled": True,
        "audio_out_enabled": True,
        "vad_analyzer": SileroVADAnalyzer(),
    }


# Each entry is a zero-argument factory rather than a ready-made params object,
# so nothing (e.g. SileroVADAnalyzer) is instantiated until the factory for the
# selected transport is actually called.
transport_params = {
    "daily": lambda: DailyParams(**_audio_with_vad()),
    "twilio": lambda: FastAPIWebsocketParams(**_audio_with_vad()),
    "webrtc": lambda: TransportParams(**_audio_with_vad()),
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run a voice bot whose tools come from the GitHub MCP server over HTTP.

    Creates an ``MCPClient`` for the GitHub Copilot MCP endpoint, registers
    its tools on a Google LLM service, and runs an STT -> LLM -> TTS pipeline
    until it finishes or the task is cancelled on client disconnect.

    Args:
        transport: Transport that supplies the pipeline's input and output.
        _: Parsed command-line arguments; not used by this example.
        handle_sigint: Passed through to ``PipelineRunner``.

    Raises:
        Exception: Whatever ``MCPClient`` construction raises is logged and
            re-raised, because the bot cannot register tools without a client.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash")

    github_token = os.getenv("GITHUB_PERSONAL_ACCESS_TOKEN")
    if not github_token:
        # Without a token the header below would read "Bearer None".
        logger.warning("GITHUB_PERSONAL_ACCESS_TOKEN is not set; MCP requests will not be authorized")

    try:
        # Github MCP docs: https://github.com/github/github-mcp-server
        # Enable Github Copilot on your GitHub account. Free tier is ok. (https://github.com/settings/copilot)
        # Generate a personal access token. It must be a Fine-grained token, classic tokens are not supported. (https://github.com/settings/personal-access-tokens)
        # Set permissions you want to use (eg. "all repositories", "profile: read/write", etc)
        mcp = MCPClient(
            server_params=StreamableHttpParameters(
                url="https://api.githubcopilot.com/mcp/",
                headers={"Authorization": f"Bearer {github_token}"},
            )
        )
    except Exception:
        logger.error("error setting up mcp")
        logger.exception("error trace:")
        # Re-raise: carrying on would only fail one line later with a
        # NameError on the unbound ``mcp``, hiding the real cause.
        raise

    tools = await mcp.register_tools(llm)

    system = """
    You are a helpful LLM in a WebRTC call.
    Your goal is to answer questions about the user's GitHub repositories and account.
    You have access to a number of tools provided by Github. Use any and all tools to help users.
    Your output will be converted to audio so don't include special characters in your answers.
    Don't overexplain what you are doing.
    Just respond with short sentences when you are carrying out tool calls.
    """

    messages = [{"role": "system", "content": system}]

    context = OpenAILLMContext(messages, tools)
    context_aggregator = llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            stt,
            context_aggregator.user(),  # User spoken responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses and tool context
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected: {client}")
        # Kick off the conversation.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -102,6 +102,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
||||
access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
region=os.getenv("AWS_REGION"), # as of 2025-05-06, us-east-1 is the only supported region
|
||||
session_token=os.getenv("AWS_SESSION_TOKEN"),
|
||||
voice_id="tiffany", # matthew, tiffany, amy
|
||||
# you could choose to pass instruction here rather than via context
|
||||
# system_instruction=system_instruction
|
||||
|
||||
@@ -10,8 +10,8 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.interruptions.min_words_interruption_strategy import MinWordsInterruptionStrategy
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import MinWordsInterruptionStrategy
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
|
||||
@@ -191,7 +191,17 @@ class WebsocketClientApp {
|
||||
const startTime = Date.now();
|
||||
|
||||
this.recordingSerializer = new RecordingSerializer()
|
||||
const transport = this.ENABLE_RECORDING_MODE ? new WebSocketTransport({serializer: this.recordingSerializer}) : new WebSocketTransport();
|
||||
const transport = this.ENABLE_RECORDING_MODE ?
|
||||
new WebSocketTransport({
|
||||
serializer: this.recordingSerializer,
|
||||
recorderSampleRate: 8000,
|
||||
playerSampleRate:8000
|
||||
}) :
|
||||
new WebSocketTransport({
|
||||
serializer: new ProtobufFrameSerializer(),
|
||||
recorderSampleRate: 8000,
|
||||
playerSampleRate:8000
|
||||
});
|
||||
this.websocketTransport = transport
|
||||
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
|
||||
4
examples/freeze-test/env.example
Normal file
4
examples/freeze-test/env.example
Normal file
@@ -0,0 +1,4 @@
|
||||
SENTRY_DSN=
|
||||
DEEPGRAM_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
@@ -18,7 +18,6 @@ from fastapi import FastAPI, Request, WebSocket
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import RedirectResponse
|
||||
from loguru import logger
|
||||
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
@@ -27,11 +26,13 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InterimTranscriptionFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMMessagesFrame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
StopFrame,
|
||||
StopInterruptionFrame,
|
||||
TranscriptionFrame,
|
||||
TTSSpeakFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
@@ -47,6 +48,7 @@ from pipecat.processors.aggregators.openai_llm_context import (
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIProcessor
|
||||
from pipecat.processors.metrics.sentry import SentryMetrics
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.serializers.protobuf import ProtobufFrameSerializer
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
@@ -78,9 +80,6 @@ app.add_middleware(
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Mount the frontend at /
|
||||
app.mount("/client", SmallWebRTCPrebuiltUI)
|
||||
|
||||
|
||||
class SimulateFreezeInput(FrameProcessor):
|
||||
def __init__(
|
||||
@@ -188,6 +187,37 @@ async def run_example(websocket_client):
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
async def handle_user_idle(user_idle: UserIdleProcessor, retry_count: int) -> bool:
    """Nudge an idle user twice, then say goodbye and end the pipeline.

    Args:
        user_idle: Processor through which the prompt or goodbye frame is pushed.
        retry_count: Idle-callback attempt number; 1 and 2 trigger a nudge,
            any other value ends the conversation.

    Returns:
        True after a nudge was pushed, False once the goodbye frame was pushed
        and an ``EndFrame`` queued on the task.
    """
    # System prompts keyed by attempt number: a gentle check-in first, then a
    # more direct question.
    nudges = {
        1: "The user has been quiet. Politely and briefly ask if they're still there.",
        2: "The user is still inactive. Ask if they'd like to continue our conversation.",
    }
    nudge = nudges.get(retry_count)

    if nudge is None:
        # No nudge left for this attempt: speak a goodbye and end the conversation.
        goodbye = TTSSpeakFrame("It seems like you're busy right now. Have a nice day!")
        await user_idle.push_frame(goodbye)
        await task.queue_frame(EndFrame())
        return False

    # Extend the shared message history in place, then push all of it.
    messages.append({"role": "system", "content": nudge})
    await user_idle.push_frame(LLMMessagesFrame(messages))
    return True
|
||||
|
||||
user_idle = UserIdleProcessor(callback=handle_user_idle, timeout=10.0)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
@@ -222,6 +252,7 @@ async def run_example(websocket_client):
|
||||
stt,
|
||||
],
|
||||
),
|
||||
user_idle,
|
||||
rtvi,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
@@ -238,6 +269,8 @@ async def run_example(websocket_client):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
audio_in_sample_rate=8000,
|
||||
audio_out_sample_rate=8000,
|
||||
),
|
||||
idle_timeout_secs=120,
|
||||
observers=[
|
||||
@@ -249,6 +282,10 @@ async def run_example(websocket_client):
|
||||
# LLMTextFrame: None,
|
||||
OpenAILLMContextFrame: None,
|
||||
LLMFullResponseEndFrame: None,
|
||||
UserStartedSpeakingFrame: None,
|
||||
UserStoppedSpeakingFrame: None,
|
||||
StartInterruptionFrame: None,
|
||||
StopInterruptionFrame: None,
|
||||
},
|
||||
exclude_fields={
|
||||
"result",
|
||||
|
||||
4
examples/freeze-test/requirements.txt
Normal file
4
examples/freeze-test/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[silero,websocket,openai, deepgram, cartesia, sentry]
|
||||
@@ -1,6 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai[daily,webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
opentelemetry-exporter-otlp-proto-grpc
|
||||
@@ -26,7 +26,7 @@ Create a `.env` file with your API keys to enable tracing:
|
||||
```
|
||||
ENABLE_TRACING=true
|
||||
# OTLP endpoint for Langfuse
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://cloud.langfuse.com/api/public/otel
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel
|
||||
OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic%20<base64_encoded_api_key>
|
||||
# Set to any value to enable console output for debugging
|
||||
# OTEL_CONSOLE_EXPORT=true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai[daily,webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
opentelemetry-exporter-otlp-proto-http
|
||||
@@ -1,4 +1,4 @@
|
||||
pipecat-ai[daily,elevenlabs,openai,silero]
|
||||
pipecat-ai[daily,cartesia,openai,silero]
|
||||
fastapi==0.115.6
|
||||
uvicorn
|
||||
python-dotenv
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
90031FC22C616EE900408370 /* SimpleChatbotUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90031FC12C616EE900408370 /* SimpleChatbotUITests.swift */; };
|
||||
90031FC42C616EE900408370 /* SimpleChatbotUITestsLaunchTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90031FC32C616EE900408370 /* SimpleChatbotUITestsLaunchTests.swift */; };
|
||||
90031FDC2C6D5DD700408370 /* ToastModifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90031FDB2C6D5DD700408370 /* ToastModifier.swift */; };
|
||||
907C98842D37E6AF0079441F /* PipecatClientIOSDaily in Frameworks */ = {isa = PBXBuildFile; productRef = 907C98832D37E6AF0079441F /* PipecatClientIOSDaily */; };
|
||||
90ABB98E2C735ED6000D9CC7 /* MeetingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB98D2C735ED6000D9CC7 /* MeetingView.swift */; };
|
||||
90ABB9902C736A8B000D9CC7 /* WaveformView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB98F2C736A8B000D9CC7 /* WaveformView.swift */; };
|
||||
90ABB9932C73820D000D9CC7 /* MicrophoneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9922C73820D000D9CC7 /* MicrophoneView.swift */; };
|
||||
@@ -25,6 +24,8 @@
|
||||
90ABB9A32C74E1CE000D9CC7 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9A22C74E1CE000D9CC7 /* SettingsView.swift */; };
|
||||
90ABB9A62C74EA8A000D9CC7 /* SettingsPreference.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9A52C74EA8A000D9CC7 /* SettingsPreference.swift */; };
|
||||
90ABB9A82C74EAB1000D9CC7 /* SettingsManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9A72C74EAB1000D9CC7 /* SettingsManager.swift */; };
|
||||
90CC98B02E158093003C2706 /* PipecatClientIOSDaily in Frameworks */ = {isa = PBXBuildFile; productRef = 90CC98AF2E158093003C2706 /* PipecatClientIOSDaily */; };
|
||||
90CC98B62E15820B003C2706 /* PipecatClientIOSDaily in Frameworks */ = {isa = PBXBuildFile; productRef = 90CC98B52E15820B003C2706 /* PipecatClientIOSDaily */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
@@ -73,7 +74,8 @@
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
907C98842D37E6AF0079441F /* PipecatClientIOSDaily in Frameworks */,
|
||||
90CC98B62E15820B003C2706 /* PipecatClientIOSDaily in Frameworks */,
|
||||
90CC98B02E158093003C2706 /* PipecatClientIOSDaily in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@@ -218,7 +220,8 @@
|
||||
);
|
||||
name = SimpleChatbot;
|
||||
packageProductDependencies = (
|
||||
907C98832D37E6AF0079441F /* PipecatClientIOSDaily */,
|
||||
90CC98AF2E158093003C2706 /* PipecatClientIOSDaily */,
|
||||
90CC98B52E15820B003C2706 /* PipecatClientIOSDaily */,
|
||||
);
|
||||
productName = SimpleChatbot;
|
||||
productReference = 90031FA32C616EE700408370 /* SimpleChatbot.app */;
|
||||
@@ -293,7 +296,7 @@
|
||||
);
|
||||
mainGroup = 90031F9A2C616EE700408370;
|
||||
packageReferences = (
|
||||
907C98822D37E6AF0079441F /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */,
|
||||
90CC98B42E15820B003C2706 /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */,
|
||||
);
|
||||
productRefGroup = 90031FA42C616EE700408370 /* Products */;
|
||||
projectDirPath = "";
|
||||
@@ -682,20 +685,24 @@
|
||||
/* End XCConfigurationList section */
|
||||
|
||||
/* Begin XCRemoteSwiftPackageReference section */
|
||||
907C98822D37E6AF0079441F /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */ = {
|
||||
90CC98B42E15820B003C2706 /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */ = {
|
||||
isa = XCRemoteSwiftPackageReference;
|
||||
repositoryURL = "https://github.com/pipecat-ai/pipecat-client-ios-daily/";
|
||||
requirement = {
|
||||
kind = upToNextMajorVersion;
|
||||
minimumVersion = 0.3.2;
|
||||
minimumVersion = 0.3.6;
|
||||
};
|
||||
};
|
||||
/* End XCRemoteSwiftPackageReference section */
|
||||
|
||||
/* Begin XCSwiftPackageProductDependency section */
|
||||
907C98832D37E6AF0079441F /* PipecatClientIOSDaily */ = {
|
||||
90CC98AF2E158093003C2706 /* PipecatClientIOSDaily */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = 907C98822D37E6AF0079441F /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */;
|
||||
productName = PipecatClientIOSDaily;
|
||||
};
|
||||
90CC98B52E15820B003C2706 /* PipecatClientIOSDaily */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = 90CC98B42E15820B003C2706 /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */;
|
||||
productName = PipecatClientIOSDaily;
|
||||
};
|
||||
/* End XCSwiftPackageProductDependency section */
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
{
|
||||
"originHash" : "cc17f08b06def9570d775e9c6f7a8dc10d1588b98127e977c47d052abac659b7",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "daily-client-ios",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/daily-co/daily-client-ios.git",
|
||||
"state" : {
|
||||
"revision" : "15804ce495780da3ec2d05ab99736315f7bfbd24",
|
||||
"version" : "0.28.0"
|
||||
"revision" : "431938db25e5807120e89e2dc5bab1c076729f59",
|
||||
"version" : "0.31.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -14,8 +15,8 @@
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/pipecat-ai/pipecat-client-ios.git",
|
||||
"state" : {
|
||||
"revision" : "c679512e367002a1a67da85d503fec72d9b17191",
|
||||
"version" : "0.3.2"
|
||||
"revision" : "f92b5e68e56a8311f7d8ead68a7a5674843cbc40",
|
||||
"version" : "0.3.6"
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -23,10 +24,10 @@
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/pipecat-ai/pipecat-client-ios-daily/",
|
||||
"state" : {
|
||||
"revision" : "a337fe6642c52376d2f90eafcb965f5be772ce72",
|
||||
"version" : "0.3.2"
|
||||
"revision" : "8f494da903192c22c367ecf9e51248c9b651fbc6",
|
||||
"version" : "0.3.6"
|
||||
}
|
||||
}
|
||||
],
|
||||
"version" : 2
|
||||
"version" : 3
|
||||
}
|
||||
|
||||
@@ -78,10 +78,11 @@ class CallContainerModel: ObservableObject {
|
||||
self.saveCredentials(backendURL: baseUrl)
|
||||
}
|
||||
|
||||
@MainActor
|
||||
func disconnect() {
|
||||
self.rtviClientIOS?.disconnect(completion: nil)
|
||||
self.rtviClientIOS?.release()
|
||||
Task { @MainActor in
|
||||
try await self.rtviClientIOS?.disconnect()
|
||||
self.rtviClientIOS?.release()
|
||||
}
|
||||
}
|
||||
|
||||
func showError(message: String) {
|
||||
|
||||
104
examples/storytelling-chatbot/client/package-lock.json
generated
104
examples/storytelling-chatbot/client/package-lock.json
generated
@@ -345,9 +345,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.28.tgz",
|
||||
"integrity": "sha512-PAmWhJfJQlP+kxZwCjrVd9QnR5x0R3u0mTXTiZDgSd4h5LdXmjxCCWbN9kq6hkZBOax8Rm3xDW5HagWyJuT37g=="
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.30.tgz",
|
||||
"integrity": "sha512-KBiBKrDY6kxTQWGzKjQB7QirL3PiiOkV7KW98leHFjtVRKtft76Ra5qSA/SL75xT44dp6hOcqiiJ6iievLOYug=="
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
"version": "14.1.4",
|
||||
@@ -359,9 +359,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.28.tgz",
|
||||
"integrity": "sha512-kzGChl9setxYWpk3H6fTZXXPFFjg7urptLq5o5ZgYezCrqlemKttwMT5iFyx/p1e/JeglTwDFRtb923gTJ3R1w==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.30.tgz",
|
||||
"integrity": "sha512-EAqfOTb3bTGh9+ewpO/jC59uACadRHM6TSA9DdxJB/6gxOpyV+zrbqeXiFTDy9uV6bmipFDkfpAskeaDcO+7/g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -374,9 +374,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.28.tgz",
|
||||
"integrity": "sha512-z6FXYHDJlFOzVEOiiJ/4NG8aLCeayZdcRSMjPDysW297Up6r22xw6Ea9AOwQqbNsth8JNgIK8EkWz2IDwaLQcw==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.30.tgz",
|
||||
"integrity": "sha512-TyO7Wz1IKE2kGv8dwQ0bmPL3s44EKVencOqwIY69myoS3rdpO1NPg5xPM5ymKu7nfX4oYJrpMxv8G9iqLsnL4A==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -389,9 +389,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.28.tgz",
|
||||
"integrity": "sha512-9ARHLEQXhAilNJ7rgQX8xs9aH3yJSj888ssSjJLeldiZKR4D7N08MfMqljk77fAwZsWwsrp8ohHsMvurvv9liQ==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-I5lg1fgPJ7I5dk6mr3qCH1hJYKJu1FsfKSiTKoYwcuUf53HWTrEkwmMI0t5ojFKeA6Vu+SfT2zVy5NS0QLXV4Q==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -404,9 +404,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.28.tgz",
|
||||
"integrity": "sha512-p6gvatI1nX41KCizEe6JkF0FS/cEEF0u23vKDpl+WhPe/fCTBeGkEBh7iW2cUM0rvquPVwPWdiUR6Ebr/kQWxQ==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-8GkNA+sLclQyxgzCDs2/2GSwBc92QLMrmYAmoP2xehe5MUKBLB2cgo34Yu242L1siSkwQkiV4YLdCnjwc/Micw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -419,9 +419,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.28.tgz",
|
||||
"integrity": "sha512-nsiSnz2wO6GwMAX2o0iucONlVL7dNgKUqt/mDTATGO2NY59EO/ZKnKEr80BJFhuA5UC1KZOMblJHWZoqIJddpA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-8Ly7okjssLuBoe8qaRCcjGtcMsv79hwzn/63wNeIkzJVFVX06h5S737XNr7DZwlsbTBDOyI6qbL2BJB5n6TV/w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -434,9 +434,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.28.tgz",
|
||||
"integrity": "sha512-+IuGQKoI3abrXFqx7GtlvNOpeExUH1mTIqCrh1LGFf8DnlUcTmOOCApEnPJUSLrSbzOdsF2ho2KhnQoO0I1RDw==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-dBmV1lLNeX4mR7uI7KNVHsGQU+OgTG5RGFPi3tBJpsKPvOPtg9poyav/BYWrB3GPQL4dW5YGGgalwZ79WukbKQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -449,9 +449,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.28.tgz",
|
||||
"integrity": "sha512-l61WZ3nevt4BAnGksUVFKy2uJP5DPz2E0Ma/Oklvo3sGj9sw3q7vBWONFRgz+ICiHpW5mV+mBrkB3XEubMrKaA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-6MMHi2Qc1Gkq+4YLXAgbYslE1f9zMGBikKMdmQRHXjkGPot1JY3n5/Qrbg40Uvbi8//wYnydPnyvNhI1DMUW1g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -464,9 +464,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-ia32-msvc": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.28.tgz",
|
||||
"integrity": "sha512-+Kcp1T3jHZnJ9v9VTJ/yf1t/xmtFAc/Sge4v7mVc1z+NYfYzisi8kJ9AsY8itbgq+WgEwMtOpiLLJsUy2qnXZw==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-pVZMnFok5qEX4RT59mK2hEVtJX+XFfak+/rjHpyFh7juiT52r177bfFKhnlafm0UOSldhXjj32b+LZIOdswGTg==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
@@ -479,9 +479,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.28.tgz",
|
||||
"integrity": "sha512-1gCmpvyhz7DkB1srRItJTnmR2UwQPAUXXIg9r0/56g3O8etGmwlX68skKXJOp9EejW3hhv7nSQUJ2raFiz4MoA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-4KCo8hMZXMjpTzs3HOqOGYYwAXymXIy7PEPAXNEcEOyKqkjiDlECumrWziy+JEF0Oi4ILHGxzgQ3YiMGG2t/Lg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1317,9 +1317,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
@@ -1960,9 +1960,9 @@
|
||||
"integrity": "sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA=="
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
@@ -3391,9 +3391,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -4389,11 +4389,11 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.28.tgz",
|
||||
"integrity": "sha512-QLEIP/kYXynIxtcKB6vNjtWLVs3Y4Sb+EClTC/CSVzdLD1gIuItccpu/n1lhmduffI32iPGEK2cLLxxt28qgYA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.30.tgz",
|
||||
"integrity": "sha512-+COdu6HQrHHFQ1S/8BBsCag61jZacmvbuL2avHvQFbWa2Ox7bE+d8FyNgxRLjXQ5wtPyQwEmk85js/AuaG2Sbg==",
|
||||
"dependencies": {
|
||||
"@next/env": "14.2.28",
|
||||
"@next/env": "14.2.30",
|
||||
"@swc/helpers": "0.5.5",
|
||||
"busboy": "1.6.0",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -4408,15 +4408,15 @@
|
||||
"node": ">=18.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "14.2.28",
|
||||
"@next/swc-darwin-x64": "14.2.28",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.28",
|
||||
"@next/swc-linux-arm64-musl": "14.2.28",
|
||||
"@next/swc-linux-x64-gnu": "14.2.28",
|
||||
"@next/swc-linux-x64-musl": "14.2.28",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.28",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.28",
|
||||
"@next/swc-win32-x64-msvc": "14.2.28"
|
||||
"@next/swc-darwin-arm64": "14.2.30",
|
||||
"@next/swc-darwin-x64": "14.2.30",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.30",
|
||||
"@next/swc-linux-arm64-musl": "14.2.30",
|
||||
"@next/swc-linux-x64-gnu": "14.2.30",
|
||||
"@next/swc-linux-x64-musl": "14.2.30",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.30",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.30",
|
||||
"@next/swc-win32-x64-msvc": "14.2.30"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
|
||||
@@ -6,10 +6,10 @@ Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/
|
||||
|
||||
1. Run the bot server. See the [server README](../README).
|
||||
|
||||
2. Navigate to the `client/javascript` directory:
|
||||
2. Navigate to the `client` directory:
|
||||
|
||||
```bash
|
||||
cd client/javascript
|
||||
cd client
|
||||
```
|
||||
|
||||
3. Install dependencies:
|
||||
|
||||
@@ -22,6 +22,7 @@ classifiers = [
|
||||
dependencies = [
|
||||
"aiohttp~=3.11.12",
|
||||
"audioop-lts~=0.2.1; python_version>='3.13'",
|
||||
"docstring_parser~=0.16",
|
||||
"loguru~=0.7.3",
|
||||
"Markdown~=3.7",
|
||||
"numpy~=1.26.4",
|
||||
@@ -31,7 +32,7 @@ dependencies = [
|
||||
"pyloudnorm~=0.1.1",
|
||||
"resampy~=0.4.3",
|
||||
"soxr~=0.5.0",
|
||||
"openai~=1.70.0"
|
||||
"openai~=1.70.0",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
@@ -47,14 +48,14 @@ azure = [ "azure-cognitiveservices-speech~=1.42.0"]
|
||||
cartesia = [ "cartesia~=2.0.3", "websockets~=13.1" ]
|
||||
cerebras = []
|
||||
deepseek = []
|
||||
daily = [ "daily-python~=0.19.3" ]
|
||||
daily = [ "daily-python~=0.19.4" ]
|
||||
deepgram = [ "deepgram-sdk~=4.1.0" ]
|
||||
elevenlabs = [ "websockets~=13.1" ]
|
||||
fal = [ "fal-client~=0.5.9" ]
|
||||
fireworks = []
|
||||
fish = [ "ormsgpack~=1.7.0", "websockets~=13.1" ]
|
||||
gladia = [ "websockets~=13.1" ]
|
||||
google = [ "google-cloud-speech~=2.32.0", "google-cloud-texttospeech~=2.26.0", "google-genai~=1.14.0", "websockets~=13.1" ]
|
||||
google = [ "google-cloud-speech~=2.32.0", "google-cloud-texttospeech~=2.26.0", "google-genai~=1.24.0", "websockets~=13.1" ]
|
||||
grok = []
|
||||
groq = [ "groq~=0.23.0" ]
|
||||
gstreamer = [ "pygobject~=3.50.0" ]
|
||||
@@ -64,7 +65,7 @@ langchain = [ "langchain~=0.3.20", "langchain-community~=0.3.20", "langchain-ope
|
||||
livekit = [ "livekit~=0.22.0", "livekit-api~=0.8.2", "tenacity~=9.0.0" ]
|
||||
lmnt = [ "websockets~=13.1" ]
|
||||
local = [ "pyaudio~=0.2.14" ]
|
||||
mcp = [ "mcp[cli]~=1.6.0" ]
|
||||
mcp = [ "mcp[cli]~=1.9.4" ]
|
||||
mem0 = [ "mem0ai~=0.1.94" ]
|
||||
mlx-whisper = [ "mlx-whisper~=0.4.2" ]
|
||||
moondream = [ "einops~=0.8.0", "timm~=1.0.13", "transformers~=4.48.0" ]
|
||||
@@ -123,8 +124,21 @@ select = [
|
||||
"D", # Docstring rules
|
||||
"I", # Import rules
|
||||
]
|
||||
# Ignore requirement for __init__ docstrings
|
||||
ignore = ["D107"]
|
||||
ignore = [
|
||||
"D105", # Missing docstring in magic methods (__str__, __repr__, etc.)
|
||||
]
|
||||
|
||||
[tool.ruff.lint.per-file-ignores]
|
||||
# Skip docstring checks for non-source code
|
||||
"examples/**/*.py" = ["D"]
|
||||
"tests/**/*.py" = ["D"]
|
||||
"scripts/**/*.py" = ["D"]
|
||||
"docs/**/*.py" = ["D"]
|
||||
# Skip D104 (missing docstring in public package) for __init__.py files
|
||||
"**/__init__.py" = ["D104"]
|
||||
# Skip specific rules for generated protobuf files
|
||||
"**/*_pb2.py" = ["D"]
|
||||
"src/pipecat/services/__init__.py" = ["D"]
|
||||
|
||||
[tool.ruff.lint.pydocstyle]
|
||||
convention = "google"
|
||||
|
||||
@@ -111,11 +111,16 @@ TESTS_26 = [
|
||||
# ("26d-gemini-multimodal-live-text.py", PROMPT_SIMPLE_MATH, None),
|
||||
]
|
||||
|
||||
TESTS_40 = [
|
||||
("40-aws-nova-sonic.py", PROMPT_SIMPLE_MATH, None),
|
||||
]
|
||||
|
||||
TESTS = [
|
||||
*TESTS_07,
|
||||
*TESTS_14,
|
||||
*TESTS_19,
|
||||
*TESTS_26,
|
||||
*TESTS_40,
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -2,4 +2,4 @@ ruff format src
|
||||
ruff format examples
|
||||
ruff format tests
|
||||
ruff format scripts
|
||||
ruff check --select I --fix
|
||||
ruff check --select I,D --fix
|
||||
@@ -1,3 +1,27 @@
|
||||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
|
||||
NO_COLOR=1 ruff format --diff
|
||||
# Color codes for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
echo "🔍 Running pre-commit checks..."
|
||||
|
||||
# Change to project root (one level up from scripts/)
|
||||
cd "$(dirname "$0")/.."
|
||||
|
||||
# Format check
|
||||
echo "📝 Checking code formatting..."
|
||||
if ! NO_COLOR=1 ruff format --diff --check; then
|
||||
echo -e "${RED}❌ Code formatting issues found. Run 'ruff format' to fix.${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Lint check
|
||||
echo "🔍 Running linter..."
|
||||
if ! ruff check; then
|
||||
echo -e "${RED}❌ Linting issues found.${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "${GREEN}✅ All pre-commit checks passed!${NC}"
|
||||
@@ -1,3 +1,15 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base adapter for LLM provider integration.
|
||||
|
||||
This module provides the abstract base class for implementing LLM provider-specific
|
||||
adapters that handle tool format conversion and standardization.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, List, Union, cast
|
||||
|
||||
@@ -7,12 +19,35 @@ from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
|
||||
|
||||
class BaseLLMAdapter(ABC):
|
||||
"""Abstract base class for LLM provider adapters.
|
||||
|
||||
Provides a standard interface for converting between Pipecat's standardized
|
||||
tool schemas and provider-specific tool formats. Subclasses must implement
|
||||
provider-specific conversion logic.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Any]:
|
||||
"""Converts tools to the provider's format."""
|
||||
"""Convert tools schema to the provider's specific format.
|
||||
|
||||
Args:
|
||||
tools_schema: The standardized tools schema to convert.
|
||||
|
||||
Returns:
|
||||
List of tools in the provider's expected format.
|
||||
"""
|
||||
pass
|
||||
|
||||
def from_standard_tools(self, tools: Any) -> List[Any]:
|
||||
"""Convert tools from standard format to provider format.
|
||||
|
||||
Args:
|
||||
tools: Tools in standard format or provider-specific format.
|
||||
|
||||
Returns:
|
||||
List of tools converted to provider format, or original tools
|
||||
if not in standard format.
|
||||
"""
|
||||
if isinstance(tools, ToolsSchema):
|
||||
logger.debug(f"Retrieving the tools using the adapter: {type(self)}")
|
||||
return self.to_provider_tools_format(tools)
|
||||
|
||||
296
src/pipecat/adapters/schemas/direct_function.py
Normal file
296
src/pipecat/adapters/schemas/direct_function.py
Normal file
@@ -0,0 +1,296 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Direct function wrapper utilities for LLM function calling.
|
||||
|
||||
This module provides utilities for wrapping "direct" functions that handle LLM
|
||||
function calls. Direct functions have their metadata automatically extracted
|
||||
from function signatures and docstrings, allowing them to be used without
|
||||
accompanying configurations (as FunctionSchemas or in provider-specific
|
||||
formats).
|
||||
"""
|
||||
|
||||
import inspect
|
||||
import types
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
List,
|
||||
Mapping,
|
||||
Protocol,
|
||||
Set,
|
||||
Tuple,
|
||||
Union,
|
||||
get_args,
|
||||
get_origin,
|
||||
get_type_hints,
|
||||
)
|
||||
|
||||
import docstring_parser
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
|
||||
|
||||
class DirectFunction(Protocol):
    """Structural type for a "direct" handler of LLM function calls.

    A "direct" function describes itself: its metadata is extracted
    automatically from its signature and docstring, so it can be used without
    an accompanying function configuration (either a `FunctionSchema` or a
    provider-specific format).
    """

    async def __call__(self, params: "FunctionCallParams", **kwargs: Any) -> None:
        """Run the direct function.

        Args:
            params: Function call parameters from the LLM service.
            **kwargs: Additional keyword arguments passed to the function.
        """
        ...
|
||||
|
||||
|
||||
class BaseDirectFunctionWrapper:
|
||||
"""Base class for a wrapper around a DirectFunction.
|
||||
|
||||
Provides functionality to:
|
||||
|
||||
- extract metadata from the function signature and docstring
|
||||
- use that metadata to generate a corresponding FunctionSchema
|
||||
"""
|
||||
|
||||
def __init__(self, function: Callable):
|
||||
"""Initialize the direct function wrapper.
|
||||
|
||||
Args:
|
||||
function: The function to wrap and extract metadata from.
|
||||
"""
|
||||
self.__class__.validate_function(function)
|
||||
self.function = function
|
||||
self._initialize_metadata()
|
||||
|
||||
@classmethod
|
||||
def special_first_param_name(cls) -> str:
|
||||
"""Get the name of the special first function parameter.
|
||||
|
||||
The special first parameter is ignored by metadata extraction as it's
|
||||
not relevant to the LLM (e.g., 'params' for FunctionCallParams).
|
||||
|
||||
Returns:
|
||||
The name of the special first parameter.
|
||||
"""
|
||||
raise NotImplementedError("Subclasses must define the special first parameter name.")
|
||||
|
||||
@classmethod
|
||||
def validate_function(cls, function: Callable) -> None:
|
||||
"""Validate that the function meets direct function requirements.
|
||||
|
||||
Args:
|
||||
function: The function to validate.
|
||||
|
||||
Raises:
|
||||
Exception: If function doesn't meet requirements (not async, missing
|
||||
parameters, incorrect first parameter name).
|
||||
"""
|
||||
if not inspect.iscoroutinefunction(function):
|
||||
raise Exception(f"Direct function {function.__name__} must be async")
|
||||
params = list(inspect.signature(function).parameters.items())
|
||||
special_first_param_name = cls.special_first_param_name()
|
||||
if len(params) == 0:
|
||||
raise Exception(
|
||||
f"Direct function {function.__name__} must have at least one parameter ({special_first_param_name})"
|
||||
)
|
||||
first_param_name = params[0][0]
|
||||
if first_param_name != special_first_param_name:
|
||||
raise Exception(
|
||||
f"Direct function {function.__name__} first parameter must be named '{special_first_param_name}'"
|
||||
)
|
||||
|
||||
def to_function_schema(self) -> FunctionSchema:
|
||||
"""Convert the wrapped function to a FunctionSchema.
|
||||
|
||||
Returns:
|
||||
A FunctionSchema instance with extracted metadata.
|
||||
"""
|
||||
return FunctionSchema(
|
||||
name=self.name,
|
||||
description=self.description,
|
||||
properties=self.properties,
|
||||
required=self.required,
|
||||
)
|
||||
|
||||
def _initialize_metadata(self):
|
||||
"""Initialize metadata from function signature and docstring."""
|
||||
# Get function name
|
||||
self.name = self.function.__name__
|
||||
|
||||
# Parse docstring for description and parameters
|
||||
docstring = docstring_parser.parse(inspect.getdoc(self.function))
|
||||
|
||||
# Get function description
|
||||
self.description = (docstring.description or "").strip()
|
||||
|
||||
# Get function parameters as JSON schemas, and the list of required parameters
|
||||
self.properties, self.required = self._get_parameters_as_jsonschema(
|
||||
self.function, docstring.params
|
||||
)
|
||||
|
||||
# TODO: maybe to better support things like enums, check if each type is a pydantic type and use its convert-to-jsonschema function
|
||||
def _get_parameters_as_jsonschema(
|
||||
self, func: Callable, docstring_params: List[docstring_parser.DocstringParam]
|
||||
) -> Tuple[Dict[str, Any], List[str]]:
|
||||
"""Get function parameters as a dictionary of JSON schemas and a list of required parameters.
|
||||
|
||||
Ignore the first parameter, as it's expected to be the "special" one.
|
||||
|
||||
Args:
|
||||
func: Function to get parameters from.
|
||||
docstring_params: List of parameters extracted from the function's docstring.
|
||||
|
||||
Returns:
|
||||
A tuple containing:
|
||||
|
||||
- A dictionary mapping each function parameter to its JSON schema
|
||||
- A list of required parameter names
|
||||
"""
|
||||
sig = inspect.signature(func)
|
||||
hints = get_type_hints(func)
|
||||
properties = {}
|
||||
required = []
|
||||
|
||||
for name, param in sig.parameters.items():
|
||||
# Ignore 'self' parameter
|
||||
if name == "self":
|
||||
continue
|
||||
|
||||
# Ignore the first parameter, which is expected to be the "special" one
|
||||
# (We have already validated that this is the case in validate_function())
|
||||
is_first_param = name == next(iter(sig.parameters))
|
||||
if is_first_param:
|
||||
continue
|
||||
|
||||
type_hint = hints.get(name)
|
||||
|
||||
# Convert type hint to JSON schema
|
||||
properties[name] = self._typehint_to_jsonschema(type_hint)
|
||||
|
||||
# Add whether the parameter is required
|
||||
# If the parameter has no default value, it's required
|
||||
if param.default is inspect.Parameter.empty:
|
||||
required.append(name)
|
||||
|
||||
# Add parameter description from docstring
|
||||
for doc_param in docstring_params:
|
||||
if doc_param.arg_name == name:
|
||||
properties[name]["description"] = doc_param.description or ""
|
||||
|
||||
return properties, required
|
||||
|
||||
def _typehint_to_jsonschema(self, type_hint: Any) -> Dict[str, Any]:
|
||||
"""Convert a Python type hint to a JSON Schema.
|
||||
|
||||
Args:
|
||||
type_hint: A Python type hint
|
||||
|
||||
Returns:
|
||||
A dictionary representing the JSON Schema
|
||||
"""
|
||||
if type_hint is None:
|
||||
return {}
|
||||
|
||||
# Handle basic types
|
||||
if type_hint is type(None):
|
||||
return {"type": "null"}
|
||||
if type_hint is str:
|
||||
return {"type": "string"}
|
||||
elif type_hint is int:
|
||||
return {"type": "integer"}
|
||||
elif type_hint is float:
|
||||
return {"type": "number"}
|
||||
elif type_hint is bool:
|
||||
return {"type": "boolean"}
|
||||
elif type_hint is dict or type_hint is Dict:
|
||||
return {"type": "object"}
|
||||
elif type_hint is list or type_hint is List:
|
||||
return {"type": "array"}
|
||||
|
||||
# Get origin and arguments for complex types
|
||||
origin = get_origin(type_hint)
|
||||
args = get_args(type_hint)
|
||||
|
||||
# Handle Optional/Union types
|
||||
if origin is Union or origin is types.UnionType:
|
||||
return {"anyOf": [self._typehint_to_jsonschema(arg) for arg in args]}
|
||||
|
||||
# Handle List, Tuple, Set with specific item types
|
||||
if origin in (list, List, tuple, Tuple, set, Set) and args:
|
||||
return {"type": "array", "items": self._typehint_to_jsonschema(args[0])}
|
||||
|
||||
# Handle Dict with specific key/value types
|
||||
if origin in (dict, Dict) and len(args) == 2:
|
||||
# For JSON Schema, keys must be strings
|
||||
return {"type": "object", "additionalProperties": self._typehint_to_jsonschema(args[1])}
|
||||
|
||||
# Handle TypedDict
|
||||
if hasattr(type_hint, "__annotations__"):
|
||||
properties = {}
|
||||
required = []
|
||||
|
||||
# NOTE: this does not yet support some fields being required and others not, which could happen when:
|
||||
# - the base class is a TypedDict with required fields (total=True or not specified) and the derived class has optional fields (total=False)
|
||||
# - Python 3.11+ NotRequired is used
|
||||
all_fields_required = getattr(type_hint, "__total__", True)
|
||||
|
||||
for field_name, field_type in get_type_hints(type_hint).items():
|
||||
properties[field_name] = self._typehint_to_jsonschema(field_type)
|
||||
if all_fields_required:
|
||||
required.append(field_name)
|
||||
|
||||
schema = {"type": "object", "properties": properties}
|
||||
|
||||
if required:
|
||||
schema["required"] = required
|
||||
|
||||
return schema
|
||||
|
||||
# Default to any type if we can't determine the specific schema
|
||||
return {}
|
||||
|
||||
|
||||
class DirectFunctionWrapper(BaseDirectFunctionWrapper):
    """Concrete wrapper for a DirectFunction used in LLM function calling.

    On top of the base wrapper's responsibilities, this class:

    - Names the special first parameter that direct functions must declare
    - Extracts metadata from the function signature and docstring
    - Generates a corresponding FunctionSchema
    - Helps with function invocation
    """

    @classmethod
    def special_first_param_name(cls) -> str:
        """Return the name direct functions must give their first parameter.

        Returns:
            The string "params" which is expected as the first parameter.
        """
        return "params"

    async def invoke(self, args: Mapping[str, Any], params: "FunctionCallParams"):
        """Call the wrapped function and hand back whatever it returns.

        Args:
            args: Arguments to pass to the function, expanded as keyword
                arguments.
            params: Function call parameters from the LLM service, passed as
                the ``params`` keyword argument.

        Returns:
            The result of the function call.
        """
        outcome = await self.function(params=params, **args)
        return outcome
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Function schema utilities for AI tool definitions.
|
||||
|
||||
This module provides standardized function schema representation for defining
|
||||
tools and functions used with AI models, ensuring consistent formatting
|
||||
across different AI service providers.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
@@ -13,17 +20,19 @@ class FunctionSchema:
|
||||
Provides a structured way to define function tools used with AI models like OpenAI.
|
||||
This schema defines the function's name, description, parameter properties, and
|
||||
required parameters, following specifications required by AI service providers.
|
||||
|
||||
Args:
|
||||
name: Name of the function to be called.
|
||||
description: Description of what the function does.
|
||||
properties: Dictionary defining parameter types, descriptions, and constraints.
|
||||
required: List of property names that are required parameters.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, name: str, description: str, properties: Dict[str, Any], required: List[str]
|
||||
) -> None:
|
||||
"""Initialize the function schema.
|
||||
|
||||
Args:
|
||||
name: Name of the function to be called.
|
||||
description: Description of what the function does.
|
||||
properties: Dictionary defining parameter types, descriptions, and constraints.
|
||||
required: List of property names that are required parameters.
|
||||
"""
|
||||
self._name = name
|
||||
self._description = description
|
||||
self._properties = properties
|
||||
|
||||
@@ -4,40 +4,88 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Tools schema definitions for function calling adapters.
|
||||
|
||||
This module provides schemas for managing both standardized function tools
|
||||
and custom adapter-specific tools in the Pipecat framework.
|
||||
"""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from pipecat.adapters.schemas.direct_function import DirectFunction, DirectFunctionWrapper
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
|
||||
|
||||
class AdapterType(Enum):
    """Adapter identifiers that custom tools can be registered under.

    Parameters:
        GEMINI: Google Gemini adapter - currently the only service supporting custom tools.
    """

    # For now Gemini is the only service to which custom tools can be added.
    GEMINI = "gemini"
|
||||
|
||||
|
||||
class ToolsSchema:
    """Schema for managing both standard and custom function calling tools.

    This class provides a unified interface for handling standardized function
    schemas alongside custom tools that may not follow the standard format,
    such as adapter-specific search tools.
    """

    def __init__(
        self,
        standard_tools: List[FunctionSchema | DirectFunction],
        custom_tools: Optional[Dict[AdapterType, List[Dict[str, Any]]]] = None,
    ) -> None:
        """Initialize the tools schema.

        Args:
            standard_tools: List of tools, each either a FunctionSchema or a
                direct function (any callable), which is converted to a
                FunctionSchema through DirectFunctionWrapper.
            custom_tools: Dictionary mapping adapter types to their custom tool
                definitions. These tools may not follow the FunctionSchema
                format (e.g., search_tool). Defaults to None.

        Raises:
            TypeError: If an entry of ``standard_tools`` is neither a
                FunctionSchema nor callable.
        """

        def _map_standard_tools(tools):
            # Normalize every entry to a FunctionSchema so the adapters only
            # ever have to deal with one representation.
            schemas = []
            for tool in tools:
                if isinstance(tool, FunctionSchema):
                    schemas.append(tool)
                elif callable(tool):
                    wrapper = DirectFunctionWrapper(tool)
                    schemas.append(wrapper.to_function_schema())
                else:
                    raise TypeError(f"Unsupported tool type: {type(tool)}")
            return schemas

        self._standard_tools = _map_standard_tools(standard_tools)
        self._custom_tools = custom_tools

    @property
    def standard_tools(self) -> List[FunctionSchema]:
        """Get the list of standard function schema tools.

        Returns:
            List of tools following the FunctionSchema format. Direct functions
            passed to the constructor appear here already converted.
        """
        return self._standard_tools

    @property
    def custom_tools(self) -> Optional[Dict[AdapterType, List[Dict[str, Any]]]]:
        """Get the custom tools dictionary.

        Returns:
            Dictionary mapping adapter types to their custom tool definitions,
            or None when no custom tools were provided.
        """
        return self._custom_tools

    @custom_tools.setter
    def custom_tools(self, value: Optional[Dict[AdapterType, List[Dict[str, Any]]]]) -> None:
        """Set the custom tools dictionary.

        Args:
            value: Dictionary mapping adapter types to their custom tool
                definitions, or None to clear them.
        """
        self._custom_tools = value
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Anthropic LLM adapter for Pipecat."""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
||||
@@ -12,8 +14,22 @@ from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
|
||||
|
||||
class AnthropicLLMAdapter(BaseLLMAdapter):
|
||||
"""Adapter for converting tool schemas to Anthropic's function-calling format.
|
||||
|
||||
This adapter handles the conversion of Pipecat's standard function schemas
|
||||
to the specific format required by Anthropic's Claude models for function calling.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _to_anthropic_function_format(function: FunctionSchema) -> Dict[str, Any]:
|
||||
"""Convert a single function schema to Anthropic's format.
|
||||
|
||||
Args:
|
||||
function: The function schema to convert.
|
||||
|
||||
Returns:
|
||||
Dictionary containing the function definition in Anthropic's format.
|
||||
"""
|
||||
return {
|
||||
"name": function.name,
|
||||
"description": function.description,
|
||||
@@ -25,10 +41,13 @@ class AnthropicLLMAdapter(BaseLLMAdapter):
|
||||
}
|
||||
|
||||
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
|
||||
"""Converts function schemas to Anthropic's function-calling format.
|
||||
"""Convert function schemas to Anthropic's function-calling format.
|
||||
|
||||
:return: Anthropic formatted function call definition.
|
||||
Args:
|
||||
tools_schema: The tools schema containing functions to convert.
|
||||
|
||||
Returns:
|
||||
List of function definitions formatted for Anthropic's API.
|
||||
"""
|
||||
|
||||
functions_schema = tools_schema.standard_tools
|
||||
return [self._to_anthropic_function_format(func) for func in functions_schema]
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""AWS Nova Sonic LLM adapter for Pipecat."""
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, List
|
||||
|
||||
@@ -12,8 +15,22 @@ from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
|
||||
|
||||
class AWSNovaSonicLLMAdapter(BaseLLMAdapter):
|
||||
"""Adapter for AWS Nova Sonic language models.
|
||||
|
||||
Converts Pipecat's standard function schemas into AWS Nova Sonic's
|
||||
specific function-calling format, enabling tool use with Nova Sonic models.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _to_aws_nova_sonic_function_format(function: FunctionSchema) -> Dict[str, Any]:
|
||||
"""Convert a function schema to AWS Nova Sonic format.
|
||||
|
||||
Args:
|
||||
function: The function schema to convert.
|
||||
|
||||
Returns:
|
||||
Dictionary in AWS Nova Sonic function format with toolSpec structure.
|
||||
"""
|
||||
return {
|
||||
"toolSpec": {
|
||||
"name": function.name,
|
||||
@@ -31,10 +48,13 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter):
|
||||
}
|
||||
|
||||
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
|
||||
"""Converts function schemas to AWS Nova Sonic function-calling format.
|
||||
"""Convert tools schema to AWS Nova Sonic function-calling format.
|
||||
|
||||
:return: AWS Nova Sonic formatted function call definition.
|
||||
Args:
|
||||
tools_schema: The tools schema containing function definitions to convert.
|
||||
|
||||
Returns:
|
||||
List of dictionaries in AWS Nova Sonic function format.
|
||||
"""
|
||||
|
||||
functions_schema = tools_schema.standard_tools
|
||||
return [self._to_aws_nova_sonic_function_format(func) for func in functions_schema]
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""AWS Bedrock LLM adapter for Pipecat."""
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
||||
@@ -12,8 +14,22 @@ from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
|
||||
|
||||
class AWSBedrockLLMAdapter(BaseLLMAdapter):
|
||||
"""Adapter for AWS Bedrock LLM integration with Pipecat.
|
||||
|
||||
Provides conversion utilities for transforming Pipecat function schemas
|
||||
into AWS Bedrock's expected tool format for function calling capabilities.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _to_bedrock_function_format(function: FunctionSchema) -> Dict[str, Any]:
|
||||
"""Convert a function schema to Bedrock's tool format.
|
||||
|
||||
Args:
|
||||
function: The function schema to convert.
|
||||
|
||||
Returns:
|
||||
Dictionary formatted for Bedrock's tool specification.
|
||||
"""
|
||||
return {
|
||||
"toolSpec": {
|
||||
"name": function.name,
|
||||
@@ -29,10 +45,13 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter):
|
||||
}
|
||||
|
||||
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
|
||||
"""Converts function schemas to Bedrock's function-calling format.
|
||||
"""Convert function schemas to Bedrock's function-calling format.
|
||||
|
||||
:return: Bedrock formatted function call definition.
|
||||
Args:
|
||||
tools_schema: The tools schema containing functions to convert.
|
||||
|
||||
Returns:
|
||||
List of Bedrock formatted function call definitions.
|
||||
"""
|
||||
|
||||
functions_schema = tools_schema.standard_tools
|
||||
return [self._to_bedrock_function_format(func) for func in functions_schema]
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Gemini LLM adapter for Pipecat."""
|
||||
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
||||
@@ -11,12 +13,23 @@ from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
|
||||
|
||||
|
||||
class GeminiLLMAdapter(BaseLLMAdapter):
|
||||
"""LLM adapter for Google's Gemini service.
|
||||
|
||||
Provides tool schema conversion functionality to transform standard tool
|
||||
definitions into Gemini's specific function-calling format for use with
|
||||
Gemini LLM models.
|
||||
"""
|
||||
|
||||
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
|
||||
"""Converts function schemas to Gemini's function-calling format.
|
||||
"""Convert tool schemas to Gemini's function-calling format.
|
||||
|
||||
:return: Gemini formatted function call definition.
|
||||
Args:
|
||||
tools_schema: The tools schema containing standard and custom tool definitions.
|
||||
|
||||
Returns:
|
||||
List of tool definitions formatted for Gemini's function-calling API.
|
||||
Includes both converted standard tools and any custom Gemini-specific tools.
|
||||
"""
|
||||
|
||||
functions_schema = tools_schema.standard_tools
|
||||
formatted_standard_tools = [
|
||||
{"function_declarations": [func.to_default_dict() for func in functions_schema]}
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""OpenAI LLM adapter for Pipecat."""
|
||||
|
||||
from typing import List
|
||||
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
@@ -12,10 +15,22 @@ from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
|
||||
|
||||
class OpenAILLMAdapter(BaseLLMAdapter):
|
||||
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[ChatCompletionToolParam]:
|
||||
"""Converts function schemas to OpenAI's function-calling format.
|
||||
"""Adapter for converting tool schemas to OpenAI's format.
|
||||
|
||||
:return: OpenAI formatted function call definition.
|
||||
Provides conversion utilities for transforming Pipecat's standard tool
|
||||
schemas into the format expected by OpenAI's ChatCompletion API for
|
||||
function calling capabilities.
|
||||
"""
|
||||
|
||||
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[ChatCompletionToolParam]:
|
||||
"""Convert function schemas to OpenAI's function-calling format.
|
||||
|
||||
Args:
|
||||
tools_schema: The Pipecat tools schema to convert.
|
||||
|
||||
Returns:
|
||||
List of OpenAI formatted function call definitions ready for use
|
||||
with ChatCompletion API.
|
||||
"""
|
||||
functions_schema = tools_schema.standard_tools
|
||||
return [
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""OpenAI Realtime LLM adapter for Pipecat."""
|
||||
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from pipecat.adapters.base_llm_adapter import BaseLLMAdapter
|
||||
@@ -11,8 +14,22 @@ from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
|
||||
|
||||
class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
|
||||
"""LLM adapter for OpenAI Realtime API function calling.
|
||||
|
||||
Converts Pipecat's tool schemas into the specific format required by
|
||||
OpenAI's Realtime API for function calling capabilities.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _to_openai_realtime_function_format(function: FunctionSchema) -> Dict[str, Any]:
|
||||
"""Convert a function schema to OpenAI Realtime format.
|
||||
|
||||
Args:
|
||||
function: The function schema to convert.
|
||||
|
||||
Returns:
|
||||
Dictionary in OpenAI Realtime function format.
|
||||
"""
|
||||
return {
|
||||
"type": "function",
|
||||
"name": function.name,
|
||||
@@ -25,10 +42,13 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
|
||||
}
|
||||
|
||||
def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
|
||||
"""Converts function schemas to Openai Realtime function-calling format.
|
||||
"""Convert tool schemas to OpenAI Realtime function-calling format.
|
||||
|
||||
:return: Openai Realtime formatted function call definition.
|
||||
Args:
|
||||
tools_schema: The tools schema containing functions to convert.
|
||||
|
||||
Returns:
|
||||
List of function definitions in OpenAI Realtime format.
|
||||
"""
|
||||
|
||||
functions_schema = tools_schema.standard_tools
|
||||
return [self._to_openai_realtime_function_format(func) for func in functions_schema]
|
||||
|
||||
@@ -4,44 +4,68 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base audio filter interface for input transport audio processing.
|
||||
|
||||
This module provides the abstract base class for implementing audio filters
|
||||
that process audio data before VAD and downstream processing in input transports.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from pipecat.frames.frames import FilterControlFrame
|
||||
|
||||
|
||||
class BaseAudioFilter(ABC):
    """Abstract interface for audio filters used by an input transport.

    When an audio filter is handed to the input transport, the transport uses
    it to process audio before VAD and before pushing the audio downstream.
    Control frames exist to update filter settings or to enable or disable the
    filter at runtime.
    """

    @abstractmethod
    async def start(self, sample_rate: int):
        """Set the filter up when the input transport starts.

        Called by the input transport once it has started, so the filter can
        initialize itself. The transport's sample rate is passed in so the
        filter can adjust to it.

        Args:
            sample_rate: The sample rate of the input transport in Hz.
        """

    @abstractmethod
    async def stop(self):
        """Tear the filter down when the input transport stops.

        Called by the input transport while it is stopping.
        """

    @abstractmethod
    async def process_frame(self, frame: FilterControlFrame):
        """Handle a control frame that reconfigures the filter at runtime.

        Called whenever the input transport receives a FilterControlFrame.

        Args:
            frame: The control frame containing filter commands or settings.
        """

    @abstractmethod
    async def filter(self, audio: bytes) -> bytes:
        """Run the filter over a chunk of audio.

        Args:
            audio: Raw audio data as bytes to be filtered.

        Returns:
            Filtered audio data as bytes.
        """
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Koala noise suppression audio filter for Pipecat.
|
||||
|
||||
This module provides an audio filter implementation using PicoVoice's Koala
|
||||
Noise Suppression engine to reduce background noise in audio streams.
|
||||
"""
|
||||
|
||||
from typing import Sequence
|
||||
|
||||
import numpy as np
|
||||
@@ -21,12 +27,19 @@ except ModuleNotFoundError as e:
|
||||
|
||||
|
||||
class KoalaFilter(BaseAudioFilter):
|
||||
"""This is an audio filter that uses Koala Noise Suppression (from
|
||||
PicoVoice).
|
||||
"""Audio filter using Koala Noise Suppression from PicoVoice.
|
||||
|
||||
Provides real-time noise suppression for audio streams using PicoVoice's
|
||||
Koala engine. The filter buffers audio data to match Koala's required
|
||||
frame length and processes it in chunks.
|
||||
"""
|
||||
|
||||
def __init__(self, *, access_key: str) -> None:
|
||||
"""Initialize the Koala noise suppression filter.
|
||||
|
||||
Args:
|
||||
access_key: PicoVoice access key for Koala engine authentication.
|
||||
"""
|
||||
self._access_key = access_key
|
||||
|
||||
self._filtering = True
|
||||
@@ -36,6 +49,11 @@ class KoalaFilter(BaseAudioFilter):
|
||||
self._audio_buffer = bytearray()
|
||||
|
||||
async def start(self, sample_rate: int):
|
||||
"""Initialize the filter with the transport's sample rate.
|
||||
|
||||
Args:
|
||||
sample_rate: The sample rate of the input transport in Hz.
|
||||
"""
|
||||
self._sample_rate = sample_rate
|
||||
if self._sample_rate != self._koala.sample_rate:
|
||||
logger.warning(
|
||||
@@ -44,13 +62,30 @@ class KoalaFilter(BaseAudioFilter):
|
||||
self._koala_ready = False
|
||||
|
||||
async def stop(self):
|
||||
"""Clean up the Koala engine when stopping."""
|
||||
self._koala.reset()
|
||||
|
||||
async def process_frame(self, frame: FilterControlFrame):
|
||||
"""Process control frames to enable/disable filtering.
|
||||
|
||||
Args:
|
||||
frame: The control frame containing filter commands.
|
||||
"""
|
||||
if isinstance(frame, FilterEnableFrame):
|
||||
self._filtering = frame.enable
|
||||
|
||||
async def filter(self, audio: bytes) -> bytes:
|
||||
"""Apply Koala noise suppression to audio data.
|
||||
|
||||
Buffers incoming audio and processes it in chunks that match Koala's
|
||||
required frame length. Returns filtered audio data.
|
||||
|
||||
Args:
|
||||
audio: Raw audio data as bytes to be filtered.
|
||||
|
||||
Returns:
|
||||
Noise-suppressed audio data as bytes.
|
||||
"""
|
||||
if not self._koala_ready or not self._filtering:
|
||||
return audio
|
||||
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Krisp noise reduction audio filter for Pipecat.
|
||||
|
||||
This module provides an audio filter implementation using Krisp's noise
|
||||
reduction technology to suppress background noise in audio streams.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
@@ -21,14 +27,27 @@ except ModuleNotFoundError as e:
|
||||
|
||||
|
||||
class KrispProcessorManager:
|
||||
"""
|
||||
Ensures that only one KrispAudioProcessor instance exists for the entire program.
|
||||
"""Singleton manager for KrispAudioProcessor instances.
|
||||
|
||||
Ensures that only one KrispAudioProcessor instance exists for the entire
|
||||
program.
|
||||
"""
|
||||
|
||||
_krisp_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_processor(cls, sample_rate: int, sample_type: str, channels: int, model_path: str):
|
||||
"""Get or create a KrispAudioProcessor instance.
|
||||
|
||||
Args:
|
||||
sample_rate: Audio sample rate in Hz.
|
||||
sample_type: Audio sample type (e.g., "PCM_16").
|
||||
channels: Number of audio channels.
|
||||
model_path: Path to the Krisp model file.
|
||||
|
||||
Returns:
|
||||
Shared KrispAudioProcessor instance.
|
||||
"""
|
||||
if cls._krisp_instance is None:
|
||||
cls._krisp_instance = KrispAudioProcessor(
|
||||
sample_rate, sample_type, channels, model_path
|
||||
@@ -37,14 +56,26 @@ class KrispProcessorManager:
|
||||
|
||||
|
||||
class KrispFilter(BaseAudioFilter):
|
||||
"""Audio filter using Krisp noise reduction technology.
|
||||
|
||||
Provides real-time noise reduction for audio streams using Krisp's
|
||||
proprietary noise suppression algorithms. Requires a Krisp model file
|
||||
for operation.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, sample_type: str = "PCM_16", channels: int = 1, model_path: str = None
|
||||
) -> None:
|
||||
"""Initializes the KrispAudioProcessor with customizable audio processing settings.
|
||||
"""Initialize the Krisp noise reduction filter.
|
||||
|
||||
:param sample_type: The type of audio sample, default is 'PCM_16'.
|
||||
:param channels: Number of audio channels, default is 1.
|
||||
:param model_path: Path to the Krisp model; defaults to environment variable KRISP_MODEL_PATH if not provided.
|
||||
Args:
|
||||
sample_type: The audio sample format. Defaults to "PCM_16".
|
||||
channels: Number of audio channels. Defaults to 1.
|
||||
model_path: Path to the Krisp model file. If None, uses KRISP_MODEL_PATH
|
||||
environment variable.
|
||||
|
||||
Raises:
|
||||
ValueError: If model_path is not provided and KRISP_MODEL_PATH is not set.
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
@@ -63,19 +94,41 @@ class KrispFilter(BaseAudioFilter):
|
||||
self._krisp_processor = None
|
||||
|
||||
async def start(self, sample_rate: int):
|
||||
"""Initialize the Krisp processor with the transport's sample rate.
|
||||
|
||||
Args:
|
||||
sample_rate: The sample rate of the input transport in Hz.
|
||||
"""
|
||||
self._sample_rate = sample_rate
|
||||
self._krisp_processor = KrispProcessorManager.get_processor(
|
||||
self._sample_rate, self._sample_type, self._channels, self._model_path
|
||||
)
|
||||
|
||||
async def stop(self):
|
||||
"""Clean up the Krisp processor when stopping."""
|
||||
self._krisp_processor = None
|
||||
|
||||
async def process_frame(self, frame: FilterControlFrame):
|
||||
"""Process control frames to enable/disable filtering.
|
||||
|
||||
Args:
|
||||
frame: The control frame containing filter commands.
|
||||
"""
|
||||
if isinstance(frame, FilterEnableFrame):
|
||||
self._filtering = frame.enable
|
||||
|
||||
async def filter(self, audio: bytes) -> bytes:
|
||||
"""Apply Krisp noise reduction to audio data.
|
||||
|
||||
Converts audio to float32, applies Krisp noise reduction processing,
|
||||
and returns the filtered audio clipped to int16 range.
|
||||
|
||||
Args:
|
||||
audio: Raw audio data as bytes to be filtered.
|
||||
|
||||
Returns:
|
||||
Noise-reduced audio data as bytes.
|
||||
"""
|
||||
if not self._filtering:
|
||||
return audio
|
||||
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Noisereduce audio filter for Pipecat.
|
||||
|
||||
This module provides an audio filter implementation using the noisereduce
|
||||
library to reduce background noise in audio streams through spectral
|
||||
gating algorithms.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
|
||||
@@ -21,21 +28,51 @@ except ModuleNotFoundError as e:
|
||||
|
||||
|
||||
class NoisereduceFilter(BaseAudioFilter):
|
||||
"""Audio filter using the noisereduce library for noise suppression.
|
||||
|
||||
Applies spectral gating noise reduction algorithms to suppress background
|
||||
noise in audio streams. Uses the noisereduce library's default noise
|
||||
reduction parameters.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
"""Initialize the noisereduce filter."""
|
||||
self._filtering = True
|
||||
self._sample_rate = 0
|
||||
|
||||
async def start(self, sample_rate: int):
|
||||
"""Initialize the filter with the transport's sample rate.
|
||||
|
||||
Args:
|
||||
sample_rate: The sample rate of the input transport in Hz.
|
||||
"""
|
||||
self._sample_rate = sample_rate
|
||||
|
||||
async def stop(self):
|
||||
"""Clean up the filter when stopping."""
|
||||
pass
|
||||
|
||||
async def process_frame(self, frame: FilterControlFrame):
|
||||
"""Process control frames to enable/disable filtering.
|
||||
|
||||
Args:
|
||||
frame: The control frame containing filter commands.
|
||||
"""
|
||||
if isinstance(frame, FilterEnableFrame):
|
||||
self._filtering = frame.enable
|
||||
|
||||
async def filter(self, audio: bytes) -> bytes:
|
||||
"""Apply noise reduction to audio data using spectral gating.
|
||||
|
||||
Converts audio to float32, applies noisereduce processing, and returns
|
||||
the filtered audio clipped to int16 range.
|
||||
|
||||
Args:
|
||||
audio: Raw audio data as bytes to be filtered.
|
||||
|
||||
Returns:
|
||||
Noise-reduced audio data as bytes.
|
||||
"""
|
||||
if not self._filtering:
|
||||
return audio
|
||||
|
||||
|
||||
@@ -4,31 +4,51 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base interruption strategy for determining when users can interrupt bot speech."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseInterruptionStrategy(ABC):
    """Abstract interface for strategies that gate user interruptions.

    An interruption strategy decides when the user may interrupt the bot while
    the bot is speaking. Strategies could, for example, be based on audio
    volume or on the number of words the user spoke.
    """

    async def append_audio(self, audio: bytes, sample_rate: int):
        """Feed audio to the strategy for analysis.

        Not every strategy uses audio; this default implementation does nothing.

        Args:
            audio: Raw audio bytes to append.
            sample_rate: Sample rate of the audio data in Hz.
        """
        return None

    async def append_text(self, text: str):
        """Feed text to the strategy for analysis.

        Not every strategy uses text; this default implementation does nothing.

        Args:
            text: Text string to append for analysis.
        """
        return None

    @abstractmethod
    async def should_interrupt(self) -> bool:
        """Decide whether the user should interrupt the bot.

        Called once the user stops speaking. The decision is based on the
        audio and/or text aggregated so far.

        Returns:
            True if the user should interrupt the bot, False otherwise.
        """
|
||||
|
||||
|
||||
@@ -4,31 +4,47 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Minimum words interruption strategy for word count-based interruptions."""
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.interruptions.base_interruption_strategy import BaseInterruptionStrategy
|
||||
|
||||
|
||||
class MinWordsInterruptionStrategy(BaseInterruptionStrategy):
|
||||
"""This is an interruption strategy based on a minimum number of words said
|
||||
"""Interruption strategy based on minimum number of words spoken.
|
||||
|
||||
This is an interruption strategy based on a minimum number of words said
|
||||
by the user. That is, the strategy will be true if the user has said at
|
||||
least that amount of words.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, *, min_words: int):
|
||||
"""Initialize the minimum words interruption strategy.
|
||||
|
||||
Args:
|
||||
min_words: Minimum number of words required to trigger an interruption.
|
||||
"""
|
||||
super().__init__()
|
||||
self._min_words = min_words
|
||||
self._text = ""
|
||||
|
||||
async def append_text(self, text: str):
|
||||
"""Appends text for later analysis. Not all strategies need to handle
|
||||
text.
|
||||
"""Append text for word count analysis.
|
||||
|
||||
Args:
|
||||
text: Text string to append to the accumulated text.
|
||||
|
||||
Note: Not all strategies need to handle text.
|
||||
"""
|
||||
self._text += text
|
||||
|
||||
async def should_interrupt(self) -> bool:
|
||||
"""Check if the minimum word count has been reached.
|
||||
|
||||
Returns:
|
||||
True if the user has spoken at least the minimum number of words.
|
||||
"""
|
||||
word_count = len(self._text.split())
|
||||
interrupt = word_count >= self._min_words
|
||||
logger.debug(
|
||||
@@ -37,4 +53,5 @@ class MinWordsInterruptionStrategy(BaseInterruptionStrategy):
|
||||
return interrupt
|
||||
|
||||
async def reset(self):
|
||||
"""Reset the accumulated text for the next analysis cycle."""
|
||||
self._text = ""
|
||||
|
||||
@@ -4,50 +4,73 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base audio mixer for output transport integration.
|
||||
|
||||
Provides the abstract base class for audio mixers that can be integrated with
|
||||
output transports to mix incoming audio with generated audio from the mixer.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from pipecat.frames.frames import MixerControlFrame
|
||||
|
||||
|
||||
class BaseAudioMixer(ABC):
    """Base class for output transport audio mixers.

    This is a base class for output transport audio mixers. If an audio mixer
    is provided to the output transport it will be used to mix the audio frames
    coming into the transport with the audio generated from the mixer. There
    are control frames to update mixer settings or to enable or disable the
    mixer at runtime.
    """

    @abstractmethod
    async def start(self, sample_rate: int):
        """Initialize the mixer when the output transport starts.

        This will be called from the output transport when the transport is
        started. It can be used to initialize the mixer. The output transport
        sample rate is provided so the mixer can adjust to that sample rate.

        Args:
            sample_rate: The sample rate of the output transport in Hz.
        """
        pass

    @abstractmethod
    async def stop(self):
        """Clean up the mixer when the output transport stops.

        This will be called from the output transport when the transport is
        stopping.
        """
        pass

    @abstractmethod
    async def process_frame(self, frame: MixerControlFrame):
        """Process mixer control frames from the transport.

        This will be called when the output transport receives a
        MixerControlFrame.

        Args:
            frame: The mixer control frame to process.
        """
        pass

    @abstractmethod
    async def mix(self, audio: bytes) -> bytes:
        """Mix transport audio with mixer-generated audio.

        This is called with the audio that is about to be sent from the
        output transport and that should be mixed with the mixer audio if the
        mixer is enabled.

        Args:
            audio: Raw audio bytes from the transport to mix.

        Returns:
            Mixed audio bytes combining transport and mixer audio.
        """
        pass
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Soundfile-based audio mixer for file playback integration.
|
||||
|
||||
Provides an audio mixer that combines incoming audio with audio loaded from
|
||||
files using the soundfile library. Supports multiple audio formats and
|
||||
runtime configuration changes.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Any, Dict, Mapping
|
||||
|
||||
@@ -24,7 +31,9 @@ except ModuleNotFoundError as e:
|
||||
|
||||
|
||||
class SoundfileMixer(BaseAudioMixer):
|
||||
"""This is an audio mixer that mixes incoming audio with audio from a
|
||||
"""Audio mixer that combines incoming audio with file-based audio.
|
||||
|
||||
This is an audio mixer that mixes incoming audio with audio from a
|
||||
file. It uses the soundfile library to load files so it supports multiple
|
||||
formats. The audio files need to only have one channel (mono) and it needs
|
||||
to match the sample rate of the output transport.
|
||||
@@ -33,7 +42,6 @@ class SoundfileMixer(BaseAudioMixer):
|
||||
`MixerUpdateSettingsFrame` has the following settings available: `sound`
|
||||
(str) and `volume` (float) to be able to update to a different sound file or
|
||||
to change the volume at runtime.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -46,6 +54,16 @@ class SoundfileMixer(BaseAudioMixer):
|
||||
loop: bool = True,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the soundfile mixer.
|
||||
|
||||
Args:
|
||||
sound_files: Mapping of sound names to file paths for loading.
|
||||
default_sound: Name of the default sound to play initially.
|
||||
volume: Mixing volume level (0.0 to 1.0). Defaults to 0.4.
|
||||
mixing: Whether mixing is initially enabled. Defaults to True.
|
||||
loop: Whether to loop audio files when they end. Defaults to True.
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._sound_files = sound_files
|
||||
self._volume = volume
|
||||
@@ -58,14 +76,28 @@ class SoundfileMixer(BaseAudioMixer):
|
||||
self._loop = loop
|
||||
|
||||
async def start(self, sample_rate: int):
|
||||
"""Initialize the mixer and load all sound files.
|
||||
|
||||
Args:
|
||||
sample_rate: The sample rate of the output transport in Hz.
|
||||
"""
|
||||
self._sample_rate = sample_rate
|
||||
for sound_name, file_name in self._sound_files.items():
|
||||
await asyncio.to_thread(self._load_sound_file, sound_name, file_name)
|
||||
|
||||
async def stop(self):
|
||||
"""Clean up mixer resources.
|
||||
|
||||
Currently performs no cleanup as sound data is managed by garbage collection.
|
||||
"""
|
||||
pass
|
||||
|
||||
async def process_frame(self, frame: MixerControlFrame):
|
||||
"""Process mixer control frames to update settings or enable/disable mixing.
|
||||
|
||||
Args:
|
||||
frame: The mixer control frame to process.
|
||||
"""
|
||||
if isinstance(frame, MixerUpdateSettingsFrame):
|
||||
await self._update_settings(frame)
|
||||
elif isinstance(frame, MixerEnableFrame):
|
||||
@@ -73,12 +105,22 @@ class SoundfileMixer(BaseAudioMixer):
|
||||
pass
|
||||
|
||||
async def mix(self, audio: bytes) -> bytes:
|
||||
"""Mix transport audio with the current sound file.
|
||||
|
||||
Args:
|
||||
audio: Raw audio bytes from the transport to mix.
|
||||
|
||||
Returns:
|
||||
Mixed audio bytes combining transport and file audio.
|
||||
"""
|
||||
return self._mix_with_sound(audio)
|
||||
|
||||
async def _enable_mixing(self, enable: bool):
|
||||
"""Enable or disable audio mixing."""
|
||||
self._mixing = enable
|
||||
|
||||
async def _update_settings(self, frame: MixerUpdateSettingsFrame):
|
||||
"""Update mixer settings from a control frame."""
|
||||
for setting, value in frame.settings.items():
|
||||
match setting:
|
||||
case "sound":
|
||||
@@ -89,6 +131,11 @@ class SoundfileMixer(BaseAudioMixer):
|
||||
await self._update_loop(value)
|
||||
|
||||
async def _change_sound(self, sound: str):
|
||||
"""Change the currently playing sound file.
|
||||
|
||||
Args:
|
||||
sound: Name of the sound file to switch to.
|
||||
"""
|
||||
if sound in self._sound_files:
|
||||
self._current_sound = sound
|
||||
self._sound_pos = 0
|
||||
@@ -96,12 +143,15 @@ class SoundfileMixer(BaseAudioMixer):
|
||||
logger.error(f"Sound {sound} is not available")
|
||||
|
||||
async def _update_volume(self, volume: float):
|
||||
"""Update the mixing volume level."""
|
||||
self._volume = volume
|
||||
|
||||
async def _update_loop(self, loop: bool):
|
||||
"""Update the looping behavior."""
|
||||
self._loop = loop
|
||||
|
||||
def _load_sound_file(self, sound_name: str, file_name: str):
|
||||
"""Load an audio file into memory for mixing."""
|
||||
try:
|
||||
logger.debug(f"Loading mixer sound from {file_name}")
|
||||
sound, sample_rate = sf.read(file_name, dtype="int16")
|
||||
@@ -118,10 +168,7 @@ class SoundfileMixer(BaseAudioMixer):
|
||||
logger.error(f"Unable to open file {file_name}: {e}")
|
||||
|
||||
def _mix_with_sound(self, audio: bytes):
|
||||
"""Mixes raw audio frames with chunks of the same length from the sound
|
||||
file.
|
||||
|
||||
"""
|
||||
"""Mix raw audio frames with chunks of the same length from the sound file."""
|
||||
if not self._mixing or not self._current_sound in self._sounds:
|
||||
return audio
|
||||
|
||||
|
||||
@@ -4,27 +4,35 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base audio resampler interface for Pipecat.
|
||||
|
||||
This module defines the abstract base class for audio resampling implementations,
|
||||
providing a common interface for converting audio between different sample rates.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseAudioResampler(ABC):
    """Abstract base class for audio resampling implementations.

    This class defines the interface that all audio resampling implementations
    must follow, providing a standardized way to convert audio data between
    different sample rates.
    """

    @abstractmethod
    async def resample(self, audio: bytes, in_rate: int, out_rate: int) -> bytes:
        """Resamples the given audio data to a different sample rate.

        This is an abstract method that must be implemented in subclasses.

        Args:
            audio: The audio data to be resampled, as raw bytes.
            in_rate: The original sample rate of the audio data in Hz.
            out_rate: The desired sample rate for the output audio in Hz.

        Returns:
            The resampled audio data as raw bytes.
        """
        pass
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Resampy-based audio resampler implementation.
|
||||
|
||||
This module provides an audio resampler that uses the resampy library
|
||||
for high-quality audio sample rate conversion.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import resampy
|
||||
|
||||
@@ -11,12 +17,31 @@ from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
|
||||
|
||||
|
||||
class ResampyResampler(BaseAudioResampler):
|
||||
"""Audio resampler implementation using the resampy library."""
|
||||
"""Audio resampler implementation using the resampy library.
|
||||
|
||||
This resampler uses the resampy library's Kaiser windowing filter
|
||||
for high-quality audio resampling with good performance characteristics.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the resampy resampler.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional keyword arguments (currently unused).
|
||||
"""
|
||||
pass
|
||||
|
||||
async def resample(self, audio: bytes, in_rate: int, out_rate: int) -> bytes:
|
||||
"""Resample audio data using resampy library.
|
||||
|
||||
Args:
|
||||
audio: Input audio data as raw bytes (16-bit signed integers).
|
||||
in_rate: Original sample rate in Hz.
|
||||
out_rate: Target sample rate in Hz.
|
||||
|
||||
Returns:
|
||||
Resampled audio data as raw bytes (16-bit signed integers).
|
||||
"""
|
||||
if in_rate == out_rate:
|
||||
return audio
|
||||
audio_data = np.frombuffer(audio, dtype=np.int16)
|
||||
|
||||
@@ -4,6 +4,17 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""SoX-based audio resampler implementation.
|
||||
|
||||
This module provides an audio resampler that uses the SoX resampler library
|
||||
for very high-quality audio sample rate conversion.
|
||||
|
||||
When to use the SOXRAudioResampler:
|
||||
1. For batch processing of complete audio files
|
||||
2. When you have all the audio data available at once
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import soxr
|
||||
|
||||
@@ -11,12 +22,32 @@ from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
|
||||
|
||||
|
||||
class SOXRAudioResampler(BaseAudioResampler):
|
||||
"""Audio resampler implementation using the SoX resampler library."""
|
||||
"""Audio resampler implementation using the SoX resampler library.
|
||||
|
||||
This resampler uses the SoX resampler library configured for very high
|
||||
quality (VHQ) resampling, providing excellent audio quality at the cost
|
||||
of additional computational overhead.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the SoX audio resampler.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional keyword arguments (currently unused).
|
||||
"""
|
||||
pass
|
||||
|
||||
async def resample(self, audio: bytes, in_rate: int, out_rate: int) -> bytes:
|
||||
"""Resample audio data using SoX resampler library.
|
||||
|
||||
Args:
|
||||
audio: Input audio data as raw bytes (16-bit signed integers).
|
||||
in_rate: Original sample rate in Hz.
|
||||
out_rate: Target sample rate in Hz.
|
||||
|
||||
Returns:
|
||||
Resampled audio data as raw bytes (16-bit signed integers).
|
||||
"""
|
||||
if in_rate == out_rate:
|
||||
return audio
|
||||
audio_data = np.frombuffer(audio, dtype=np.int16)
|
||||
|
||||
101
src/pipecat/audio/resamplers/soxr_stream_resampler.py
Normal file
101
src/pipecat/audio/resamplers/soxr_stream_resampler.py
Normal file
@@ -0,0 +1,101 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""SoX-based audio resampler stream implementation.
|
||||
|
||||
This module provides an audio resampler that uses the SoX ResampleStream library
|
||||
for very high quality audio sample rate conversion.
|
||||
|
||||
When to use the SOXRStreamAudioResampler:
|
||||
1. For real-time processing scenarios
|
||||
2. When dealing with very long audio signals
|
||||
3. When processing audio in chunks or streams
|
||||
4. When you need to reuse the same resampler configuration multiple times, as it saves initialization overhead
|
||||
|
||||
"""
|
||||
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import soxr
|
||||
|
||||
from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
|
||||
|
||||
CLEAR_STREAM_AFTER_SECS = 0.2
|
||||
|
||||
|
||||
class SOXRStreamAudioResampler(BaseAudioResampler):
    """Audio resampler implementation using the SoX ResampleStream library.

    This resampler uses the SoX ResampleStream library configured for very high
    quality (VHQ) resampling, providing excellent audio quality at the cost
    of additional computational overhead.
    It keeps an internal history which avoids clicks at chunk boundaries.

    Notes:
        - Only supports mono audio (1 channel).
        - Input must be 16-bit signed PCM audio as raw bytes.
        - An instance is bound to the first (in_rate, out_rate) pair it
          resamples; a later call with a different pair raises ValueError.
    """

    def __init__(self, **kwargs):
        """Initialize the resampler.

        Args:
            **kwargs: Additional keyword arguments (currently unused).
        """
        self._in_rate: float | None = None
        self._out_rate: float | None = None
        # Monotonic timestamp of the most recent resample call; only ever
        # compared against other time.monotonic() readings.
        self._last_resample_time: float = 0
        # Created lazily on the first resample call that needs conversion.
        self._soxr_stream: soxr.ResampleStream | None = None

    def _initialize(self, in_rate: float, out_rate: float):
        """Create the underlying stream and bind this instance to the given rates."""
        self._in_rate = in_rate
        self._out_rate = out_rate
        self._last_resample_time = time.monotonic()
        self._soxr_stream = soxr.ResampleStream(
            in_rate=in_rate, out_rate=out_rate, num_channels=1, quality="VHQ", dtype="int16"
        )

    def _maybe_clear_internal_state(self):
        """Clear the stream after an idle gap, then record the current call time."""
        # Use a monotonic clock: the gap is an elapsed-time measurement and
        # must not be affected by wall-clock adjustments (NTP, DST, manual).
        current_time = time.monotonic()
        time_since_last_resample = current_time - self._last_resample_time
        # If more than CLEAR_STREAM_AFTER_SECS seconds have passed, clear the resampler state
        if time_since_last_resample > CLEAR_STREAM_AFTER_SECS:
            if self._soxr_stream:
                self._soxr_stream.clear()
        self._last_resample_time = current_time

    def _maybe_initialize_sox_stream(self, in_rate: int, out_rate: int):
        """Prepare the stream for a resample call.

        Creates the stream on first use; on later calls clears it if it has
        been idle for too long.

        Raises:
            ValueError: If the requested rates differ from the rates this
                instance was initialized with.
        """
        if self._soxr_stream is None:
            self._initialize(in_rate, out_rate)
        else:
            self._maybe_clear_internal_state()

        if self._in_rate != in_rate or self._out_rate != out_rate:
            raise ValueError(
                f"SOXRStreamAudioResampler cannot be reused with different sample rates: "
                f"expected {self._in_rate}->{self._out_rate}, got {in_rate}->{out_rate}"
            )

    async def resample(self, audio: bytes, in_rate: int, out_rate: int) -> bytes:
        """Resample audio data using soxr.ResampleStream resampler library.

        Args:
            audio: Input audio data as raw bytes (16-bit signed integers).
            in_rate: Original sample rate in Hz.
            out_rate: Target sample rate in Hz.

        Returns:
            Resampled audio data as raw bytes (16-bit signed integers). When
            both rates are equal the input is returned unchanged and the
            stream is not touched.

        Raises:
            ValueError: If the rates differ from those of an earlier call on
                this instance.
        """
        if in_rate == out_rate:
            return audio

        self._maybe_initialize_sox_stream(in_rate, out_rate)
        audio_data = np.frombuffer(audio, dtype=np.int16)
        resampled_audio = self._soxr_stream.resample_chunk(audio_data)
        result = resampled_audio.astype(np.int16).tobytes()
        return result
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base turn analyzer for determining end-of-turn in audio conversations.
|
||||
|
||||
This module provides the abstract base class and enumeration for analyzing
|
||||
when a user has finished speaking in a conversation.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Optional, Tuple
|
||||
@@ -12,6 +18,13 @@ from pipecat.metrics.metrics import MetricsData
|
||||
|
||||
|
||||
class EndOfTurnState(Enum):
    """Possible outcomes of an end-of-turn analysis.

    Parameters:
        COMPLETE: The user has finished their turn and stopped speaking.
        INCOMPLETE: The user is still speaking or may continue speaking.
    """

    COMPLETE = 1
    INCOMPLETE = 2
|
||||
|
||||
@@ -24,6 +37,12 @@ class BaseTurnAnalyzer(ABC):
|
||||
"""
|
||||
|
||||
def __init__(self, *, sample_rate: Optional[int] = None):
|
||||
"""Initialize the turn analyzer.
|
||||
|
||||
Args:
|
||||
sample_rate: Optional initial sample rate for audio processing.
|
||||
If provided, this will be used as the fixed sample rate.
|
||||
"""
|
||||
self._init_sample_rate = sample_rate
|
||||
self._sample_rate = 0
|
||||
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Smart turn analyzer base class using ML models for end-of-turn detection.
|
||||
|
||||
This module provides the base implementation for smart turn analyzers that use
|
||||
machine learning models to determine when a user has finished speaking, going
|
||||
beyond simple silence-based detection.
|
||||
"""
|
||||
|
||||
import time
|
||||
from abc import abstractmethod
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
@@ -23,6 +30,14 @@ USE_ONLY_LAST_VAD_SEGMENT = True
|
||||
|
||||
|
||||
class SmartTurnParams(BaseModel):
|
||||
"""Configuration parameters for smart turn analysis.
|
||||
|
||||
Parameters:
|
||||
stop_secs: Maximum silence duration in seconds before ending turn.
|
||||
pre_speech_ms: Milliseconds of audio to include before speech starts.
|
||||
max_duration_secs: Maximum duration in seconds for audio segments.
|
||||
"""
|
||||
|
||||
stop_secs: float = STOP_SECS
|
||||
pre_speech_ms: float = PRE_SPEECH_MS
|
||||
max_duration_secs: float = MAX_DURATION_SECONDS
|
||||
@@ -31,13 +46,28 @@ class SmartTurnParams(BaseModel):
|
||||
|
||||
|
||||
class SmartTurnTimeoutException(Exception):
    """Raised to signal that smart turn analysis timed out."""
|
||||
|
||||
|
||||
class BaseSmartTurn(BaseTurnAnalyzer):
|
||||
"""Base class for smart turn analyzers using ML models.
|
||||
|
||||
Provides common functionality for smart turn detection including audio
|
||||
buffering, speech tracking, and ML model integration. Subclasses must
|
||||
implement the specific model prediction logic.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, *, sample_rate: Optional[int] = None, params: Optional[SmartTurnParams] = None
|
||||
):
|
||||
"""Initialize the smart turn analyzer.
|
||||
|
||||
Args:
|
||||
sample_rate: Optional sample rate for audio processing.
|
||||
params: Configuration parameters for turn analysis behavior.
|
||||
"""
|
||||
super().__init__(sample_rate=sample_rate)
|
||||
self._params = params or SmartTurnParams()
|
||||
# Configuration
|
||||
@@ -50,9 +80,23 @@ class BaseSmartTurn(BaseTurnAnalyzer):
|
||||
|
||||
@property
|
||||
def speech_triggered(self) -> bool:
|
||||
"""Check if speech has been detected and triggered analysis.
|
||||
|
||||
Returns:
|
||||
True if speech has been detected and turn analysis is active.
|
||||
"""
|
||||
return self._speech_triggered
|
||||
|
||||
def append_audio(self, buffer: bytes, is_speech: bool) -> EndOfTurnState:
|
||||
"""Append audio data for turn analysis.
|
||||
|
||||
Args:
|
||||
buffer: Raw audio data bytes to append for analysis.
|
||||
is_speech: Whether the audio buffer contains detected speech.
|
||||
|
||||
Returns:
|
||||
Current end-of-turn state after processing the audio.
|
||||
"""
|
||||
# Convert raw audio to float32 format and append to the buffer
|
||||
audio_int16 = np.frombuffer(buffer, dtype=np.int16)
|
||||
audio_float32 = np.frombuffer(audio_int16, dtype=np.int16).astype(np.float32) / 32768.0
|
||||
@@ -92,6 +136,12 @@ class BaseSmartTurn(BaseTurnAnalyzer):
|
||||
return state
|
||||
|
||||
async def analyze_end_of_turn(self) -> Tuple[EndOfTurnState, Optional[MetricsData]]:
|
||||
"""Analyze the current audio state to determine if turn has ended.
|
||||
|
||||
Returns:
|
||||
Tuple containing the end-of-turn state and optional metrics data
|
||||
from the ML model analysis.
|
||||
"""
|
||||
state, result = await self._process_speech_segment(self._audio_buffer)
|
||||
if state == EndOfTurnState.COMPLETE or USE_ONLY_LAST_VAD_SEGMENT:
|
||||
self._clear(state)
|
||||
@@ -99,9 +149,11 @@ class BaseSmartTurn(BaseTurnAnalyzer):
|
||||
return state, result
|
||||
|
||||
def clear(self):
|
||||
"""Reset the turn analyzer to its initial state."""
|
||||
self._clear(EndOfTurnState.COMPLETE)
|
||||
|
||||
def _clear(self, turn_state: EndOfTurnState):
|
||||
"""Clear internal state based on turn completion status."""
|
||||
# If the state is still incomplete, keep the _speech_triggered as True
|
||||
self._speech_triggered = turn_state == EndOfTurnState.INCOMPLETE
|
||||
self._audio_buffer = []
|
||||
@@ -111,6 +163,7 @@ class BaseSmartTurn(BaseTurnAnalyzer):
|
||||
async def _process_speech_segment(
|
||||
self, audio_buffer
|
||||
) -> Tuple[EndOfTurnState, Optional[MetricsData]]:
|
||||
"""Process accumulated audio segment using ML model."""
|
||||
state = EndOfTurnState.INCOMPLETE
|
||||
|
||||
if not audio_buffer:
|
||||
@@ -188,14 +241,5 @@ class BaseSmartTurn(BaseTurnAnalyzer):
|
||||
|
||||
@abstractmethod
|
||||
async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
|
||||
"""Abstract method to predict if a turn has ended based on audio.
|
||||
|
||||
Args:
|
||||
audio_array: Float32 numpy array of audio samples at 16kHz.
|
||||
|
||||
Returns:
|
||||
Dictionary with:
|
||||
- prediction: 1 if turn is complete, else 0
|
||||
- probability: Confidence of the prediction
|
||||
"""
|
||||
"""Predict end-of-turn using ML model from audio data."""
|
||||
pass
|
||||
|
||||
@@ -4,6 +4,16 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Fal.ai smart turn analyzer implementation.
|
||||
|
||||
This module provides a smart turn analyzer that uses Fal.ai's hosted smart-turn model
|
||||
for end-of-turn detection in conversations.
|
||||
|
||||
Note: To learn more about the smart-turn model, visit:
|
||||
- https://fal.ai/models/fal-ai/smart-turn/playground
|
||||
- https://github.com/pipecat-ai/smart-turn
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import aiohttp
|
||||
@@ -12,6 +22,12 @@ from pipecat.audio.turn.smart_turn.http_smart_turn import HttpSmartTurnAnalyzer
|
||||
|
||||
|
||||
class FalSmartTurnAnalyzer(HttpSmartTurnAnalyzer):
|
||||
"""Smart turn analyzer using Fal.ai's hosted smart-turn model.
|
||||
|
||||
Extends HttpSmartTurnAnalyzer to provide integration with Fal.ai's
|
||||
smart turn detection API endpoint with proper authentication.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
@@ -20,6 +36,14 @@ class FalSmartTurnAnalyzer(HttpSmartTurnAnalyzer):
|
||||
api_key: Optional[str] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the Fal.ai smart turn analyzer.
|
||||
|
||||
Args:
|
||||
aiohttp_session: HTTP client session for making API requests.
|
||||
url: Fal.ai API endpoint URL for smart turn detection.
|
||||
api_key: API key for authenticating with Fal.ai service.
|
||||
**kwargs: Additional arguments passed to parent HttpSmartTurnAnalyzer.
|
||||
"""
|
||||
headers = {}
|
||||
if api_key:
|
||||
headers = {"Authorization": f"Key {api_key}"}
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""HTTP-based smart turn analyzer for remote ML inference.
|
||||
|
||||
This module provides a smart turn analyzer that sends audio data to remote
|
||||
HTTP endpoints for ML-based end-of-turn detection.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
from typing import Any, Dict, Optional
|
||||
@@ -16,6 +22,12 @@ from pipecat.audio.turn.smart_turn.base_smart_turn import BaseSmartTurn, SmartTu
|
||||
|
||||
|
||||
class HttpSmartTurnAnalyzer(BaseSmartTurn):
|
||||
"""Smart turn analyzer using HTTP-based ML inference.
|
||||
|
||||
Sends audio data to remote HTTP endpoints for ML-based end-of-turn
|
||||
prediction. Handles serialization, HTTP communication, and error recovery.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
@@ -24,12 +36,21 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn):
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the HTTP smart turn analyzer.
|
||||
|
||||
Args:
|
||||
url: HTTP endpoint URL for the smart turn ML service.
|
||||
aiohttp_session: HTTP client session for making requests.
|
||||
headers: Optional HTTP headers to include in requests.
|
||||
**kwargs: Additional arguments passed to BaseSmartTurn.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._url = url
|
||||
self._headers = headers or {}
|
||||
self._aiohttp_session = aiohttp_session
|
||||
|
||||
def _serialize_array(self, audio_array: np.ndarray) -> bytes:
|
||||
"""Serialize NumPy audio array to bytes for HTTP transmission."""
|
||||
logger.trace("Serializing NumPy array to bytes...")
|
||||
buffer = io.BytesIO()
|
||||
np.save(buffer, audio_array)
|
||||
@@ -38,6 +59,7 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn):
|
||||
return serialized_bytes
|
||||
|
||||
async def _send_raw_request(self, data_bytes: bytes) -> Dict[str, Any]:
|
||||
"""Send raw audio data to the HTTP endpoint for prediction."""
|
||||
headers = {"Content-Type": "application/octet-stream"}
|
||||
headers.update(self._headers)
|
||||
|
||||
@@ -83,6 +105,7 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn):
|
||||
raise Exception("Failed to send raw request to Daily Smart Turn.")
|
||||
|
||||
async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
|
||||
"""Predict end-of-turn using remote HTTP ML service."""
|
||||
try:
|
||||
serialized_array = self._serialize_array(audio_array)
|
||||
return await self._send_raw_request(serialized_array)
|
||||
|
||||
@@ -4,6 +4,11 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Local CoreML smart turn analyzer for on-device ML inference.
|
||||
|
||||
This module provides a smart turn analyzer that uses CoreML models for
|
||||
local end-of-turn detection without requiring network connectivity.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
@@ -25,7 +30,24 @@ except ModuleNotFoundError as e:
|
||||
|
||||
|
||||
class LocalCoreMLSmartTurnAnalyzer(BaseSmartTurn):
|
||||
"""Local smart turn analyzer using CoreML models.
|
||||
|
||||
Provides end-of-turn detection using locally-stored CoreML models,
|
||||
enabling offline operation without network dependencies. Optimized
|
||||
for Apple Silicon and other CoreML-compatible hardware.
|
||||
"""
|
||||
|
||||
def __init__(self, *, smart_turn_model_path: str, **kwargs):
|
||||
"""Initialize the local CoreML smart turn analyzer.
|
||||
|
||||
Args:
|
||||
smart_turn_model_path: Path to directory containing the CoreML model
|
||||
and feature extractor files.
|
||||
**kwargs: Additional arguments passed to BaseSmartTurn.
|
||||
|
||||
Raises:
|
||||
Exception: If smart_turn_model_path is not provided or model loading fails.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if not smart_turn_model_path:
|
||||
@@ -41,6 +63,7 @@ class LocalCoreMLSmartTurnAnalyzer(BaseSmartTurn):
|
||||
logger.debug("Loaded Local Smart Turn")
|
||||
|
||||
async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
|
||||
"""Predict end-of-turn using local CoreML model."""
|
||||
inputs = self._turn_processor(
|
||||
audio_array,
|
||||
sampling_rate=16000,
|
||||
|
||||
@@ -4,6 +4,11 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Local PyTorch smart turn analyzer for on-device ML inference.
|
||||
|
||||
This module provides a smart turn analyzer that uses PyTorch models for
|
||||
local end-of-turn detection without requiring network connectivity.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict
|
||||
|
||||
@@ -24,7 +29,21 @@ except ModuleNotFoundError as e:
|
||||
|
||||
|
||||
class LocalSmartTurnAnalyzer(BaseSmartTurn):
|
||||
"""Local smart turn analyzer using PyTorch models.
|
||||
|
||||
Provides end-of-turn detection using locally-stored PyTorch models,
|
||||
enabling offline operation without network dependencies. Uses
|
||||
Wav2Vec2-BERT architecture for audio sequence classification.
|
||||
"""
|
||||
|
||||
def __init__(self, *, smart_turn_model_path: str, **kwargs):
|
||||
"""Initialize the local PyTorch smart turn analyzer.
|
||||
|
||||
Args:
|
||||
smart_turn_model_path: Path to directory containing the PyTorch model
|
||||
and feature extractor files. If empty, uses default HuggingFace model.
|
||||
**kwargs: Additional arguments passed to BaseSmartTurn.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if not smart_turn_model_path:
|
||||
@@ -46,6 +65,7 @@ class LocalSmartTurnAnalyzer(BaseSmartTurn):
|
||||
logger.debug("Loaded Local Smart Turn")
|
||||
|
||||
async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
|
||||
"""Predict end-of-turn using local PyTorch model."""
|
||||
inputs = self._turn_processor(
|
||||
audio_array,
|
||||
sampling_rate=16000,
|
||||
|
||||
@@ -4,21 +4,87 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Audio utility functions for Pipecat.
|
||||
|
||||
This module provides common audio processing utilities including mixing,
|
||||
format conversion, volume calculation, and codec transformations for
|
||||
various audio formats used in Pipecat pipelines.
|
||||
"""
|
||||
|
||||
import audioop
|
||||
|
||||
import numpy as np
|
||||
import pyloudnorm as pyln
|
||||
import soxr
|
||||
|
||||
from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
|
||||
from pipecat.audio.resamplers.soxr_resampler import SOXRAudioResampler
|
||||
from pipecat.audio.resamplers.soxr_stream_resampler import SOXRStreamAudioResampler
|
||||
|
||||
|
||||
def create_default_resampler(**kwargs) -> BaseAudioResampler:
    """Build the legacy default resampler (deprecated entry point).

    .. deprecated:: 0.0.74
        This function will be removed in a future version. Use
        `create_stream_resampler` for real-time processing scenarios or
        `create_file_resampler` for batch processing of complete audio files.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to the resampler
            constructor.

    Returns:
        A new SOXRAudioResampler constructed from ``kwargs``.
    """
    import warnings

    deprecation_message = (
        "`create_default_resampler` is deprecated. "
        "Use `create_stream_resampler` for real-time processing scenarios or "
        "`create_file_resampler` for batch processing of complete audio files."
    )
    # stacklevel=2 attributes the warning to the caller instead of this helper.
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)

    legacy_resampler = SOXRAudioResampler(**kwargs)
    return legacy_resampler
|
||||
|
||||
|
||||
def create_file_resampler(**kwargs) -> BaseAudioResampler:
    """Build a resampler intended for batch processing of complete audio files.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to the resampler
            constructor.

    Returns:
        A new SOXRAudioResampler constructed from ``kwargs``.
    """
    file_resampler = SOXRAudioResampler(**kwargs)
    return file_resampler
|
||||
|
||||
|
||||
def create_stream_resampler(**kwargs) -> BaseAudioResampler:
    """Build a resampler intended for streamed audio.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to the resampler
            constructor.

    Returns:
        A new SOXRStreamAudioResampler constructed from ``kwargs``.
    """
    stream_resampler = SOXRStreamAudioResampler(**kwargs)
    return stream_resampler
|
||||
|
||||
|
||||
def mix_audio(audio1: bytes, audio2: bytes) -> bytes:
|
||||
"""Mix two audio streams together by adding their samples.
|
||||
|
||||
Both audio streams are assumed to be 16-bit signed integer PCM data.
|
||||
If the streams have different lengths, the shorter one is zero-padded
|
||||
to match the longer stream.
|
||||
|
||||
Args:
|
||||
audio1: First audio stream as raw bytes (16-bit signed integers).
|
||||
audio2: Second audio stream as raw bytes (16-bit signed integers).
|
||||
|
||||
Returns:
|
||||
Mixed audio data as raw bytes with samples clipped to 16-bit range.
|
||||
"""
|
||||
data1 = np.frombuffer(audio1, dtype=np.int16)
|
||||
data2 = np.frombuffer(audio2, dtype=np.int16)
|
||||
|
||||
@@ -37,6 +103,19 @@ def mix_audio(audio1: bytes, audio2: bytes) -> bytes:
|
||||
|
||||
|
||||
def interleave_stereo_audio(left_audio: bytes, right_audio: bytes) -> bytes:
|
||||
"""Interleave left and right mono audio channels into stereo audio.
|
||||
|
||||
Takes two mono audio streams and combines them into a single stereo
|
||||
stream by interleaving the samples (L, R, L, R, ...). If the channels
|
||||
have different lengths, both are truncated to the shorter length.
|
||||
|
||||
Args:
|
||||
left_audio: Left channel audio as raw bytes (16-bit signed integers).
|
||||
right_audio: Right channel audio as raw bytes (16-bit signed integers).
|
||||
|
||||
Returns:
|
||||
Interleaved stereo audio data as raw bytes.
|
||||
"""
|
||||
left = np.frombuffer(left_audio, dtype=np.int16)
|
||||
right = np.frombuffer(right_audio, dtype=np.int16)
|
||||
|
||||
@@ -50,12 +129,34 @@ def interleave_stereo_audio(left_audio: bytes, right_audio: bytes) -> bytes:
|
||||
|
||||
|
||||
def normalize_value(value, min_value, max_value):
    """Normalize a value to the range [0, 1] and clamp it to bounds.

    Args:
        value: The value to normalize.
        min_value: The minimum value of the input range.
        max_value: The maximum value of the input range.

    Returns:
        ``(value - min_value) / (max_value - min_value)`` clamped to [0, 1].
        When ``min_value == max_value`` the range has no width; in that case
        0 is returned for values at or below the bound and 1 for values
        above it, instead of dividing by zero.
    """
    span = max_value - min_value
    if span == 0:
        # Degenerate range: a ratio is undefined, so fall back to a step at
        # the single bound rather than raising ZeroDivisionError.
        return 1 if value > min_value else 0

    normalized = (value - min_value) / span
    return max(0, min(1, normalized))
|
||||
|
||||
|
||||
def calculate_audio_volume(audio: bytes, sample_rate: int) -> float:
|
||||
"""Calculate the loudness level of audio data using EBU R128 standard.
|
||||
|
||||
Uses the pyloudnorm library to calculate integrated loudness according
|
||||
to the EBU R128 recommendation, then normalizes the result to [0, 1].
|
||||
|
||||
Args:
|
||||
audio: Audio data as raw bytes (16-bit signed integers).
|
||||
sample_rate: Sample rate of the audio in Hz.
|
||||
|
||||
Returns:
|
||||
Normalized loudness value between 0 (quiet) and 1 (loud).
|
||||
"""
|
||||
audio_np = np.frombuffer(audio, dtype=np.int16)
|
||||
audio_float = audio_np.astype(np.float64)
|
||||
|
||||
@@ -71,12 +172,37 @@ def calculate_audio_volume(audio: bytes, sample_rate: int) -> float:
|
||||
|
||||
|
||||
def exp_smoothing(value: float, prev_value: float, factor: float) -> float:
    """Blend a new sample into a running value with exponential smoothing.

    The result moves from ``prev_value`` towards ``value`` by the fraction
    ``factor`` of their difference.

    Args:
        value: The new value to incorporate.
        prev_value: The previous smoothed value.
        factor: Smoothing factor, expected between 0 and 1. Higher values
            give more weight to the new value.

    Returns:
        ``prev_value + factor * (value - prev_value)``.
    """
    correction = factor * (value - prev_value)
    return prev_value + correction
|
||||
|
||||
|
||||
async def ulaw_to_pcm(
|
||||
ulaw_bytes: bytes, in_rate: int, out_rate: int, resampler: BaseAudioResampler
|
||||
):
|
||||
"""Convert μ-law encoded audio to PCM and optionally resample.
|
||||
|
||||
Args:
|
||||
ulaw_bytes: μ-law encoded audio data as raw bytes.
|
||||
in_rate: Original sample rate of the μ-law audio in Hz.
|
||||
out_rate: Desired output sample rate in Hz.
|
||||
resampler: Audio resampler instance for rate conversion.
|
||||
|
||||
Returns:
|
||||
PCM audio data as raw bytes at the specified output rate.
|
||||
"""
|
||||
# Convert μ-law to PCM
|
||||
in_pcm_bytes = audioop.ulaw2lin(ulaw_bytes, 2)
|
||||
|
||||
@@ -87,6 +213,17 @@ async def ulaw_to_pcm(
|
||||
|
||||
|
||||
async def pcm_to_ulaw(pcm_bytes: bytes, in_rate: int, out_rate: int, resampler: BaseAudioResampler):
|
||||
"""Convert PCM audio to μ-law encoding and optionally resample.
|
||||
|
||||
Args:
|
||||
pcm_bytes: PCM audio data as raw bytes (16-bit signed integers).
|
||||
in_rate: Original sample rate of the PCM audio in Hz.
|
||||
out_rate: Desired output sample rate in Hz.
|
||||
resampler: Audio resampler instance for rate conversion.
|
||||
|
||||
Returns:
|
||||
μ-law encoded audio data as raw bytes at the specified output rate.
|
||||
"""
|
||||
# Resample
|
||||
in_pcm_bytes = await resampler.resample(pcm_bytes, in_rate, out_rate)
|
||||
|
||||
@@ -99,6 +236,17 @@ async def pcm_to_ulaw(pcm_bytes: bytes, in_rate: int, out_rate: int, resampler:
|
||||
async def alaw_to_pcm(
|
||||
alaw_bytes: bytes, in_rate: int, out_rate: int, resampler: BaseAudioResampler
|
||||
) -> bytes:
|
||||
"""Convert A-law encoded audio to PCM and optionally resample.
|
||||
|
||||
Args:
|
||||
alaw_bytes: A-law encoded audio data as raw bytes.
|
||||
in_rate: Original sample rate of the A-law audio in Hz.
|
||||
out_rate: Desired output sample rate in Hz.
|
||||
resampler: Audio resampler instance for rate conversion.
|
||||
|
||||
Returns:
|
||||
PCM audio data as raw bytes at the specified output rate.
|
||||
"""
|
||||
# Convert a-law to PCM
|
||||
in_pcm_bytes = audioop.alaw2lin(alaw_bytes, 2)
|
||||
|
||||
@@ -109,6 +257,17 @@ async def alaw_to_pcm(
|
||||
|
||||
|
||||
async def pcm_to_alaw(pcm_bytes: bytes, in_rate: int, out_rate: int, resampler: BaseAudioResampler):
|
||||
"""Convert PCM audio to A-law encoding and optionally resample.
|
||||
|
||||
Args:
|
||||
pcm_bytes: PCM audio data as raw bytes (16-bit signed integers).
|
||||
in_rate: Original sample rate of the PCM audio in Hz.
|
||||
out_rate: Desired output sample rate in Hz.
|
||||
resampler: Audio resampler instance for rate conversion.
|
||||
|
||||
Returns:
|
||||
A-law encoded audio data as raw bytes at the specified output rate.
|
||||
"""
|
||||
# Resample
|
||||
in_pcm_bytes = await resampler.resample(pcm_bytes, in_rate, out_rate)
|
||||
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Silero Voice Activity Detection (VAD) implementation for Pipecat.
|
||||
|
||||
This module provides a VAD analyzer based on the Silero VAD ONNX model,
|
||||
which can detect voice activity in audio streams with high accuracy.
|
||||
Supports 8kHz and 16kHz sample rates.
|
||||
"""
|
||||
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
@@ -25,11 +32,20 @@ except ModuleNotFoundError as e:
|
||||
|
||||
|
||||
class SileroOnnxModel:
|
||||
"""ONNX runtime wrapper for the Silero VAD model.
|
||||
|
||||
Provides voice activity detection using the pre-trained Silero VAD model
|
||||
with ONNX runtime for efficient inference. Handles model state management
|
||||
and input validation for audio processing.
|
||||
"""
|
||||
|
||||
def __init__(self, path, force_onnx_cpu=True):
|
||||
import numpy as np
|
||||
|
||||
global np
|
||||
"""Initialize the Silero ONNX model.
|
||||
|
||||
Args:
|
||||
path: Path to the ONNX model file.
|
||||
force_onnx_cpu: Whether to force CPU execution provider.
|
||||
"""
|
||||
opts = onnxruntime.SessionOptions()
|
||||
opts.inter_op_num_threads = 1
|
||||
opts.intra_op_num_threads = 1
|
||||
@@ -45,6 +61,7 @@ class SileroOnnxModel:
|
||||
self.sample_rates = [8000, 16000]
|
||||
|
||||
def _validate_input(self, x, sr: int):
|
||||
"""Validate and preprocess input audio data."""
|
||||
if np.ndim(x) == 1:
|
||||
x = np.expand_dims(x, 0)
|
||||
if np.ndim(x) > 2:
|
||||
@@ -60,12 +77,18 @@ class SileroOnnxModel:
|
||||
return x, sr
|
||||
|
||||
def reset_states(self, batch_size=1):
    """Reinitialize the model's internal state buffers and bookkeeping.

    Args:
        batch_size: Batch dimension used for the fresh buffers. Defaults to 1.
    """
    state_shape = (2, batch_size, 128)
    # The context starts with zero columns, i.e. no carried-over samples.
    context_shape = (batch_size, 0)

    self._state = np.zeros(state_shape, dtype="float32")
    self._context = np.zeros(context_shape, dtype="float32")
    # Zeroed markers for the last seen sample rate and batch size.
    self._last_sr = self._last_batch_size = 0
|
||||
|
||||
def __call__(self, x, sr: int):
|
||||
"""Process audio input through the VAD model."""
|
||||
x, sr = self._validate_input(x, sr)
|
||||
num_samples = 512 if sr == 16000 else 256
|
||||
|
||||
@@ -105,7 +128,20 @@ class SileroOnnxModel:
|
||||
|
||||
|
||||
class SileroVADAnalyzer(VADAnalyzer):
|
||||
"""Voice Activity Detection analyzer using the Silero VAD model.
|
||||
|
||||
Implements VAD analysis using the pre-trained Silero ONNX model for
|
||||
accurate voice activity detection. Supports 8kHz and 16kHz sample rates
|
||||
with automatic model state management and periodic resets.
|
||||
"""
|
||||
|
||||
def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None):
|
||||
"""Initialize the Silero VAD analyzer.
|
||||
|
||||
Args:
|
||||
sample_rate: Audio sample rate (8000 or 16000 Hz). If None, will be set later.
|
||||
params: VAD parameters for detection thresholds and timing.
|
||||
"""
|
||||
super().__init__(sample_rate=sample_rate, params=params)
|
||||
|
||||
logger.debug("Loading Silero VAD model...")
|
||||
@@ -137,6 +173,14 @@ class SileroVADAnalyzer(VADAnalyzer):
|
||||
#
|
||||
|
||||
def set_sample_rate(self, sample_rate: int):
|
||||
"""Set the sample rate for audio processing.
|
||||
|
||||
Args:
|
||||
sample_rate: Audio sample rate (must be 8000 or 16000 Hz).
|
||||
|
||||
Raises:
|
||||
ValueError: If sample rate is not 8000 or 16000 Hz.
|
||||
"""
|
||||
if sample_rate != 16000 and sample_rate != 8000:
|
||||
raise ValueError(
|
||||
f"Silero VAD sample rate needs to be 16000 or 8000 (sample rate: {sample_rate})"
|
||||
@@ -145,9 +189,22 @@ class SileroVADAnalyzer(VADAnalyzer):
|
||||
super().set_sample_rate(sample_rate)
|
||||
|
||||
def num_frames_required(self) -> int:
    """Report how many audio frames one VAD analysis step needs.

    Returns:
        512 when the analyzer's sample rate is 16000 Hz, otherwise 256.
    """
    if self.sample_rate == 16000:
        return 512
    return 256
|
||||
|
||||
def voice_confidence(self, buffer) -> float:
|
||||
"""Calculate voice activity confidence for the given audio buffer.
|
||||
|
||||
Args:
|
||||
buffer: Audio buffer to analyze.
|
||||
|
||||
Returns:
|
||||
Voice confidence score between 0.0 and 1.0.
|
||||
"""
|
||||
try:
|
||||
audio_int16 = np.frombuffer(buffer, np.int16)
|
||||
# Divide by 32768 because we have signed 16-bit data.
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Voice Activity Detection (VAD) analyzer base classes and utilities.
|
||||
|
||||
This module provides the abstract base class for VAD analyzers and associated
|
||||
data structures for voice activity detection in audio streams. Includes state
|
||||
management, parameter configuration, and audio analysis framework.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
@@ -20,6 +27,15 @@ VAD_MIN_VOLUME = 0.6
|
||||
|
||||
|
||||
class VADState(Enum):
|
||||
"""Voice Activity Detection states.
|
||||
|
||||
Parameters:
|
||||
QUIET: No voice activity detected.
|
||||
STARTING: Voice activity beginning, transitioning from quiet.
|
||||
SPEAKING: Active voice detected and confirmed.
|
||||
STOPPING: Voice activity ending, transitioning to quiet.
|
||||
"""
|
||||
|
||||
QUIET = 1
|
||||
STARTING = 2
|
||||
SPEAKING = 3
|
||||
@@ -27,6 +43,15 @@ class VADState(Enum):
|
||||
|
||||
|
||||
class VADParams(BaseModel):
|
||||
"""Configuration parameters for Voice Activity Detection.
|
||||
|
||||
Parameters:
|
||||
confidence: Minimum confidence threshold for voice detection.
|
||||
start_secs: Duration to wait before confirming voice start.
|
||||
stop_secs: Duration to wait before confirming voice stop.
|
||||
min_volume: Minimum audio volume threshold for voice detection.
|
||||
"""
|
||||
|
||||
confidence: float = VAD_CONFIDENCE
|
||||
start_secs: float = VAD_START_SECS
|
||||
stop_secs: float = VAD_STOP_SECS
|
||||
@@ -34,7 +59,20 @@ class VADParams(BaseModel):
|
||||
|
||||
|
||||
class VADAnalyzer(ABC):
|
||||
"""Abstract base class for Voice Activity Detection analyzers.
|
||||
|
||||
Provides the framework for implementing VAD analysis with configurable
|
||||
parameters, state management, and audio processing capabilities.
|
||||
Subclasses must implement the core voice confidence calculation.
|
||||
"""
|
||||
|
||||
def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None):
|
||||
"""Initialize the VAD analyzer.
|
||||
|
||||
Args:
|
||||
sample_rate: Audio sample rate in Hz. If None, will be set later.
|
||||
params: VAD parameters for detection configuration.
|
||||
"""
|
||||
self._init_sample_rate = sample_rate
|
||||
self._sample_rate = 0
|
||||
self._params = params or VADParams()
|
||||
@@ -48,29 +86,67 @@ class VADAnalyzer(ABC):
|
||||
|
||||
@property
def sample_rate(self) -> int:
    """Get the current sample rate.

    Returns:
        The value of ``_sample_rate`` in Hz. The constructor initializes it
        to 0 and ``set_sample_rate()`` assigns the effective rate, so 0
        means no rate has been set yet.
    """
    return self._sample_rate
|
||||
|
||||
@property
def num_channels(self) -> int:
    """Get the number of audio channels.

    Returns:
        The value stored in ``_num_channels``.
    """
    # NOTE(review): the assignment of `_num_channels` is not visible in this
    # part of the file; presumably it is fixed at 1 (mono) -- confirm in the
    # constructor.
    return self._num_channels
|
||||
|
||||
@property
def params(self) -> VADParams:
    """Get the current VAD parameters.

    Returns:
        The ``VADParams`` instance held in ``_params``: the one given to the
        constructor (or a default ``VADParams()`` when none was given), or
        the one most recently passed to ``set_params()``.
    """
    return self._params
|
||||
|
||||
@abstractmethod
def num_frames_required(self) -> int:
    """Report how many audio frames a single analysis step needs.

    Abstract: concrete analyzers must override this. The base
    implementation has no body beyond this docstring and returns None.

    Returns:
        Number of frames needed for VAD processing.
    """
|
||||
|
||||
@abstractmethod
def voice_confidence(self, buffer) -> float:
    """Score how likely the given audio buffer contains voice.

    Abstract: concrete analyzers must override this. The base
    implementation has no body beyond this docstring and returns None.

    Args:
        buffer: Audio buffer to analyze.

    Returns:
        Voice confidence score between 0.0 and 1.0.
    """
|
||||
|
||||
def set_sample_rate(self, sample_rate: int):
    """Select the effective sample rate and re-apply the current parameters.

    Args:
        sample_rate: Audio sample rate in Hz. Only used when no (truthy)
            sample rate was given to the constructor.
    """
    forced_rate = self._init_sample_rate
    # A truthy constructor-supplied rate always wins over the argument.
    self._sample_rate = forced_rate if forced_rate else sample_rate
    # Re-run set_params() with the existing params now that the rate is known.
    self.set_params(self._params)
|
||||
|
||||
def set_params(self, params: VADParams):
|
||||
"""Set VAD parameters and recalculate internal values.
|
||||
|
||||
Args:
|
||||
params: VAD parameters for detection configuration.
|
||||
"""
|
||||
logger.debug(f"Setting VAD params to: {params}")
|
||||
self._params = params
|
||||
self._vad_frames = self.num_frames_required()
|
||||
@@ -85,10 +161,22 @@ class VADAnalyzer(ABC):
|
||||
self._vad_state: VADState = VADState.QUIET
|
||||
|
||||
def _get_smoothed_volume(self, audio: bytes) -> float:
    """Return the volume of ``audio`` blended with the previous volume.

    The volume is computed by ``calculate_audio_volume`` at the analyzer's
    sample rate, then passed through ``exp_smoothing`` together with
    ``_prev_volume`` and ``_smoothing_factor``.
    """
    return exp_smoothing(
        calculate_audio_volume(audio, self.sample_rate),
        self._prev_volume,
        self._smoothing_factor,
    )
|
||||
|
||||
def analyze_audio(self, buffer) -> VADState:
|
||||
"""Analyze audio buffer and return current VAD state.
|
||||
|
||||
Processes incoming audio data, maintains internal state, and determines
|
||||
voice activity status based on confidence and volume thresholds.
|
||||
|
||||
Args:
|
||||
buffer: Audio buffer to analyze.
|
||||
|
||||
Returns:
|
||||
Current VAD state after processing the buffer.
|
||||
"""
|
||||
self._vad_buffer += buffer
|
||||
|
||||
num_required_bytes = self._vad_frames_num_bytes
|
||||
|
||||
@@ -4,14 +4,33 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base clock interface for Pipecat timing operations."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseClock(ABC):
    """Interface that every Pipecat clock implementation must satisfy.

    Defines the two operations a clock exposes: starting it and reading its
    current time. Both are abstract, so this class cannot be instantiated
    directly.
    """

    @abstractmethod
    def get_time(self) -> int:
        """Read the clock.

        Returns:
            The current time as an integer. The unit and the reference
            point are defined by the concrete implementation.
        """

    @abstractmethod
    def start(self):
        """Start or initialize the clock.

        Implementations perform whatever setup their timing mechanism
        needs. Intended to be called before ``get_time()``.
        """
|
||||
|
||||
@@ -4,17 +4,42 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""System clock implementation for Pipecat."""
|
||||
|
||||
import time
|
||||
|
||||
from pipecat.clocks.base_clock import BaseClock
|
||||
|
||||
|
||||
class SystemClock(BaseClock):
    """Clock backed by the system's monotonic nanosecond counter.

    Time is reported as nanoseconds elapsed since ``start()``, measured with
    ``time.monotonic_ns()``.
    """

    def __init__(self):
        """Create the clock in its not-yet-started state.

        The reference point is 0 until ``start()`` records a real one.
        """
        self._time = 0

    def get_time(self) -> int:
        """Report the time elapsed since the clock was started.

        Returns:
            Nanoseconds since ``start()`` was called, or 0 if the clock has
            not been started yet.
        """
        if self._time > 0:
            return time.monotonic_ns() - self._time
        # No reference point recorded yet.
        return 0

    def start(self):
        """Record the current monotonic time as the reference point.

        Every later ``get_time()`` call is measured from this moment.
        """
        self._time = time.monotonic_ns()
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Daily.co room configuration utilities for Pipecat examples."""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
from typing import Optional
|
||||
@@ -14,6 +16,17 @@ from pipecat.transports.services.helpers.daily_rest import DailyRESTHelper
|
||||
|
||||
|
||||
async def configure(aiohttp_session: aiohttp.ClientSession):
    """Resolve the Daily.co room URL and token, discarding parsed arguments.

    Thin wrapper over ``configure_with_args`` that drops the third element
    of its result.

    Args:
        aiohttp_session: HTTP session passed through to
            ``configure_with_args``.

    Returns:
        Tuple of ``(url, token)``.

    Raises:
        Exception: Whatever ``configure_with_args`` raises; its docstring
            documents one for a missing room URL or API key.
    """
    room_url, room_token, _unused_args = await configure_with_args(aiohttp_session)
    return room_url, room_token
|
||||
|
||||
@@ -21,6 +34,18 @@ async def configure(aiohttp_session: aiohttp.ClientSession):
|
||||
async def configure_with_args(
|
||||
aiohttp_session: aiohttp.ClientSession, parser: Optional[argparse.ArgumentParser] = None
|
||||
):
|
||||
"""Configure Daily.co room with command-line argument parsing.
|
||||
|
||||
Args:
|
||||
aiohttp_session: HTTP session for making API requests.
|
||||
parser: Optional argument parser. If None, creates a default one.
|
||||
|
||||
Returns:
|
||||
Tuple containing room URL, authentication token, and parsed arguments.
|
||||
|
||||
Raises:
|
||||
Exception: If room URL or API key are not provided via arguments or environment.
|
||||
"""
|
||||
if not parser:
|
||||
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
|
||||
parser.add_argument(
|
||||
|
||||
@@ -4,10 +4,18 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Pipecat example runner with support for multiple transport types.
|
||||
|
||||
This module provides a unified interface for running Pipecat examples across
|
||||
different transport types including Daily.co, WebRTC, and Twilio. It handles
|
||||
setup, configuration, and lifecycle management for each transport type.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, Callable, Dict, Mapping, Optional
|
||||
@@ -35,6 +43,15 @@ load_dotenv(override=True)
|
||||
|
||||
|
||||
def get_transport_client_id(transport: BaseTransport, client: Any) -> str:
|
||||
"""Get client identifier from transport-specific client object.
|
||||
|
||||
Args:
|
||||
transport: The transport instance.
|
||||
client: Transport-specific client object.
|
||||
|
||||
Returns:
|
||||
Client identifier string, empty if transport not supported.
|
||||
"""
|
||||
if isinstance(transport, SmallWebRTCTransport):
|
||||
return client.pc_id
|
||||
elif isinstance(transport, DailyTransport):
|
||||
@@ -46,6 +63,13 @@ def get_transport_client_id(transport: BaseTransport, client: Any) -> str:
|
||||
async def maybe_capture_participant_camera(
|
||||
transport: BaseTransport, client: Any, framerate: int = 0
|
||||
):
|
||||
"""Capture participant camera video if transport supports it.
|
||||
|
||||
Args:
|
||||
transport: The transport instance.
|
||||
client: Transport-specific client object.
|
||||
framerate: Video capture framerate. Defaults to 0 (auto).
|
||||
"""
|
||||
if isinstance(transport, DailyTransport):
|
||||
await transport.capture_participant_video(
|
||||
client["id"], framerate=framerate, video_source="camera"
|
||||
@@ -55,17 +79,84 @@ async def maybe_capture_participant_camera(
|
||||
async def maybe_capture_participant_screen(
    transport: BaseTransport, client: Any, framerate: int = 0
):
    """Start capturing a participant's screen share on Daily transports.

    Does nothing for any transport that is not a ``DailyTransport``.

    Args:
        transport: The transport instance.
        client: Transport-specific client object; for Daily it is indexed
            with ``"id"`` to obtain the participant id.
        framerate: Capture framerate forwarded to the transport.
            Defaults to 0.
    """
    if not isinstance(transport, DailyTransport):
        # Other transports have no screen capture to request.
        return

    participant_id = client["id"]
    await transport.capture_participant_video(
        participant_id, framerate=framerate, video_source="screenVideo"
    )
|
||||
|
||||
|
||||
def smallwebrtc_sdp_cleanup_ice_candidates(text: str, pattern: str) -> str:
    """Filter the ICE candidate lines of an SDP document.

    Lines containing ``a=candidate`` are kept only when they match
    ``pattern`` and do not mention ``raddr``. All other lines pass through
    unchanged.

    Args:
        text: SDP text to clean up.
        pattern: Regular expression searched in each candidate line;
            candidates that do not match are dropped.

    Returns:
        The filtered SDP with every line terminated by CRLF, including the
        last one, as SDP requires. Empty input yields an empty string.
    """
    kept = []
    for line in text.splitlines():
        if "a=candidate" in line:
            if not re.search(pattern, line) or "raddr" in line:
                continue
        kept.append(line)
    # Terminate (not merely separate) lines with CRLF so the final SDP line
    # is not left unterminated.
    return "".join(f"{line}\r\n" for line in kept)
|
||||
|
||||
|
||||
def smallwebrtc_sdp_cleanup_fingerprints(text: str) -> str:
    """Drop SDP lines that reference sha-384 or sha-512.

    Any line containing ``sha-384`` or ``sha-512`` is removed, whatever its
    SDP attribute; all other lines pass through unchanged.

    Args:
        text: SDP text to clean up.

    Returns:
        The filtered SDP with every line terminated by CRLF, including the
        last one, as SDP requires. Empty input yields an empty string.
    """
    kept = [
        line
        for line in text.splitlines()
        if "sha-384" not in line and "sha-512" not in line
    ]
    # Terminate (not merely separate) lines with CRLF so the final SDP line
    # is not left unterminated.
    return "".join(f"{line}\r\n" for line in kept)
|
||||
|
||||
|
||||
def smallwebrtc_sdp_munging(sdp: str, host: str) -> str:
    """Apply SDP modifications for SmallWebRTC compatibility.

    Runs the fingerprint cleanup first, then the ICE candidate cleanup
    restricted to candidates that mention ``host``.

    Args:
        sdp: Original SDP string.
        host: Host address used to select ICE candidates. It is treated as
            literal text, not as a regular expression.

    Returns:
        Modified SDP string with fingerprint and ICE candidate cleanup.
    """
    sdp = smallwebrtc_sdp_cleanup_fingerprints(sdp)
    # The candidate filter takes a regex. Escape the host so the dots in an
    # address such as "192.168.1.5" only match literal dots.
    return smallwebrtc_sdp_cleanup_ice_candidates(sdp, re.escape(host))
|
||||
|
||||
|
||||
def run_example_daily(
|
||||
run_example: Callable,
|
||||
args: argparse.Namespace,
|
||||
transport_params: Mapping[str, Callable] = {},
|
||||
):
|
||||
"""Run example using Daily.co transport.
|
||||
|
||||
Args:
|
||||
run_example: The example function to run.
|
||||
args: Parsed command-line arguments.
|
||||
transport_params: Mapping of transport names to parameter factory functions.
|
||||
"""
|
||||
logger.info("Running example with DailyTransport...")
|
||||
|
||||
from pipecat.examples.daily_runner import configure
|
||||
@@ -87,6 +178,13 @@ def run_example_webrtc(
|
||||
args: argparse.Namespace,
|
||||
transport_params: Mapping[str, Callable] = {},
|
||||
):
|
||||
"""Run example using WebRTC transport with FastAPI server.
|
||||
|
||||
Args:
|
||||
run_example: The example function to run.
|
||||
args: Parsed command-line arguments.
|
||||
transport_params: Mapping of transport names to parameter factory functions.
|
||||
"""
|
||||
logger.info("Running example with SmallWebRTCTransport...")
|
||||
|
||||
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||
@@ -96,21 +194,25 @@ def run_example_webrtc(
|
||||
# Store connections by pc_id
|
||||
pcs_map: Dict[str, SmallWebRTCConnection] = {}
|
||||
|
||||
ice_servers = [
|
||||
IceServer(
|
||||
urls="stun:stun.l.google.com:19302",
|
||||
)
|
||||
]
|
||||
|
||||
# Mount the frontend at /client (requests to / are redirected there)
|
||||
app.mount("/client", SmallWebRTCPrebuiltUI)
|
||||
|
||||
@app.get("/", include_in_schema=False)
|
||||
async def root_redirect():
|
||||
"""Redirect root requests to client interface."""
|
||||
return RedirectResponse(url="/client/")
|
||||
|
||||
@app.post("/api/offer")
|
||||
async def offer(request: dict, background_tasks: BackgroundTasks):
|
||||
"""Handle WebRTC offer requests and manage peer connections.
|
||||
|
||||
Args:
|
||||
request: WebRTC offer request containing SDP and connection details.
|
||||
background_tasks: FastAPI background tasks for running examples.
|
||||
|
||||
Returns:
|
||||
WebRTC answer with connection details.
|
||||
"""
|
||||
pc_id = request.get("pc_id")
|
||||
|
||||
if pc_id and pc_id in pcs_map:
|
||||
@@ -122,11 +224,16 @@ def run_example_webrtc(
|
||||
restart_pc=request.get("restart_pc", False),
|
||||
)
|
||||
else:
|
||||
pipecat_connection = SmallWebRTCConnection(ice_servers)
|
||||
pipecat_connection = SmallWebRTCConnection()
|
||||
await pipecat_connection.initialize(sdp=request["sdp"], type=request["type"])
|
||||
|
||||
@pipecat_connection.event_handler("closed")
|
||||
async def handle_disconnected(webrtc_connection: SmallWebRTCConnection):
|
||||
"""Handle WebRTC connection closure and cleanup.
|
||||
|
||||
Args:
|
||||
webrtc_connection: The closed WebRTC connection.
|
||||
"""
|
||||
logger.info(f"Discarding peer connection for pc_id: {webrtc_connection.pc_id}")
|
||||
pcs_map.pop(webrtc_connection.pc_id, None)
|
||||
|
||||
@@ -136,6 +243,10 @@ def run_example_webrtc(
|
||||
background_tasks.add_task(run_example, transport, args, False)
|
||||
|
||||
answer = pipecat_connection.get_answer()
|
||||
|
||||
if args.esp32 and args.host:
|
||||
answer["sdp"] = smallwebrtc_sdp_munging(answer["sdp"], args.host)
|
||||
|
||||
# Updating the peer connection inside the map
|
||||
pcs_map[answer["pc_id"]] = pipecat_connection
|
||||
|
||||
@@ -143,6 +254,14 @@ def run_example_webrtc(
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Manage FastAPI application lifecycle and cleanup connections.
|
||||
|
||||
Args:
|
||||
app: The FastAPI application instance.
|
||||
|
||||
Yields:
|
||||
Control to the FastAPI application runtime.
|
||||
"""
|
||||
yield # Run app
|
||||
coros = [pc.disconnect() for pc in pcs_map.values()]
|
||||
await asyncio.gather(*coros)
|
||||
@@ -156,6 +275,13 @@ def run_example_twilio(
|
||||
args: argparse.Namespace,
|
||||
transport_params: Mapping[str, Callable] = {},
|
||||
):
|
||||
"""Run example using Twilio transport with FastAPI WebSocket server.
|
||||
|
||||
Args:
|
||||
run_example: The example function to run.
|
||||
args: Parsed command-line arguments.
|
||||
transport_params: Mapping of transport names to parameter factory functions.
|
||||
"""
|
||||
logger.info("Running example with FastAPIWebsocketTransport (Twilio)...")
|
||||
|
||||
app = FastAPI()
|
||||
@@ -170,6 +296,11 @@ def run_example_twilio(
|
||||
|
||||
@app.post("/")
|
||||
async def start_call():
|
||||
"""Handle Twilio webhook and return TwiML response.
|
||||
|
||||
Returns:
|
||||
TwiML XML response directing call to WebSocket stream.
|
||||
"""
|
||||
logger.debug("POST TwiML")
|
||||
|
||||
xml_content = f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
@@ -184,6 +315,11 @@ def run_example_twilio(
|
||||
|
||||
@app.websocket("/ws")
|
||||
async def websocket_endpoint(websocket: WebSocket):
|
||||
"""Handle Twilio WebSocket connections for voice streaming.
|
||||
|
||||
Args:
|
||||
websocket: The WebSocket connection from Twilio.
|
||||
"""
|
||||
await websocket.accept()
|
||||
|
||||
logger.debug("WebSocket connection accepted")
|
||||
@@ -216,6 +352,13 @@ def run_main(
|
||||
args: argparse.Namespace,
|
||||
transport_params: Mapping[str, Callable] = {},
|
||||
):
|
||||
"""Run the example with the specified transport type.
|
||||
|
||||
Args:
|
||||
run_example: The example function to run.
|
||||
args: Parsed command-line arguments.
|
||||
transport_params: Mapping of transport names to parameter factory functions.
|
||||
"""
|
||||
if args.transport not in transport_params:
|
||||
logger.error(f"Transport '{args.transport}' not supported by this example")
|
||||
return
|
||||
@@ -235,6 +378,13 @@ def main(
|
||||
parser: Optional[argparse.ArgumentParser] = None,
|
||||
transport_params: Mapping[str, Callable] = {},
|
||||
):
|
||||
"""Main entry point for running Pipecat examples with transport selection.
|
||||
|
||||
Args:
|
||||
run_example: The example function to run.
|
||||
parser: Optional argument parser. If None, creates a default one.
|
||||
transport_params: Mapping of transport names to parameter factory functions.
|
||||
"""
|
||||
if not parser:
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
|
||||
parser.add_argument(
|
||||
@@ -254,9 +404,16 @@ def main(
|
||||
parser.add_argument(
|
||||
"--proxy", "-x", help="A public proxy host name (no protocol, e.g. proxy.example.com)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--esp32", action="store_true", default=False, help="Perform SDP munging for the ESP32"
|
||||
)
|
||||
parser.add_argument("--verbose", "-v", action="count", default=0)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.esp32 and args.host == "localhost":
|
||||
logger.error("For ESP32, you need to specify `--host IP` so we can do SDP munging.")
|
||||
return
|
||||
|
||||
# Log level
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="TRACE" if args.verbose else "DEBUG")
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,39 +1,102 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Metrics data models for Pipecat framework.
|
||||
|
||||
This module defines Pydantic models for various types of metrics data
|
||||
collected throughout the pipeline, including timing, token usage, and
|
||||
processing statistics.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class MetricsData(BaseModel):
|
||||
"""Base class for all metrics data.
|
||||
|
||||
Parameters:
|
||||
processor: Name of the processor generating the metrics.
|
||||
model: Optional model name associated with the metrics.
|
||||
"""
|
||||
|
||||
processor: str
|
||||
model: Optional[str] = None
|
||||
|
||||
|
||||
class TTFBMetricsData(MetricsData):
|
||||
"""Time To First Byte (TTFB) metrics data.
|
||||
|
||||
Parameters:
|
||||
value: TTFB measurement in seconds.
|
||||
"""
|
||||
|
||||
value: float
|
||||
|
||||
|
||||
class ProcessingMetricsData(MetricsData):
|
||||
"""General processing time metrics data.
|
||||
|
||||
Parameters:
|
||||
value: Processing time measurement in seconds.
|
||||
"""
|
||||
|
||||
value: float
|
||||
|
||||
|
||||
class LLMTokenUsage(BaseModel):
|
||||
"""Token usage statistics for LLM operations.
|
||||
|
||||
Parameters:
|
||||
prompt_tokens: Number of tokens in the input prompt.
|
||||
completion_tokens: Number of tokens in the generated completion.
|
||||
total_tokens: Total number of tokens used (prompt + completion).
|
||||
cache_read_input_tokens: Number of tokens read from cache, if applicable.
|
||||
cache_creation_input_tokens: Number of tokens used to create cache entries, if applicable.
|
||||
"""
|
||||
|
||||
prompt_tokens: int
|
||||
completion_tokens: int
|
||||
total_tokens: int
|
||||
cache_read_input_tokens: Optional[int] = None
|
||||
cache_creation_input_tokens: Optional[int] = None
|
||||
reasoning_tokens: Optional[int] = None
|
||||
|
||||
|
||||
class LLMUsageMetricsData(MetricsData):
|
||||
"""LLM token usage metrics data.
|
||||
|
||||
Parameters:
|
||||
value: Token usage statistics for the LLM operation.
|
||||
"""
|
||||
|
||||
value: LLMTokenUsage
|
||||
|
||||
|
||||
class TTSUsageMetricsData(MetricsData):
|
||||
"""Text-to-Speech usage metrics data.
|
||||
|
||||
Parameters:
|
||||
value: Number of characters processed by TTS.
|
||||
"""
|
||||
|
||||
value: int
|
||||
|
||||
|
||||
class SmartTurnMetricsData(MetricsData):
|
||||
"""Metrics data for smart turn predictions."""
|
||||
"""Metrics data for smart turn predictions.
|
||||
|
||||
Parameters:
|
||||
is_complete: Whether the turn is predicted to be complete.
|
||||
probability: Confidence probability of the turn completion prediction.
|
||||
inference_time_ms: Time taken for inference in milliseconds.
|
||||
server_total_time_ms: Total server processing time in milliseconds.
|
||||
e2e_processing_time_ms: End-to-end processing time in milliseconds.
|
||||
"""
|
||||
|
||||
is_complete: bool
|
||||
probability: float
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base observer classes for monitoring frame flow in the Pipecat pipeline.
|
||||
|
||||
This module provides the foundation for observing frame transfers between
|
||||
processors without modifying the pipeline structure. Observers can be used
|
||||
for logging, debugging, analytics, and monitoring pipeline behavior.
|
||||
"""
|
||||
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -18,19 +25,19 @@ if TYPE_CHECKING:
|
||||
|
||||
@dataclass
|
||||
class FramePushed:
|
||||
"""Represents an event where a frame is pushed from one processor to another
|
||||
within the pipeline.
|
||||
"""Event data for frame transfers between processors in the pipeline.
|
||||
|
||||
This data structure is typically used by observers to track the flow of
|
||||
frames through the pipeline for logging, debugging, or analytics purposes.
|
||||
|
||||
Attributes:
|
||||
source (FrameProcessor): The processor sending the frame.
|
||||
destination (FrameProcessor): The processor receiving the frame.
|
||||
frame (Frame): The frame being transferred.
|
||||
direction (FrameDirection): The direction of the transfer (e.g., downstream or upstream).
|
||||
timestamp (int): The time when the frame was pushed, based on the pipeline clock.
|
||||
Represents an event where a frame is pushed from one processor to another
|
||||
within the pipeline. This data structure is typically used by observers
|
||||
to track the flow of frames through the pipeline for logging, debugging,
|
||||
or analytics purposes.
|
||||
|
||||
Parameters:
|
||||
source: The processor sending the frame.
|
||||
destination: The processor receiving the frame.
|
||||
frame: The frame being transferred.
|
||||
direction: The direction of the transfer (e.g., downstream or upstream).
|
||||
timestamp: The time when the frame was pushed, based on the pipeline clock.
|
||||
"""
|
||||
|
||||
source: "FrameProcessor"
|
||||
@@ -41,11 +48,12 @@ class FramePushed:
|
||||
|
||||
|
||||
class BaseObserver(BaseObject):
|
||||
"""This is the base class for pipeline frame observers. Observers can view
|
||||
all the frames that go through the pipeline without the need to inject
|
||||
processors in the pipeline. This can be useful, for example, to implement
|
||||
frame loggers or debuggers among other things.
|
||||
"""Base class for pipeline frame observers.
|
||||
|
||||
Observers can view all frames that flow through the pipeline without
|
||||
needing to inject processors into the pipeline structure. This enables
|
||||
non-intrusive monitoring capabilities such as frame logging, debugging,
|
||||
performance analysis, and analytics collection.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
@@ -57,7 +65,6 @@ class BaseObserver(BaseObject):
|
||||
transferred through the pipeline.
|
||||
|
||||
Args:
|
||||
data (FramePushed): The event data containing details about the frame transfer.
|
||||
|
||||
data: The event data containing details about the frame transfer.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Debug logging observer for frame activity monitoring.
|
||||
|
||||
This module provides a debug observer that logs detailed frame activity
|
||||
to the console, making it useful for debugging pipeline behavior and
|
||||
understanding frame flow between processors.
|
||||
"""
|
||||
|
||||
from dataclasses import fields, is_dataclass
|
||||
from enum import Enum, auto
|
||||
from typing import Dict, Optional, Set, Tuple, Type, Union
|
||||
@@ -16,7 +23,12 @@ from pipecat.processors.frame_processor import FrameDirection
|
||||
|
||||
|
||||
class FrameEndpoint(Enum):
|
||||
"""Specifies which endpoint (source or destination) to filter on."""
|
||||
"""Specifies which endpoint (source or destination) to filter on.
|
||||
|
||||
Parameters:
|
||||
SOURCE: Filter on the source component that is pushing the frame.
|
||||
DESTINATION: Filter on the destination component receiving the frame.
|
||||
"""
|
||||
|
||||
SOURCE = auto()
|
||||
DESTINATION = auto()
|
||||
@@ -28,44 +40,37 @@ class DebugLogObserver(BaseObserver):
|
||||
Automatically extracts and formats data from any frame type, making it useful
|
||||
for debugging pipeline behavior without needing frame-specific observers.
|
||||
|
||||
Args:
|
||||
frame_types: Optional tuple of frame types to log, or a dict with frame type
|
||||
filters. If None, logs all frame types.
|
||||
exclude_fields: Optional set of field names to exclude from logging.
|
||||
|
||||
Examples:
|
||||
Log all frames from all services:
|
||||
```python
|
||||
observers = DebugLogObserver()
|
||||
```
|
||||
Log all frames from all services::
|
||||
|
||||
Log specific frame types from any source/destination:
|
||||
```python
|
||||
from pipecat.frames.frames import TranscriptionFrame, InterimTranscriptionFrame
|
||||
observers=[
|
||||
DebugLogObserver(frame_types=(LLMTextFrame,TranscriptionFrame,)),
|
||||
],
|
||||
```
|
||||
observers = DebugLogObserver()
|
||||
|
||||
Log frames with specific source/destination filters:
|
||||
```python
|
||||
from pipecat.frames.frames import StartInterruptionFrame, UserStartedSpeakingFrame, LLMTextFrame
|
||||
from pipecat.transports.base_output_transport import BaseOutputTransport
|
||||
from pipecat.services.stt_service import STTService
|
||||
Log specific frame types from any source/destination::
|
||||
|
||||
observers=[
|
||||
DebugLogObserver(
|
||||
frame_types={
|
||||
# Only log StartInterruptionFrame when source is BaseOutputTransport
|
||||
StartInterruptionFrame: (BaseOutputTransport, FrameEndpoint.SOURCE),
|
||||
# Only log UserStartedSpeakingFrame when destination is STTService
|
||||
UserStartedSpeakingFrame: (STTService, FrameEndpoint.DESTINATION),
|
||||
# Log LLMTextFrame regardless of source or destination type
|
||||
LLMTextFrame: None,
|
||||
}
|
||||
),
|
||||
],
|
||||
```
|
||||
from pipecat.frames.frames import LLMTextFrame, TranscriptionFrame
|
||||
observers=[
|
||||
DebugLogObserver(frame_types=(LLMTextFrame,TranscriptionFrame,)),
|
||||
]
|
||||
|
||||
Log frames with specific source/destination filters::
|
||||
|
||||
from pipecat.frames.frames import StartInterruptionFrame, UserStartedSpeakingFrame, LLMTextFrame
|
||||
from pipecat.observers.loggers.debug_log_observer import DebugLogObserver, FrameEndpoint
|
||||
from pipecat.transports.base_output import BaseOutputTransport
|
||||
from pipecat.services.stt_service import STTService
|
||||
|
||||
observers=[
|
||||
DebugLogObserver(
|
||||
frame_types={
|
||||
# Only log StartInterruptionFrame when source is BaseOutputTransport
|
||||
StartInterruptionFrame: (BaseOutputTransport, FrameEndpoint.SOURCE),
|
||||
# Only log UserStartedSpeakingFrame when destination is STTService
|
||||
UserStartedSpeakingFrame: (STTService, FrameEndpoint.DESTINATION),
|
||||
# Log LLMTextFrame regardless of source or destination type
|
||||
LLMTextFrame: None,
|
||||
}
|
||||
),
|
||||
]
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -79,14 +84,17 @@ class DebugLogObserver(BaseObserver):
|
||||
"""Initialize the debug log observer.
|
||||
|
||||
Args:
|
||||
frame_types: Tuple of frame types to log, or a dict mapping frame types to
|
||||
filter configurations. Filter configs can be:
|
||||
- None to log all instances of the frame type
|
||||
- A tuple of (service_type, endpoint) to filter on a specific service
|
||||
and endpoint (SOURCE or DESTINATION)
|
||||
If None is provided instead of a tuple/dict, log all frames.
|
||||
exclude_fields: Set of field names to exclude from logging. If None, only binary
|
||||
data fields are excluded.
|
||||
frame_types: Frame types to log. Can be:
|
||||
|
||||
- Tuple of frame types to log all instances
|
||||
- Dict mapping frame types to filter configurations
|
||||
- None to log all frames
|
||||
|
||||
Filter configurations can be None (log all instances) or a tuple
|
||||
of (service_type, endpoint) to filter on specific services.
|
||||
exclude_fields: Field names to exclude from logging. Defaults to
|
||||
excluding binary data fields like 'audio', 'image', 'images'.
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@@ -113,14 +121,7 @@ class DebugLogObserver(BaseObserver):
|
||||
)
|
||||
|
||||
def _format_value(self, value):
|
||||
"""Format a value for logging.
|
||||
|
||||
Args:
|
||||
value: The value to format.
|
||||
|
||||
Returns:
|
||||
str: A string representation of the value suitable for logging.
|
||||
"""
|
||||
"""Format a value for logging."""
|
||||
if value is None:
|
||||
return "None"
|
||||
elif isinstance(value, str):
|
||||
@@ -143,16 +144,7 @@ class DebugLogObserver(BaseObserver):
|
||||
return str(value)
|
||||
|
||||
def _should_log_frame(self, frame, src, dst):
|
||||
"""Determine if a frame should be logged based on filters.
|
||||
|
||||
Args:
|
||||
frame: The frame being processed
|
||||
src: The source component
|
||||
dst: The destination component
|
||||
|
||||
Returns:
|
||||
bool: True if the frame should be logged, False otherwise
|
||||
"""
|
||||
"""Determine if a frame should be logged based on filters."""
|
||||
# If no filters, log all frames
|
||||
if not self.frame_filters:
|
||||
return True
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""LLM logging observer for Pipecat."""
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
@@ -34,10 +36,15 @@ class LLMLogObserver(BaseObserver):
|
||||
|
||||
This allows you to track when the LLM starts responding, what it generates,
|
||||
and when it finishes.
|
||||
|
||||
"""
|
||||
|
||||
async def on_push_frame(self, data: FramePushed):
|
||||
"""Handle frame push events and log LLM-related activities.
|
||||
|
||||
Args:
|
||||
data: The frame push event data containing source, destination,
|
||||
frame, direction, and timestamp information.
|
||||
"""
|
||||
src = data.source
|
||||
dst = data.destination
|
||||
frame = data.frame
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Transcription logging observer for Pipecat.
|
||||
|
||||
This module provides an observer that logs transcription frames to the console,
|
||||
allowing developers to monitor speech-to-text activity in real-time.
|
||||
"""
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
@@ -17,17 +23,23 @@ from pipecat.services.stt_service import STTService
|
||||
class TranscriptionLogObserver(BaseObserver):
|
||||
"""Observer to log transcription activity to the console.
|
||||
|
||||
Logs all frame instances (only from STT service) of:
|
||||
|
||||
- TranscriptionFrame
|
||||
- InterimTranscriptionFrame
|
||||
|
||||
This allows you to track when the LLM starts responding, what it generates,
|
||||
and when it finishes.
|
||||
Monitors and logs all transcription frames from STT services, including
|
||||
both final transcriptions and interim results. This allows developers
|
||||
to track speech recognition activity and debug transcription issues.
|
||||
|
||||
Only processes frames from STTService instances to avoid logging
|
||||
unrelated transcription frames from other sources.
|
||||
"""
|
||||
|
||||
async def on_push_frame(self, data: FramePushed):
|
||||
"""Handle frame push events and log transcription frames.
|
||||
|
||||
Logs TranscriptionFrame and InterimTranscriptionFrame instances
|
||||
with timestamps and user information for debugging purposes.
|
||||
|
||||
Args:
|
||||
data: Frame push event data containing source, frame, and timestamp.
|
||||
"""
|
||||
src = data.source
|
||||
frame = data.frame
|
||||
timestamp = data.timestamp
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Observer for measuring user-to-bot response latency."""
|
||||
|
||||
import time
|
||||
|
||||
from loguru import logger
|
||||
@@ -18,19 +20,28 @@ from pipecat.processors.frame_processor import FrameDirection
|
||||
|
||||
|
||||
class UserBotLatencyLogObserver(BaseObserver):
|
||||
"""Observer that logs the latency between when the user stops speaking and
|
||||
when the bot starts speaking.
|
||||
|
||||
This helps measure how quickly the AI services respond.
|
||||
"""Observer that measures time between user stopping speech and bot starting speech.
|
||||
|
||||
This helps measure how quickly the AI services respond by tracking
|
||||
conversation turn timing and logging latency metrics.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the latency observer.
|
||||
|
||||
Sets up tracking for processed frames and user speech timing
|
||||
to calculate response latencies.
|
||||
"""
|
||||
super().__init__()
|
||||
self._processed_frames = set()
|
||||
self._user_stopped_time = 0
|
||||
|
||||
async def on_push_frame(self, data: FramePushed):
|
||||
"""Process frames to track speech timing and calculate latency.
|
||||
|
||||
Args:
|
||||
data: Frame push event containing the frame and direction information.
|
||||
"""
|
||||
# Only process downstream frames
|
||||
if data.direction != FrameDirection.DOWNSTREAM:
|
||||
return
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Turn tracking observer for conversation flow monitoring.
|
||||
|
||||
This module provides an observer that monitors conversation turns in a pipeline,
|
||||
tracking when turns start and end based on user and bot speech patterns.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from collections import deque
|
||||
|
||||
@@ -23,15 +29,30 @@ from pipecat.observers.base_observer import BaseObserver, FramePushed
|
||||
class TurnTrackingObserver(BaseObserver):
|
||||
"""Observer that tracks conversation turns in a pipeline.
|
||||
|
||||
This observer monitors the flow of conversation by tracking when turns
|
||||
start and end based on user and bot speaking patterns. It handles
|
||||
interruptions, timeouts, and maintains turn state throughout the pipeline.
|
||||
|
||||
Turn tracking logic:
|
||||
|
||||
- The first turn starts immediately when the pipeline starts (StartFrame)
|
||||
- Subsequent turns start when the user starts speaking
|
||||
- A turn ends when the bot stops speaking and either:
|
||||
|
||||
- The user starts speaking again
|
||||
- A timeout period elapses with no more bot speech
|
||||
"""
|
||||
|
||||
def __init__(self, max_frames=100, turn_end_timeout_secs=2.5, **kwargs):
|
||||
"""Initialize the turn tracking observer.
|
||||
|
||||
Args:
|
||||
max_frames: Maximum number of frame IDs to keep in history for
|
||||
duplicate detection. Defaults to 100.
|
||||
turn_end_timeout_secs: Timeout in seconds after bot stops speaking
|
||||
before automatically ending the turn. Defaults to 2.5.
|
||||
**kwargs: Additional arguments passed to the parent observer.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._turn_count = 0
|
||||
self._is_turn_active = False
|
||||
@@ -49,7 +70,11 @@ class TurnTrackingObserver(BaseObserver):
|
||||
self._register_event_handler("on_turn_ended")
|
||||
|
||||
async def on_push_frame(self, data: FramePushed):
|
||||
"""Process frame events for turn tracking."""
|
||||
"""Process frame events for turn tracking.
|
||||
|
||||
Args:
|
||||
data: Frame push event data containing the frame and metadata.
|
||||
"""
|
||||
# Skip already processed frames
|
||||
if data.frame.id in self._processed_frames:
|
||||
return
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base pipeline implementation for frame processing."""
|
||||
|
||||
from abc import abstractmethod
|
||||
from typing import List
|
||||
|
||||
@@ -11,9 +13,24 @@ from pipecat.processors.frame_processor import FrameProcessor
|
||||
|
||||
|
||||
class BasePipeline(FrameProcessor):
|
||||
"""Base class for all pipeline implementations.
|
||||
|
||||
Provides the foundation for pipeline processors that need to support
|
||||
metrics collection from their contained processors.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the base pipeline."""
|
||||
super().__init__()
|
||||
|
||||
@abstractmethod
|
||||
def processors_with_metrics(self) -> List[FrameProcessor]:
|
||||
"""Return processors that can generate metrics.
|
||||
|
||||
Implementing classes should collect and return all processors within
|
||||
their pipeline that support metrics generation.
|
||||
|
||||
Returns:
|
||||
List of frame processors that support metrics collection.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Base pipeline task implementation for managing pipeline execution.
|
||||
|
||||
This module provides the abstract base class and configuration for pipeline
|
||||
tasks that manage the lifecycle and execution of frame processing pipelines.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
@@ -15,44 +21,81 @@ from pipecat.utils.base_object import BaseObject
|
||||
|
||||
@dataclass
|
||||
class PipelineTaskParams:
|
||||
"""Specific configuration for the pipeline task."""
|
||||
"""Configuration parameters for pipeline task execution.
|
||||
|
||||
Parameters:
|
||||
loop: The asyncio event loop to use for task execution.
|
||||
"""
|
||||
|
||||
loop: asyncio.AbstractEventLoop
|
||||
|
||||
|
||||
class BasePipelineTask(BaseObject):
|
||||
"""Abstract base class for pipeline task implementations.
|
||||
|
||||
Defines the interface for managing pipeline execution lifecycle,
|
||||
including starting, stopping, and frame queuing operations.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def has_finished(self) -> bool:
|
||||
"""Indicates whether the tasks has finished. That is, all processors
|
||||
have stopped.
|
||||
"""Check if the pipeline task has finished execution.
|
||||
|
||||
Returns:
|
||||
True if all processors have stopped and the task is complete.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def stop_when_done(self):
|
||||
"""This is a helper function that sends an EndFrame to the pipeline in
|
||||
order to stop the task after everything in it has been processed.
|
||||
"""Schedule the pipeline to stop after processing all queued frames.
|
||||
|
||||
Implementing classes should send an EndFrame or equivalent signal to
|
||||
gracefully terminate the pipeline once all current processing is complete.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def cancel(self):
|
||||
"""Stops the running pipeline immediately."""
|
||||
"""Immediately stop the running pipeline.
|
||||
|
||||
Implementing classes should cancel all running tasks and stop frame
|
||||
processing without waiting for completion.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def run(self, params: PipelineTaskParams):
|
||||
"""Starts running the given pipeline."""
|
||||
"""Start and run the pipeline with the given parameters.
|
||||
|
||||
Implementing classes should initialize and execute the pipeline using
|
||||
the provided configuration parameters.
|
||||
|
||||
Args:
|
||||
params: Configuration parameters for pipeline execution.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def queue_frame(self, frame: Frame):
|
||||
"""Queue a frame to be pushed down the pipeline."""
|
||||
"""Queue a single frame for processing by the pipeline.
|
||||
|
||||
Implementing classes should add the frame to their processing queue
|
||||
for downstream handling.
|
||||
|
||||
Args:
|
||||
frame: The frame to be processed.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def queue_frames(self, frames: Iterable[Frame] | AsyncIterable[Frame]):
|
||||
"""Queues multiple frames to be pushed down the pipeline."""
|
||||
"""Queue multiple frames for processing by the pipeline.
|
||||
|
||||
Implementing classes should process the iterable/async iterable and
|
||||
add all frames to their processing queue.
|
||||
|
||||
Args:
|
||||
frames: An iterable or async iterable of frames to be processed.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Parallel pipeline implementation for concurrent frame processing.
|
||||
|
||||
This module provides a parallel pipeline that processes frames through multiple
|
||||
sub-pipelines concurrently, with coordination for system frames and proper
|
||||
handling of pipeline lifecycle events.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from itertools import chain
|
||||
from typing import Awaitable, Callable, Dict, List
|
||||
@@ -25,16 +32,34 @@ from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
|
||||
|
||||
|
||||
class ParallelPipelineSource(FrameProcessor):
|
||||
"""Source processor for parallel pipeline branches.
|
||||
|
||||
Handles frame routing for parallel pipeline inputs, directing system frames
|
||||
to the parent push function and other upstream frames to a queue for processing.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
upstream_queue: asyncio.Queue,
|
||||
push_frame_func: Callable[[Frame, FrameDirection], Awaitable[None]],
|
||||
):
|
||||
"""Initialize the parallel pipeline source.
|
||||
|
||||
Args:
|
||||
upstream_queue: Queue for collecting upstream frames from this branch.
|
||||
push_frame_func: Function to push frames to the parent parallel pipeline.
|
||||
"""
|
||||
super().__init__()
|
||||
self._up_queue = upstream_queue
|
||||
self._push_frame_func = push_frame_func
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames with special handling for system frames.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
match direction:
|
||||
@@ -48,16 +73,34 @@ class ParallelPipelineSource(FrameProcessor):
|
||||
|
||||
|
||||
class ParallelPipelineSink(FrameProcessor):
|
||||
"""Sink processor for parallel pipeline branches.
|
||||
|
||||
Handles frame routing for parallel pipeline outputs, directing system frames
|
||||
to the parent push function and other downstream frames to a queue for coordination.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
downstream_queue: asyncio.Queue,
|
||||
push_frame_func: Callable[[Frame, FrameDirection], Awaitable[None]],
|
||||
):
|
||||
"""Initialize the parallel pipeline sink.
|
||||
|
||||
Args:
|
||||
downstream_queue: Queue for collecting downstream frames from this branch.
|
||||
push_frame_func: Function to push frames to the parent parallel pipeline.
|
||||
"""
|
||||
super().__init__()
|
||||
self._down_queue = downstream_queue
|
||||
self._push_frame_func = push_frame_func
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames with special handling for system frames.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
match direction:
|
||||
@@ -71,7 +114,24 @@ class ParallelPipelineSink(FrameProcessor):
|
||||
|
||||
|
||||
class ParallelPipeline(BasePipeline):
|
||||
"""Pipeline that processes frames through multiple sub-pipelines concurrently.
|
||||
|
||||
Creates multiple parallel processing branches from the provided processor lists,
|
||||
coordinating frame flow and ensuring proper synchronization of lifecycle events
|
||||
like EndFrames. Each branch runs independently while system frames are handled
|
||||
specially to maintain pipeline coordination.
|
||||
"""
|
||||
|
||||
def __init__(self, *args):
|
||||
"""Initialize the parallel pipeline with processor lists.
|
||||
|
||||
Args:
|
||||
*args: Variable number of processor lists, each becoming a parallel branch.
|
||||
|
||||
Raises:
|
||||
Exception: If no processor lists are provided.
|
||||
TypeError: If any argument is not a list of processors.
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
if len(args) == 0:
|
||||
@@ -93,6 +153,11 @@ class ParallelPipeline(BasePipeline):
|
||||
#
|
||||
|
||||
def processors_with_metrics(self) -> List[FrameProcessor]:
|
||||
"""Collect processors that can generate metrics from all parallel branches.
|
||||
|
||||
Returns:
|
||||
List of frame processors that support metrics collection from all branches.
|
||||
"""
|
||||
return list(chain.from_iterable(p.processors_with_metrics() for p in self._pipelines))
|
||||
|
||||
#
|
||||
@@ -100,6 +165,14 @@ class ParallelPipeline(BasePipeline):
|
||||
#
|
||||
|
||||
async def setup(self, setup: FrameProcessorSetup):
|
||||
"""Set up the parallel pipeline and all its branches.
|
||||
|
||||
Args:
|
||||
setup: Configuration for frame processor setup.
|
||||
|
||||
Raises:
|
||||
TypeError: If any processor list argument is not actually a list.
|
||||
"""
|
||||
await super().setup(setup)
|
||||
|
||||
self._up_queue = WatchdogQueue(setup.task_manager)
|
||||
@@ -129,12 +202,19 @@ class ParallelPipeline(BasePipeline):
|
||||
await asyncio.gather(*[s.setup(setup) for s in self._sinks])
|
||||
|
||||
async def cleanup(self):
|
||||
"""Clean up the parallel pipeline and all its branches."""
|
||||
await super().cleanup()
|
||||
await asyncio.gather(*[s.cleanup() for s in self._sources])
|
||||
await asyncio.gather(*[p.cleanup() for p in self._pipelines])
|
||||
await asyncio.gather(*[s.cleanup() for s in self._sinks])
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames through all parallel branches with lifecycle coordination.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartFrame):
|
||||
@@ -159,9 +239,11 @@ class ParallelPipeline(BasePipeline):
|
||||
await self._stop()
|
||||
|
||||
async def _start(self, frame: StartFrame):
|
||||
"""Start the parallel pipeline processing tasks."""
|
||||
await self._create_tasks()
|
||||
|
||||
async def _stop(self):
|
||||
"""Stop all parallel pipeline processing tasks."""
|
||||
if self._up_task:
|
||||
# The up task doesn't receive an EndFrame, so we just cancel it.
|
||||
await self.cancel_task(self._up_task)
|
||||
@@ -174,42 +256,55 @@ class ParallelPipeline(BasePipeline):
|
||||
self._down_task = None
|
||||
|
||||
async def _cancel(self):
|
||||
"""Cancel all parallel pipeline processing tasks."""
|
||||
if self._up_task:
|
||||
self._up_queue.cancel()
|
||||
await self.cancel_task(self._up_task)
|
||||
self._up_task = None
|
||||
if self._down_task:
|
||||
self._down_queue.cancel()
|
||||
await self.cancel_task(self._down_task)
|
||||
self._down_task = None
|
||||
|
||||
async def _create_tasks(self):
|
||||
"""Create upstream and downstream processing tasks if not already running."""
|
||||
if not self._up_task:
|
||||
self._up_task = self.create_task(self._process_up_queue())
|
||||
if not self._down_task:
|
||||
self._down_task = self.create_task(self._process_down_queue())
|
||||
|
||||
async def _drain_queues(self):
    """Discard every frame still waiting in the upstream and downstream queues.

    ``empty`` is called as a method here. Referencing it without calling it
    yields a bound method object, which is always truthy, so a condition of
    the form ``while not queue.empty`` never holds and nothing is drained.

    NOTE(review): assumes both queues follow the ``asyncio.Queue`` interface,
    where ``empty()`` is a method -- confirm for the queue class in use.
    """
    for queue in (self._up_queue, self._down_queue):
        while not queue.empty():
            await queue.get()
|
||||
|
||||
async def _handle_interruption(self):
    """React to an interruption by resetting the queue-processing machinery.

    The steps run strictly in order: cancel the running tasks, drain the
    queues, then create the tasks again.
    """
    for step in (self._cancel, self._drain_queues, self._create_tasks):
        await step()
|
||||
|
||||
async def _parallel_push_frame(self, frame: Frame, direction: FrameDirection):
    """Push a frame unless a frame with the same ID was pushed before.

    Args:
        frame: The frame to push; its ``id`` is recorded in ``_seen_ids``.
        direction: The direction passed on to ``push_frame``.
    """
    if frame.id in self._seen_ids:
        # Already forwarded once (e.g. by another branch); drop the duplicate.
        return
    self._seen_ids.add(frame.id)
    await self.push_frame(frame, direction)
|
||||
|
||||
async def _process_up_queue(self):
    """Forward frames from the upstream queue, one at a time, forever.

    Each frame taken from the queue is pushed upstream through
    ``_parallel_push_frame`` and then marked as done on the queue. The
    loop has no exit condition of its own.
    """
    while True:
        pending = await self._up_queue.get()
        await self._parallel_push_frame(pending, FrameDirection.UPSTREAM)
        self._up_queue.task_done()
|
||||
|
||||
async def _process_down_queue(self):
|
||||
"""Process downstream frames with EndFrame coordination.
|
||||
|
||||
Coordinates EndFrames to ensure they are only pushed upstream once
|
||||
all parallel branches have completed processing them.
|
||||
"""
|
||||
running = True
|
||||
while running:
|
||||
frame = await self._down_queue.get()
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Pipeline implementation for connecting and managing frame processors.
|
||||
|
||||
This module provides the main Pipeline class that connects frame processors
|
||||
in sequence and manages frame flow between them, along with helper classes
|
||||
for pipeline source and sink operations.
|
||||
"""
|
||||
|
||||
from typing import Callable, Coroutine, List
|
||||
|
||||
from pipecat.frames.frames import Frame
|
||||
@@ -12,11 +19,29 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, F
|
||||
|
||||
|
||||
class PipelineSource(FrameProcessor):
|
||||
"""Source processor that forwards frames to an upstream handler.
|
||||
|
||||
This processor acts as the entry point for a pipeline, forwarding
|
||||
downstream frames to the next processor and upstream frames to a
|
||||
provided upstream handler function.
|
||||
"""
|
||||
|
||||
def __init__(self, upstream_push_frame: Callable[[Frame, FrameDirection], Coroutine]):
    """Create the source and remember the upstream handler.

    Args:
        upstream_push_frame: Coroutine function stored as the handler for
            upstream frames.
    """
    super().__init__()
    self._upstream_push_frame = upstream_push_frame
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames and route them based on direction.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
match direction:
|
||||
@@ -27,11 +52,29 @@ class PipelineSource(FrameProcessor):
|
||||
|
||||
|
||||
class PipelineSink(FrameProcessor):
|
||||
"""Sink processor that forwards frames to a downstream handler.
|
||||
|
||||
This processor acts as the exit point for a pipeline, forwarding
|
||||
upstream frames to the previous processor and downstream frames to a
|
||||
provided downstream handler function.
|
||||
"""
|
||||
|
||||
def __init__(self, downstream_push_frame: Callable[[Frame, FrameDirection], Coroutine]):
    """Create the sink and remember the downstream handler.

    Args:
        downstream_push_frame: Coroutine function stored as the handler for
            downstream frames.
    """
    super().__init__()
    self._downstream_push_frame = downstream_push_frame
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames and route them based on direction.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
match direction:
|
||||
@@ -42,7 +85,19 @@ class PipelineSink(FrameProcessor):
|
||||
|
||||
|
||||
class Pipeline(BasePipeline):
|
||||
"""Main pipeline implementation that connects frame processors in sequence.
|
||||
|
||||
Creates a linear chain of frame processors with automatic source and sink
|
||||
processors for external frame handling. Manages processor lifecycle and
|
||||
provides metrics collection from contained processors.
|
||||
"""
|
||||
|
||||
def __init__(self, processors: List[FrameProcessor]):
|
||||
"""Initialize the pipeline with a list of processors.
|
||||
|
||||
Args:
|
||||
processors: List of frame processors to connect in sequence.
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
# Add a source and a sink queue so we can forward frames upstream and
|
||||
@@ -58,6 +113,14 @@ class Pipeline(BasePipeline):
|
||||
#
|
||||
|
||||
def processors_with_metrics(self):
|
||||
"""Return processors that can generate metrics.
|
||||
|
||||
Recursively collects all processors that support metrics generation,
|
||||
including those from nested pipelines.
|
||||
|
||||
Returns:
|
||||
List of frame processors that can generate metrics.
|
||||
"""
|
||||
services = []
|
||||
for p in self._processors:
|
||||
if isinstance(p, BasePipeline):
|
||||
@@ -71,14 +134,26 @@ class Pipeline(BasePipeline):
|
||||
#
|
||||
|
||||
async def setup(self, setup: FrameProcessorSetup):
    """Prepare the pipeline itself and then each processor it contains.

    Args:
        setup: Setup configuration, passed to the base class and on to
            ``_setup_processors``.
    """
    await super().setup(setup)
    await self._setup_processors(setup)
|
||||
|
||||
async def cleanup(self):
    """Clean up the pipeline itself and then each processor it contains."""
    await super().cleanup()
    await self._cleanup_processors()
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames by routing them through the pipeline.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if direction == FrameDirection.DOWNSTREAM:
|
||||
@@ -87,14 +162,17 @@ class Pipeline(BasePipeline):
|
||||
await self._sink.queue_frame(frame, FrameDirection.UPSTREAM)
|
||||
|
||||
async def _setup_processors(self, setup: FrameProcessorSetup):
    """Set up every processor of the pipeline, one after another.

    Args:
        setup: Setup configuration handed unchanged to each processor.
    """
    for processor in self._processors:
        await processor.setup(setup)
|
||||
|
||||
async def _cleanup_processors(self):
    """Clean up every processor of the pipeline, one after another."""
    for processor in self._processors:
        await processor.cleanup()
|
||||
|
||||
def _link_processors(self):
|
||||
"""Link all processors in sequence and set their parent."""
|
||||
prev = self._processors[0]
|
||||
for curr in self._processors[1:]:
|
||||
prev.set_parent(self)
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Pipeline runner for managing pipeline task execution.
|
||||
|
||||
This module provides the PipelineRunner class that handles the execution
|
||||
of pipeline tasks with signal handling, garbage collection, and lifecycle
|
||||
management.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import gc
|
||||
import signal
|
||||
@@ -17,6 +24,13 @@ from pipecat.utils.base_object import BaseObject
|
||||
|
||||
|
||||
class PipelineRunner(BaseObject):
|
||||
"""Manages the execution of pipeline tasks with lifecycle and signal handling.
|
||||
|
||||
Provides a high-level interface for running pipeline tasks with automatic
|
||||
signal handling (SIGINT/SIGTERM), optional garbage collection, and proper
|
||||
cleanup of resources.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
@@ -25,6 +39,14 @@ class PipelineRunner(BaseObject):
|
||||
force_gc: bool = False,
|
||||
loop: Optional[asyncio.AbstractEventLoop] = None,
|
||||
):
|
||||
"""Initialize the pipeline runner.
|
||||
|
||||
Args:
|
||||
name: Optional name for the runner instance.
|
||||
handle_sigint: Whether to automatically handle SIGINT/SIGTERM signals.
|
||||
force_gc: Whether to force garbage collection after task completion.
|
||||
loop: Event loop to use. If None, uses the current running loop.
|
||||
"""
|
||||
super().__init__(name=name)
|
||||
|
||||
self._tasks = {}
|
||||
@@ -36,6 +58,11 @@ class PipelineRunner(BaseObject):
|
||||
self._setup_sigint()
|
||||
|
||||
async def run(self, task: PipelineTask):
|
||||
"""Run a pipeline task to completion.
|
||||
|
||||
Args:
|
||||
task: The pipeline task to execute.
|
||||
"""
|
||||
logger.debug(f"Runner {self} started running {task}")
|
||||
self._tasks[task.name] = task
|
||||
params = PipelineTaskParams(loop=self._loop)
|
||||
@@ -56,27 +83,33 @@ class PipelineRunner(BaseObject):
|
||||
logger.debug(f"Runner {self} finished running {task}")
|
||||
|
||||
async def stop_when_done(self):
    """Ask every task registered with this runner to stop once it is done.

    ``stop_when_done`` is invoked on all tasks and the resulting awaitables
    are awaited together.
    """
    logger.debug(f"Runner {self} scheduled to stop when all tasks are done")
    pending = [task.stop_when_done() for task in self._tasks.values()]
    await asyncio.gather(*pending)
|
||||
|
||||
async def cancel(self):
    """Cancel every task registered with this runner.

    ``cancel`` is invoked on all tasks and the resulting awaitables are
    awaited together.
    """
    logger.debug(f"Cancelling runner {self}")
    pending = [task.cancel() for task in self._tasks.values()]
    await asyncio.gather(*pending)
|
||||
|
||||
def _setup_sigint(self):
    """Register SIGINT and SIGTERM handlers on the running event loop.

    Both signals are routed to ``_sig_handler``. The loop is obtained with
    ``asyncio.get_running_loop()``, so this must be called while an event
    loop is running.
    """
    loop = asyncio.get_running_loop()
    for signum in (signal.SIGINT, signal.SIGTERM):
        loop.add_signal_handler(signum, lambda *args: self._sig_handler())
|
||||
|
||||
def _sig_handler(self):
    """Start cancelling all tasks in response to a signal.

    Does nothing when a cancellation task has already been recorded in
    ``_sig_task``; otherwise schedules ``_sig_cancel`` and stores the task.
    """
    if self._sig_task:
        return
    self._sig_task = asyncio.create_task(self._sig_cancel())
|
||||
|
||||
async def _sig_cancel(self):
    """Log the interruption and cancel the runner through ``cancel``."""
    logger.warning(f"Interruption detected. Cancelling runner {self}")
    await self.cancel()
|
||||
|
||||
def _gc_collect(self):
    """Run a garbage collection pass and log what it found.

    Logs the count returned by ``gc.collect()`` and the current contents
    of ``gc.garbage`` at debug level.
    """
    collected = gc.collect()
    logger.debug(f"Garbage collector: collected {collected} objects.")
    logger.debug(f"Garbage collector: uncollectable objects {gc.garbage}")
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Synchronous parallel pipeline implementation for concurrent frame processing.
|
||||
|
||||
This module provides a pipeline that processes frames through multiple parallel
|
||||
pipelines simultaneously, synchronizing their output to maintain frame ordering
|
||||
and prevent duplicate processing.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from itertools import chain
|
||||
@@ -20,17 +27,38 @@ from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
|
||||
|
||||
@dataclass
class SyncFrame(ControlFrame):
    """Control frame used to synchronize parallel pipeline processing.

    This frame is sent through parallel pipelines to determine when the
    internal pipelines have finished processing a batch of frames.
    """

    pass
|
||||
|
||||
|
||||
class SyncParallelPipelineSource(FrameProcessor):
|
||||
"""Source processor for synchronous parallel pipeline processing.
|
||||
|
||||
Routes frames to parallel pipelines and collects upstream responses
|
||||
for synchronization purposes.
|
||||
"""
|
||||
|
||||
def __init__(self, upstream_queue: asyncio.Queue):
    """Create the source and remember the upstream queue.

    Args:
        upstream_queue: Queue stored as ``_up_queue`` for upstream frames.
    """
    super().__init__()
    self._up_queue = upstream_queue
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames and route them based on direction.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
match direction:
|
||||
@@ -41,11 +69,28 @@ class SyncParallelPipelineSource(FrameProcessor):
|
||||
|
||||
|
||||
class SyncParallelPipelineSink(FrameProcessor):
|
||||
"""Sink processor for synchronous parallel pipeline processing.
|
||||
|
||||
Collects downstream frames from parallel pipelines and routes
|
||||
upstream frames back through the pipeline.
|
||||
"""
|
||||
|
||||
def __init__(self, downstream_queue: asyncio.Queue):
    """Create the sink and remember the downstream queue.

    Args:
        downstream_queue: Queue stored as ``_down_queue`` for downstream
            frames.
    """
    super().__init__()
    self._down_queue = downstream_queue
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames and route them based on direction.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
match direction:
|
||||
@@ -56,7 +101,28 @@ class SyncParallelPipelineSink(FrameProcessor):
|
||||
|
||||
|
||||
class SyncParallelPipeline(BasePipeline):
|
||||
"""Pipeline that processes frames through multiple parallel pipelines synchronously.
|
||||
|
||||
Creates multiple parallel processing paths that all receive the same input frames
|
||||
and produces synchronized output. Each parallel path is a separate pipeline that
|
||||
processes frames independently, with synchronization points to ensure consistent
|
||||
ordering and prevent duplicate frame processing.
|
||||
|
||||
The pipeline uses SyncFrame control frames to coordinate between parallel paths
|
||||
and ensure all paths have completed processing before moving to the next frame.
|
||||
"""
|
||||
|
||||
def __init__(self, *args):
|
||||
"""Initialize the synchronous parallel pipeline.
|
||||
|
||||
Args:
|
||||
*args: Variable number of processor lists, each representing a parallel pipeline path.
|
||||
Each argument should be a list of FrameProcessor instances.
|
||||
|
||||
Raises:
|
||||
Exception: If no arguments are provided.
|
||||
TypeError: If any argument is not a list of processors.
|
||||
"""
|
||||
super().__init__()
|
||||
|
||||
if len(args) == 0:
|
||||
@@ -72,6 +138,11 @@ class SyncParallelPipeline(BasePipeline):
|
||||
#
|
||||
|
||||
def processors_with_metrics(self) -> List[FrameProcessor]:
    """Gather the metrics-capable processors of all parallel pipelines.

    Returns:
        A single flat list built by concatenating, in pipeline order, the
        result of ``processors_with_metrics()`` of each parallel pipeline.
    """
    collected = []
    for pipeline in self._pipelines:
        collected.extend(pipeline.processors_with_metrics())
    return collected
|
||||
|
||||
#
|
||||
@@ -79,6 +150,11 @@ class SyncParallelPipeline(BasePipeline):
|
||||
#
|
||||
|
||||
async def setup(self, setup: FrameProcessorSetup):
|
||||
"""Set up the parallel pipeline and all contained processors.
|
||||
|
||||
Args:
|
||||
setup: Configuration for frame processor setup.
|
||||
"""
|
||||
await super().setup(setup)
|
||||
|
||||
self._up_queue = WatchdogQueue(setup.task_manager)
|
||||
@@ -113,12 +189,23 @@ class SyncParallelPipeline(BasePipeline):
|
||||
await asyncio.gather(*[s["processor"].setup(setup) for s in self._sinks])
|
||||
|
||||
async def cleanup(self):
    """Release this parallel pipeline and everything it contains.

    The base-class cleanup runs first. Then the source processors, the
    pipelines and the sink processors are cleaned up group by group; the
    members of one group are awaited together via ``asyncio.gather``.
    """
    await super().cleanup()

    source_jobs = [entry["processor"].cleanup() for entry in self._sources]
    await asyncio.gather(*source_jobs)

    pipeline_jobs = [pipeline.cleanup() for pipeline in self._pipelines]
    await asyncio.gather(*pipeline_jobs)

    sink_jobs = [entry["processor"].cleanup() for entry in self._sinks]
    await asyncio.gather(*sink_jobs)
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames through all parallel pipelines with synchronization.
|
||||
|
||||
Distributes frames to all parallel pipelines and synchronizes their output
|
||||
to maintain proper ordering and prevent duplicate processing. Uses SyncFrame
|
||||
control frames to coordinate between parallel paths.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
# The last processor of each pipeline needs to be synchronous otherwise
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Pipeline task implementation for managing frame processing pipelines.
|
||||
|
||||
This module provides the main PipelineTask class that orchestrates pipeline
|
||||
execution, frame routing, lifecycle management, and monitoring capabilities
|
||||
including heartbeats, idle detection, and observer integration.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from collections import deque
|
||||
@@ -53,12 +60,13 @@ HEARTBEAT_MONITOR_SECONDS = HEARTBEAT_SECONDS * 10
|
||||
|
||||
|
||||
class PipelineParams(BaseModel):
|
||||
"""Configuration parameters for pipeline execution. These parameters are
|
||||
usually passed to all frame processors using through `StartFrame`. For other
|
||||
generic pipeline task parameters use `PipelineTask` constructor arguments
|
||||
instead.
|
||||
"""Configuration parameters for pipeline execution.
|
||||
|
||||
Attributes:
|
||||
These parameters are usually passed to all frame processors through
|
||||
StartFrame. For other generic pipeline task parameters use PipelineTask
|
||||
constructor arguments instead.
|
||||
|
||||
Parameters:
|
||||
allow_interruptions: Whether to allow pipeline interruptions.
|
||||
audio_in_sample_rate: Input audio sample rate in Hz.
|
||||
audio_out_sample_rate: Output audio sample rate in Hz.
|
||||
@@ -66,12 +74,15 @@ class PipelineParams(BaseModel):
|
||||
enable_metrics: Whether to enable metrics collection.
|
||||
enable_usage_metrics: Whether to enable usage metrics.
|
||||
heartbeats_period_secs: Period between heartbeats in seconds.
|
||||
interruption_strategies: Strategies for bot interruption behavior.
|
||||
observers: [deprecated] Use `observers` arg in `PipelineTask` class.
|
||||
|
||||
.. deprecated:: 0.0.58
|
||||
Use the `observers` argument in the `PipelineTask` class instead.
|
||||
|
||||
report_only_initial_ttfb: Whether to report only initial time to first byte.
|
||||
send_initial_empty_metrics: Whether to send initial empty metrics.
|
||||
start_metadata: Additional metadata for pipeline start.
|
||||
interruption_strategies: Strategies for bot interruption behavior.
|
||||
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
@@ -97,17 +108,25 @@ class PipelineTaskSource(FrameProcessor):
|
||||
pipeline given to the pipeline task. It allows us to easily push frames
|
||||
downstream to the pipeline and also receive upstream frames coming from the
|
||||
pipeline.
|
||||
|
||||
Args:
|
||||
up_queue: Queue for upstream frame processing.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, up_queue: asyncio.Queue, **kwargs):
    """Create the task source and remember the upstream queue.

    Args:
        up_queue: Queue stored as ``_up_queue`` for upstream frames.
        **kwargs: Extra keyword arguments forwarded to the parent class.
    """
    super().__init__(**kwargs)
    self._up_queue = up_queue
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames and route them based on direction.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
match direction:
|
||||
@@ -123,16 +142,25 @@ class PipelineTaskSink(FrameProcessor):
|
||||
This is the sink processor that is linked at the end of the pipeline
|
||||
given to the pipeline task. It allows us to receive downstream frames and
|
||||
act on them, for example, waiting to receive an EndFrame.
|
||||
|
||||
Args:
|
||||
down_queue: Queue for downstream frame processing.
|
||||
"""
|
||||
|
||||
def __init__(self, down_queue: asyncio.Queue, **kwargs):
    """Create the task sink and remember the downstream queue.

    Args:
        down_queue: Queue stored as ``_down_queue`` for downstream frames.
        **kwargs: Extra keyword arguments forwarded to the parent class.
    """
    super().__init__(**kwargs)
    self._down_queue = down_queue
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
    """Run base-class processing, then hand the frame to the downstream queue.

    The frame is put on ``_down_queue`` whatever its direction is.

    Args:
        frame: The frame to process.
        direction: The direction of frame flow, passed to the parent class.
    """
    await super().process_frame(frame, direction)
    await self._down_queue.put(frame)
|
||||
|
||||
@@ -140,69 +168,30 @@ class PipelineTaskSink(FrameProcessor):
|
||||
class PipelineTask(BasePipelineTask):
|
||||
"""Manages the execution of a pipeline, handling frame processing and task lifecycle.
|
||||
|
||||
It has a couple of event handlers `on_frame_reached_upstream` and
|
||||
`on_frame_reached_downstream` that are called when upstream frames or
|
||||
downstream frames reach both ends of pipeline. By default, the events
|
||||
handlers will not be called unless some filters are set using
|
||||
`set_reached_upstream_filter` and `set_reached_downstream_filter`.
|
||||
This class orchestrates pipeline execution with comprehensive monitoring,
|
||||
event handling, and lifecycle management. It provides event handlers for
|
||||
various pipeline states and frame types, idle detection, heartbeat monitoring,
|
||||
and observer integration.
|
||||
|
||||
@task.event_handler("on_frame_reached_upstream")
|
||||
async def on_frame_reached_upstream(task, frame):
|
||||
...
|
||||
Event handlers available:
|
||||
|
||||
@task.event_handler("on_frame_reached_downstream")
|
||||
async def on_frame_reached_downstream(task, frame):
|
||||
...
|
||||
- on_frame_reached_upstream: Called when upstream frames reach the source
|
||||
- on_frame_reached_downstream: Called when downstream frames reach the sink
|
||||
- on_idle_timeout: Called when pipeline is idle beyond timeout threshold
|
||||
- on_pipeline_started: Called when pipeline starts with StartFrame
|
||||
- on_pipeline_stopped: Called when pipeline stops with StopFrame
|
||||
- on_pipeline_ended: Called when pipeline ends with EndFrame
|
||||
- on_pipeline_cancelled: Called when pipeline is cancelled
|
||||
|
||||
It also has an event handler that detects when the pipeline is idle. By
|
||||
default, a pipeline is idle if no `BotSpeakingFrame` or
|
||||
`LLMFullResponseEndFrame` are received within `idle_timeout_secs`.
|
||||
Example::
|
||||
|
||||
@task.event_handler("on_idle_timeout")
|
||||
async def on_pipeline_idle_timeout(task):
|
||||
...
|
||||
@task.event_handler("on_frame_reached_upstream")
|
||||
async def on_frame_reached_upstream(task, frame):
|
||||
...
|
||||
|
||||
There are also events to know if a pipeline has been started, stopped, ended
|
||||
or cancelled.
|
||||
|
||||
@task.event_handler("on_pipeline_started")
|
||||
async def on_pipeline_started(task, frame: StartFrame):
|
||||
...
|
||||
|
||||
@task.event_handler("on_pipeline_stopped")
|
||||
async def on_pipeline_stopped(task, frame: StopFrame):
|
||||
...
|
||||
|
||||
@task.event_handler("on_pipeline_ended")
|
||||
async def on_pipeline_ended(task, frame: EndFrame):
|
||||
...
|
||||
|
||||
@task.event_handler("on_pipeline_cancelled")
|
||||
async def on_pipeline_cancelled(task, frame: CancelFrame):
|
||||
...
|
||||
|
||||
Args:
|
||||
pipeline: The pipeline to execute.
|
||||
params: Configuration parameters for the pipeline.
|
||||
additional_span_attributes: Optional dictionary of attributes to propagate as
|
||||
OpenTelemetry conversation span attributes.
|
||||
cancel_on_idle_timeout: Whether the pipeline task should be cancelled if
|
||||
the idle timeout is reached.
|
||||
check_dangling_tasks: Whether to check for processors' tasks finishing properly.
|
||||
clock: Clock implementation for timing operations.
|
||||
conversation_id: Optional custom ID for the conversation.
|
||||
enable_tracing: Whether to enable tracing.
|
||||
enable_turn_tracking: Whether to enable turn tracking.
|
||||
enable_watchdog_logging: Whether to print task processing times.
|
||||
enable_watchdog_timers: Whether to enable task watchdog timers.
|
||||
idle_timeout_frames: A tuple with the frames that should trigger an idle
|
||||
timeout if not received withing `idle_timeout_seconds`.
|
||||
idle_timeout_secs: Timeout (in seconds) to consider pipeline idle or
|
||||
None. If a pipeline is idle the pipeline task will be cancelled
|
||||
automatically.
|
||||
observers: List of observers for monitoring pipeline execution.
|
||||
watchdog_timeout_secs: Watchdog timer timeout (in seconds). A warning
|
||||
will be logged if the watchdog timer is not reset before this timeout.
|
||||
@task.event_handler("on_idle_timeout")
|
||||
async def on_pipeline_idle_timeout(task):
|
||||
...
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -228,6 +217,32 @@ class PipelineTask(BasePipelineTask):
|
||||
task_manager: Optional[BaseTaskManager] = None,
|
||||
watchdog_timeout_secs: float = WATCHDOG_TIMEOUT,
|
||||
):
|
||||
"""Initialize the PipelineTask.
|
||||
|
||||
Args:
|
||||
pipeline: The pipeline to execute.
|
||||
params: Configuration parameters for the pipeline.
|
||||
additional_span_attributes: Optional dictionary of attributes to propagate as
|
||||
OpenTelemetry conversation span attributes.
|
||||
cancel_on_idle_timeout: Whether the pipeline task should be cancelled if
|
||||
the idle timeout is reached.
|
||||
check_dangling_tasks: Whether to check for processors' tasks finishing properly.
|
||||
clock: Clock implementation for timing operations.
|
||||
conversation_id: Optional custom ID for the conversation.
|
||||
enable_tracing: Whether to enable tracing.
|
||||
enable_turn_tracking: Whether to enable turn tracking.
|
||||
enable_watchdog_logging: Whether to print task processing times.
|
||||
enable_watchdog_timers: Whether to enable task watchdog timers.
|
||||
idle_timeout_frames: A tuple with the frames that should trigger an idle
|
||||
timeout if not received within `idle_timeout_secs`.
|
||||
idle_timeout_secs: Timeout (in seconds) to consider pipeline idle or
|
||||
None. If a pipeline is idle the pipeline task will be cancelled
|
||||
automatically.
|
||||
observers: List of observers for monitoring pipeline execution.
|
||||
task_manager: Optional task manager for handling asyncio tasks.
|
||||
watchdog_timeout_secs: Watchdog timer timeout (in seconds). A warning
|
||||
will be logged if the watchdog timer is not reset before this timeout.
|
||||
"""
|
||||
super().__init__()
|
||||
self._pipeline = pipeline
|
||||
self._params = params or PipelineParams()
|
||||
@@ -331,60 +346,97 @@ class PipelineTask(BasePipelineTask):
|
||||
|
||||
@property
def params(self) -> PipelineParams:
    """Get the pipeline parameters for this task.

    Returns:
        The pipeline parameters configuration.
    """
    return self._params
|
||||
|
||||
@property
def turn_tracking_observer(self) -> Optional[TurnTrackingObserver]:
    """Get the turn tracking observer if enabled.

    Returns:
        The turn tracking observer instance or None if not enabled.
    """
    return self._turn_tracking_observer
|
||||
|
||||
@property
def turn_trace_observer(self) -> Optional[TurnTraceObserver]:
    """Get the turn trace observer if enabled.

    Returns:
        The turn trace observer instance or None if not enabled.
    """
    return self._turn_trace_observer
|
||||
|
||||
def add_observer(self, observer: BaseObserver):
    """Register an additional observer for this task.

    Args:
        observer: The observer handed to the task's internal observer via
            its ``add_observer`` method.
    """
    self._observer.add_observer(observer)
|
||||
|
||||
async def remove_observer(self, observer: BaseObserver):
    """Unregister an observer from this task.

    Args:
        observer: The observer handed to the task's internal observer via
            its awaitable ``remove_observer`` method.
    """
    await self._observer.remove_observer(observer)
|
||||
|
||||
def set_reached_upstream_filter(self, types: Tuple[Type[Frame], ...]):
    """Set which frame types trigger the on_frame_reached_upstream event.

    Args:
        types: Tuple of frame types to monitor for upstream events.
    """
    self._reached_upstream_types = types
|
||||
|
||||
def set_reached_downstream_filter(self, types: Tuple[Type[Frame], ...]):
    """Set which frame types trigger the on_frame_reached_downstream event.

    Args:
        types: Tuple of frame types to monitor for downstream events.
    """
    self._reached_downstream_types = types
|
||||
|
||||
def has_finished(self) -> bool:
    """Check if the pipeline task has finished execution.

    This indicates whether the task has finished, meaning all processors
    have stopped.

    Returns:
        True if all processors have stopped and the task is complete.
    """
    return self._finished
|
||||
|
||||
async def stop_when_done(self):
    """Schedule the pipeline to stop after processing all queued frames.

    Sends an EndFrame to gracefully terminate the pipeline once all
    current processing is complete.
    """
    logger.debug(f"Task {self} scheduled to stop when done")
    await self.queue_frame(EndFrame())
|
||||
|
||||
async def cancel(self):
    """Immediately stop the running pipeline.

    Cancels all running tasks and stops frame processing without
    waiting for completion. The work is delegated to ``_cancel``.
    """
    await self._cancel()
|
||||
|
||||
async def run(self, params: PipelineTaskParams):
|
||||
"""Starts and manages the pipeline execution until completion or cancellation."""
|
||||
"""Start and manage the pipeline execution until completion or cancellation.
|
||||
|
||||
Args:
|
||||
params: Configuration parameters for pipeline execution.
|
||||
"""
|
||||
if self.has_finished():
|
||||
return
|
||||
cleanup_pipeline = True
|
||||
@@ -440,6 +492,7 @@ class PipelineTask(BasePipelineTask):
|
||||
await self.queue_frame(frame)
|
||||
|
||||
async def _cancel(self):
|
||||
"""Internal cancellation logic for the pipeline task."""
|
||||
if not self._cancelled:
|
||||
logger.debug(f"Canceling pipeline task {self}")
|
||||
self._cancelled = True
|
||||
@@ -453,6 +506,7 @@ class PipelineTask(BasePipelineTask):
|
||||
self._process_push_task = None
|
||||
|
||||
async def _create_tasks(self):
|
||||
"""Create and start all pipeline processing tasks."""
|
||||
self._process_up_task = self._task_manager.create_task(
|
||||
self._process_up_queue(), f"{self}::_process_up_queue"
|
||||
)
|
||||
@@ -463,11 +517,12 @@ class PipelineTask(BasePipelineTask):
|
||||
self._process_push_queue(), f"{self}::_process_push_queue"
|
||||
)
|
||||
|
||||
await self._observer.start(self._enable_watchdog_timers)
|
||||
await self._observer.start()
|
||||
|
||||
return self._process_push_task
|
||||
|
||||
def _maybe_start_heartbeat_tasks(self):
|
||||
"""Start heartbeat tasks if heartbeats are enabled and not already running."""
|
||||
if self._params.enable_heartbeats and self._heartbeat_push_task is None:
|
||||
self._heartbeat_push_task = self._task_manager.create_task(
|
||||
self._heartbeat_push_handler(), f"{self}::_heartbeat_push_handler"
|
||||
@@ -477,12 +532,14 @@ class PipelineTask(BasePipelineTask):
|
||||
)
|
||||
|
||||
def _maybe_start_idle_task(self):
|
||||
"""Start idle monitoring task if idle timeout is configured."""
|
||||
if self._idle_timeout_secs:
|
||||
self._idle_monitor_task = self._task_manager.create_task(
|
||||
self._idle_monitor_handler(), f"{self}::_idle_monitor_handler"
|
||||
)
|
||||
|
||||
async def _cancel_tasks(self):
|
||||
"""Cancel all running pipeline tasks."""
|
||||
await self._observer.stop()
|
||||
|
||||
if self._process_up_task:
|
||||
@@ -497,6 +554,7 @@ class PipelineTask(BasePipelineTask):
|
||||
await self._maybe_cancel_idle_task()
|
||||
|
||||
async def _maybe_cancel_heartbeat_tasks(self):
|
||||
"""Cancel heartbeat tasks if they are running."""
|
||||
if not self._params.enable_heartbeats:
|
||||
return
|
||||
|
||||
@@ -509,11 +567,14 @@ class PipelineTask(BasePipelineTask):
|
||||
self._heartbeat_monitor_task = None
|
||||
|
||||
async def _maybe_cancel_idle_task(self):
|
||||
"""Cancel idle monitoring task if it is running."""
|
||||
if self._idle_timeout_secs and self._idle_monitor_task:
|
||||
self._idle_queue.cancel()
|
||||
await self._task_manager.cancel_task(self._idle_monitor_task)
|
||||
self._idle_monitor_task = None
|
||||
|
||||
def _initial_metrics_frame(self) -> MetricsFrame:
|
||||
"""Create an initial metrics frame with zero values for all processors."""
|
||||
processors = self._pipeline.processors_with_metrics()
|
||||
data = []
|
||||
for p in processors:
|
||||
@@ -522,10 +583,12 @@ class PipelineTask(BasePipelineTask):
|
||||
return MetricsFrame(data=data)
|
||||
|
||||
async def _wait_for_pipeline_end(self):
|
||||
"""Wait for the pipeline to signal completion."""
|
||||
await self._pipeline_end_event.wait()
|
||||
self._pipeline_end_event.clear()
|
||||
|
||||
async def _setup(self, params: PipelineTaskParams):
|
||||
"""Set up the pipeline task and all processors."""
|
||||
mgr_params = TaskManagerParams(
|
||||
loop=params.loop,
|
||||
enable_watchdog_logging=self._enable_watchdog_logging,
|
||||
@@ -545,6 +608,7 @@ class PipelineTask(BasePipelineTask):
|
||||
await self._sink.setup(setup)
|
||||
|
||||
async def _cleanup(self, cleanup_pipeline: bool):
|
||||
"""Clean up the pipeline task and processors."""
|
||||
# Cleanup base object.
|
||||
await self.cleanup()
|
||||
|
||||
@@ -559,10 +623,11 @@ class PipelineTask(BasePipelineTask):
|
||||
await self._sink.cleanup()
|
||||
|
||||
async def _process_push_queue(self):
|
||||
"""This is the task that runs the pipeline for the first time by sending
|
||||
"""Process frames from the push queue and send them through the pipeline.
|
||||
|
||||
This is the task that runs the pipeline for the first time by sending
|
||||
a StartFrame and by pushing any other frames queued by the user. It runs
|
||||
until the task is cancelled or stopped (e.g. with an EndFrame).
|
||||
|
||||
"""
|
||||
self._clock.start()
|
||||
|
||||
@@ -596,11 +661,12 @@ class PipelineTask(BasePipelineTask):
|
||||
await self._cleanup(cleanup_pipeline)
|
||||
|
||||
async def _process_up_queue(self):
|
||||
"""This is the task that processes frames coming upstream from the
|
||||
"""Process frames coming upstream from the pipeline.
|
||||
|
||||
This is the task that processes frames coming upstream from the
|
||||
pipeline. These frames might indicate, for example, that we want the
|
||||
pipeline to be stopped (e.g. EndTaskFrame) in which case we would send
|
||||
an EndFrame down the pipeline.
|
||||
|
||||
"""
|
||||
while True:
|
||||
frame = await self._up_queue.get()
|
||||
@@ -629,11 +695,12 @@ class PipelineTask(BasePipelineTask):
|
||||
self._up_queue.task_done()
|
||||
|
||||
async def _process_down_queue(self):
|
||||
"""This tasks process frames coming downstream from the pipeline. For
|
||||
"""Process frames coming downstream from the pipeline.
|
||||
|
||||
This task processes frames coming downstream from the pipeline. For
|
||||
example, heartbeat frames or an EndFrame which would indicate all
|
||||
processors have handled the EndFrame and therefore we can exit the task
|
||||
cleanly.
|
||||
|
||||
"""
|
||||
while True:
|
||||
frame = await self._down_queue.get()
|
||||
@@ -664,7 +731,7 @@ class PipelineTask(BasePipelineTask):
|
||||
self._down_queue.task_done()
|
||||
|
||||
async def _heartbeat_push_handler(self):
|
||||
"""This tasks pushes a heartbeat frame every heartbeat period."""
|
||||
"""Push heartbeat frames at regular intervals."""
|
||||
while True:
|
||||
# Don't use `queue_frame()` because if an EndFrame is queued the
|
||||
# task will just stop waiting for the pipeline to finish not
|
||||
@@ -673,11 +740,12 @@ class PipelineTask(BasePipelineTask):
|
||||
await asyncio.sleep(self._params.heartbeats_period_secs)
|
||||
|
||||
async def _heartbeat_monitor_handler(self):
|
||||
"""This tasks monitors heartbeat frames. If a heartbeat frame has not
|
||||
"""Monitor heartbeat frames for processing time and timeout detection.
|
||||
|
||||
This task monitors heartbeat frames. If a heartbeat frame has not
|
||||
been received for a long period a warning will be logged. It also logs
|
||||
the time that a heartbeat frame takes to process, that is how long it
|
||||
takes for the heartbeat frame to traverse all the pipeline.
|
||||
|
||||
"""
|
||||
wait_time = HEARTBEAT_MONITOR_SECONDS
|
||||
while True:
|
||||
@@ -692,9 +760,12 @@ class PipelineTask(BasePipelineTask):
|
||||
)
|
||||
|
||||
async def _idle_monitor_handler(self):
|
||||
"""This tasks monitors activity in the pipeline. If no frames are
|
||||
received (heartbeats don't count) the pipeline is considered idle.
|
||||
"""Monitor pipeline activity and detect idle conditions.
|
||||
|
||||
Tracks frame activity and triggers idle timeout events when the
|
||||
pipeline hasn't received relevant frames within the timeout period.
|
||||
|
||||
Note: Heartbeats are excluded from idle detection.
|
||||
"""
|
||||
running = True
|
||||
last_frame_time = 0
|
||||
@@ -732,10 +803,13 @@ class PipelineTask(BasePipelineTask):
|
||||
running = await self._idle_timeout_detected(frame_buffer)
|
||||
|
||||
async def _idle_timeout_detected(self, last_frames: Deque[Frame]) -> bool:
|
||||
"""Logic for when the pipeline is idle.
|
||||
"""Handle idle timeout detection and optional cancellation.
|
||||
|
||||
Args:
|
||||
last_frames: Recent frames received before timeout for debugging.
|
||||
|
||||
Returns:
|
||||
bool: Whther the pipeline task is being cancelled or not.
|
||||
Whether the pipeline task should continue running.
|
||||
"""
|
||||
logger.warning("Idle timeout detected. Last 10 frames received:")
|
||||
for i, frame in enumerate(last_frames, 1):
|
||||
@@ -749,6 +823,7 @@ class PipelineTask(BasePipelineTask):
|
||||
return True
|
||||
|
||||
def _print_dangling_tasks(self):
|
||||
"""Log any dangling tasks that haven't been properly cleaned up."""
|
||||
tasks = [t.get_name() for t in self._task_manager.current_tasks()]
|
||||
if tasks:
|
||||
logger.warning(f"Dangling tasks detected: {tasks}")
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Task observer for managing pipeline frame observers.
|
||||
|
||||
This module provides a proxy observer system that manages multiple observers
|
||||
for pipeline frame events, ensuring that observer processing doesn't block
|
||||
the main pipeline execution.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
from typing import Dict, List, Optional
|
||||
@@ -17,9 +24,15 @@ from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
|
||||
|
||||
@dataclass
|
||||
class Proxy:
|
||||
"""This is the data we receive from the main observer and that we put into
|
||||
a queue for later processing.
|
||||
"""Proxy data for managing observer tasks and queues.
|
||||
|
||||
This represents the data received from the main observer that
|
||||
is queued for later processing.
|
||||
|
||||
Parameters:
|
||||
queue: Queue for frame data awaiting observer processing.
|
||||
task: Asyncio task running the observer's frame processing loop.
|
||||
observer: The actual observer instance being proxied.
|
||||
"""
|
||||
|
||||
queue: asyncio.Queue
|
||||
@@ -28,7 +41,9 @@ class Proxy:
|
||||
|
||||
|
||||
class TaskObserver(BaseObserver):
|
||||
"""This is a pipeline frame observer that is meant to be used as a proxy to
|
||||
"""Proxy observer that manages multiple observers without blocking the pipeline.
|
||||
|
||||
This is a pipeline frame observer that is meant to be used as a proxy to
|
||||
the user provided observers. That is, this is the observer that should be
|
||||
passed to the frame processors. Then, every time a frame is pushed this
|
||||
observer will call all the observers registered to the pipeline task.
|
||||
@@ -37,7 +52,6 @@ class TaskObserver(BaseObserver):
|
||||
pipeline by creating a queue and a task for each user observer. When a frame
|
||||
is received, it will be put in a queue for efficiency and later processed by
|
||||
each task.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -47,6 +61,13 @@ class TaskObserver(BaseObserver):
|
||||
task_manager: BaseTaskManager,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the TaskObserver.
|
||||
|
||||
Args:
|
||||
observers: List of observers to manage. Defaults to empty list.
|
||||
task_manager: Task manager for creating and managing observer tasks.
|
||||
**kwargs: Additional arguments passed to the base observer.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._observers = observers or []
|
||||
self._task_manager = task_manager
|
||||
@@ -55,6 +76,11 @@ class TaskObserver(BaseObserver):
|
||||
)
|
||||
|
||||
def add_observer(self, observer: BaseObserver):
|
||||
"""Add a new observer to the managed list.
|
||||
|
||||
Args:
|
||||
observer: The observer to add.
|
||||
"""
|
||||
# Add the observer to the list.
|
||||
self._observers.append(observer)
|
||||
|
||||
@@ -65,6 +91,11 @@ class TaskObserver(BaseObserver):
|
||||
self._proxies[observer] = proxy
|
||||
|
||||
async def remove_observer(self, observer: BaseObserver):
|
||||
"""Remove an observer and clean up its resources.
|
||||
|
||||
Args:
|
||||
observer: The observer to remove.
|
||||
"""
|
||||
# If the observer has a proxy, remove it.
|
||||
if observer in self._proxies:
|
||||
proxy = self._proxies[observer]
|
||||
@@ -77,12 +108,12 @@ class TaskObserver(BaseObserver):
|
||||
if observer in self._observers:
|
||||
self._observers.remove(observer)
|
||||
|
||||
async def start(self, watchdog_timers_enabled: bool = False):
|
||||
"""Starts all proxy observer tasks."""
|
||||
async def start(self):
|
||||
"""Start all proxy observer tasks."""
|
||||
self._proxies = self._create_proxies(self._observers)
|
||||
|
||||
async def stop(self):
|
||||
"""Stops all proxy observer tasks."""
|
||||
"""Stop all proxy observer tasks."""
|
||||
if not self._proxies:
|
||||
return
|
||||
|
||||
@@ -90,13 +121,20 @@ class TaskObserver(BaseObserver):
|
||||
await self._task_manager.cancel_task(proxy.task)
|
||||
|
||||
async def on_push_frame(self, data: FramePushed):
|
||||
"""Queue frame data for all managed observers.
|
||||
|
||||
Args:
|
||||
data: The frame push event data to distribute to observers.
|
||||
"""
|
||||
for proxy in self._proxies.values():
|
||||
await proxy.queue.put(data)
|
||||
|
||||
def _started(self) -> bool:
|
||||
"""Check if the task observer has been started."""
|
||||
return self._proxies is not None
|
||||
|
||||
def _create_proxy(self, observer: BaseObserver) -> Proxy:
|
||||
"""Create a proxy for a single observer."""
|
||||
queue = WatchdogQueue(self._task_manager)
|
||||
task = self._task_manager.create_task(
|
||||
self._proxy_task_handler(queue, observer),
|
||||
@@ -106,6 +144,7 @@ class TaskObserver(BaseObserver):
|
||||
return proxy
|
||||
|
||||
def _create_proxies(self, observers: List[BaseObserver]) -> Dict[BaseObserver, Proxy]:
|
||||
"""Create proxies for all observers."""
|
||||
proxies = {}
|
||||
for observer in observers:
|
||||
proxy = self._create_proxy(observer)
|
||||
@@ -113,6 +152,7 @@ class TaskObserver(BaseObserver):
|
||||
return proxies
|
||||
|
||||
async def _proxy_task_handler(self, queue: asyncio.Queue, observer: BaseObserver):
|
||||
"""Handle frame processing for a single observer."""
|
||||
warning_reported = False
|
||||
while True:
|
||||
data = await queue.get()
|
||||
|
||||
@@ -1,3 +1,16 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Sequential pipeline merging for Pipecat.
|
||||
|
||||
This module provides a pipeline implementation that sequentially merges
|
||||
the output from multiple pipelines, processing them one after another
|
||||
in a specified order.
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
|
||||
from pipecat.frames.frames import EndFrame, EndPipeFrame
|
||||
@@ -5,14 +18,31 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
|
||||
|
||||
class SequentialMergePipeline(Pipeline):
|
||||
"""This class merges the sink queues from a list of pipelines. Frames from
|
||||
each pipeline's sink are merged in the order of pipelines in the list."""
|
||||
"""Pipeline that sequentially merges output from multiple pipelines.
|
||||
|
||||
This pipeline merges the sink queues from a list of pipelines by processing
|
||||
frames from each pipeline's sink sequentially in the order specified. Each
|
||||
pipeline runs to completion before the next one begins processing.
|
||||
"""
|
||||
|
||||
def __init__(self, pipelines: List[Pipeline]):
|
||||
"""Initialize the sequential merge pipeline.
|
||||
|
||||
Args:
|
||||
pipelines: List of pipelines to merge sequentially. Pipelines will
|
||||
be processed in the order they appear in this list.
|
||||
"""
|
||||
super().__init__([])
|
||||
self.pipelines = pipelines
|
||||
|
||||
async def run_pipeline(self):
|
||||
"""Run all pipelines sequentially and merge their output.
|
||||
|
||||
Processes each pipeline in order, consuming all frames from each
|
||||
pipeline's sink until an EndFrame or EndPipeFrame is encountered,
|
||||
then moves to the next pipeline. After all pipelines complete,
|
||||
sends a final EndFrame to signal completion.
|
||||
"""
|
||||
for idx, pipeline in enumerate(self.pipelines):
|
||||
while True:
|
||||
frame = await pipeline.sink.get()
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""DTMF aggregation processor for converting keypad input to transcription.
|
||||
|
||||
This module provides a frame processor that aggregates DTMF (Dual-Tone Multi-Frequency)
|
||||
keypad inputs into meaningful sequences and converts them to transcription frames
|
||||
for downstream processing by LLM context aggregators.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
@@ -26,16 +33,12 @@ class DTMFAggregator(FrameProcessor):
|
||||
|
||||
The aggregator accumulates digits from InputDTMFFrame instances and flushes
|
||||
when:
|
||||
|
||||
- Timeout occurs (configurable idle period)
|
||||
- Termination digit is received (default: '#')
|
||||
- EndFrame or CancelFrame is received
|
||||
|
||||
Emits TranscriptionFrame for compatibility with existing LLM context aggregators.
|
||||
|
||||
Args:
|
||||
timeout: Idle timeout in seconds before flushing
|
||||
termination_digit: Digit that triggers immediate flush
|
||||
prefix: Prefix added to DTMF sequence in transcription
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -45,6 +48,14 @@ class DTMFAggregator(FrameProcessor):
|
||||
prefix: str = "DTMF: ",
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the DTMF aggregator.
|
||||
|
||||
Args:
|
||||
timeout: Idle timeout in seconds before flushing
|
||||
termination_digit: Digit that triggers immediate flush
|
||||
prefix: Prefix added to DTMF sequence in transcription
|
||||
**kwargs: Additional arguments passed to FrameProcessor
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._aggregation = ""
|
||||
self._idle_timeout = timeout
|
||||
@@ -55,6 +66,12 @@ class DTMFAggregator(FrameProcessor):
|
||||
self._aggregation_task: Optional[asyncio.Task] = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection) -> None:
|
||||
"""Process incoming frames and handle DTMF aggregation.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartFrame):
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Gated frame aggregator for conditional frame accumulation.
|
||||
|
||||
This module provides a gated aggregator that accumulates frames based on
|
||||
custom gate open/close functions, allowing for conditional frame buffering
|
||||
and release in frame processing pipelines.
|
||||
"""
|
||||
|
||||
from typing import List, Tuple
|
||||
|
||||
from loguru import logger
|
||||
@@ -14,31 +21,11 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
class GatedAggregator(FrameProcessor):
|
||||
"""Accumulate frames, with custom functions to start and stop accumulation.
|
||||
|
||||
Yields gate-opening frame before any accumulated frames, then ensuing frames
|
||||
until and not including the gate-closed frame.
|
||||
|
||||
Doctest: FIXME to work with asyncio
|
||||
>>> from pipecat.frames.frames import ImageRawFrame
|
||||
|
||||
>>> async def print_frames(aggregator, frame):
|
||||
... async for frame in aggregator.process_frame(frame):
|
||||
... if isinstance(frame, TextFrame):
|
||||
... print(frame.text)
|
||||
... else:
|
||||
... print(frame.__class__.__name__)
|
||||
|
||||
>>> aggregator = GatedAggregator(
|
||||
... gate_close_fn=lambda x: isinstance(x, LLMResponseStartFrame),
|
||||
... gate_open_fn=lambda x: isinstance(x, ImageRawFrame),
|
||||
... start_open=False)
|
||||
>>> asyncio.run(print_frames(aggregator, TextFrame("Hello")))
|
||||
>>> asyncio.run(print_frames(aggregator, TextFrame("Hello again.")))
|
||||
>>> asyncio.run(print_frames(aggregator, ImageRawFrame(image=bytes([]), size=(0, 0))))
|
||||
ImageRawFrame
|
||||
Hello
|
||||
Hello again.
|
||||
>>> asyncio.run(print_frames(aggregator, TextFrame("Goodbye.")))
|
||||
Goodbye.
|
||||
until and not including the gate-closed frame. The aggregator maintains an
|
||||
internal gate state that controls whether frames are passed through immediately
|
||||
or accumulated for later release.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -48,6 +35,14 @@ class GatedAggregator(FrameProcessor):
|
||||
start_open,
|
||||
direction: FrameDirection = FrameDirection.DOWNSTREAM,
|
||||
):
|
||||
"""Initialize the gated aggregator.
|
||||
|
||||
Args:
|
||||
gate_open_fn: Function that returns True when a frame should open the gate.
|
||||
gate_close_fn: Function that returns True when a frame should close the gate.
|
||||
start_open: Whether the gate should start in the open state.
|
||||
direction: The frame direction this aggregator operates on.
|
||||
"""
|
||||
super().__init__()
|
||||
self._gate_open_fn = gate_open_fn
|
||||
self._gate_close_fn = gate_close_fn
|
||||
@@ -56,6 +51,12 @@ class GatedAggregator(FrameProcessor):
|
||||
self._accumulator: List[Tuple[Frame, FrameDirection]] = []
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames with gated accumulation logic.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of the frame flow.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
# We must not block system frames.
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Gated OpenAI LLM context aggregator for controlled message flow."""
|
||||
|
||||
from pipecat.frames.frames import CancelFrame, EndFrame, Frame, StartFrame
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
@@ -11,12 +13,21 @@ from pipecat.sync.base_notifier import BaseNotifier
|
||||
|
||||
|
||||
class GatedOpenAILLMContextAggregator(FrameProcessor):
|
||||
"""This aggregator keeps the last received OpenAI LLM context frame and it
|
||||
doesn't let it through until the notifier is notified.
|
||||
"""Aggregator that gates OpenAI LLM context frames until notified.
|
||||
|
||||
This aggregator captures OpenAI LLM context frames and holds them until
|
||||
a notifier signals that they can be released. This is useful for controlling
|
||||
the flow of context frames based on external conditions or timing.
|
||||
"""
|
||||
|
||||
def __init__(self, *, notifier: BaseNotifier, start_open: bool = False, **kwargs):
|
||||
"""Initialize the gated context aggregator.
|
||||
|
||||
Args:
|
||||
notifier: The notifier that controls when frames are released.
|
||||
start_open: If True, the first context frame passes through immediately.
|
||||
**kwargs: Additional arguments passed to the parent FrameProcessor.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._notifier = notifier
|
||||
self._start_open = start_open
|
||||
@@ -24,6 +35,12 @@ class GatedOpenAILLMContextAggregator(FrameProcessor):
|
||||
self._gate_task = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames, gating OpenAI LLM context frames.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartFrame):
|
||||
@@ -42,15 +59,18 @@ class GatedOpenAILLMContextAggregator(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
async def _start(self):
|
||||
"""Start the gate task handler."""
|
||||
if not self._gate_task:
|
||||
self._gate_task = self.create_task(self._gate_task_handler())
|
||||
|
||||
async def _stop(self):
|
||||
"""Stop the gate task handler."""
|
||||
if self._gate_task:
|
||||
await self.cancel_task(self._gate_task)
|
||||
self._gate_task = None
|
||||
|
||||
async def _gate_task_handler(self):
|
||||
"""Handle the gating logic by waiting for notifications and releasing frames."""
|
||||
while True:
|
||||
await self._notifier.wait()
|
||||
if self._last_context_frame:
|
||||
|
||||
@@ -4,6 +4,13 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""LLM response aggregators for handling conversation context and message aggregation.
|
||||
|
||||
This module provides aggregators that process and accumulate LLM responses, user inputs,
|
||||
and conversation context. These aggregators handle the flow between speech-to-text,
|
||||
LLM processing, and text-to-speech components in conversational AI pipelines.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
@@ -54,30 +61,55 @@ from pipecat.utils.time import time_now_iso8601
|
||||
|
||||
@dataclass
|
||||
class LLMUserAggregatorParams:
|
||||
"""Parameters for configuring LLM user aggregation behavior.
|
||||
|
||||
Parameters:
|
||||
aggregation_timeout: Maximum time in seconds to wait for additional
|
||||
transcription content before pushing aggregated result. This
|
||||
timeout is used only when the transcription is slow to arrive.
|
||||
"""
|
||||
|
||||
aggregation_timeout: float = 0.5
|
||||
|
||||
|
||||
@dataclass
|
||||
class LLMAssistantAggregatorParams:
|
||||
"""Parameters for configuring LLM assistant aggregation behavior.
|
||||
|
||||
Parameters:
|
||||
expect_stripped_words: Whether to expect and handle stripped words
|
||||
in text frames by adding spaces between tokens.
|
||||
"""
|
||||
|
||||
expect_stripped_words: bool = True
|
||||
|
||||
|
||||
class LLMFullResponseAggregator(FrameProcessor):
|
||||
"""This is an LLM aggregator that aggregates a full LLM completion. It
|
||||
aggregates LLM text frames (tokens) received between
|
||||
`LLMFullResponseStartFrame` and `LLMFullResponseEndFrame`. Every full
|
||||
completion is returned via the "on_completion" event handler:
|
||||
"""Aggregates complete LLM responses between start and end frames.
|
||||
|
||||
@aggregator.event_handler("on_completion")
|
||||
async def on_completion(
|
||||
aggregator: LLMFullResponseAggregator,
|
||||
completion: str,
|
||||
completed: bool,
|
||||
)
|
||||
This aggregator collects LLM text frames (tokens) received between
|
||||
`LLMFullResponseStartFrame` and `LLMFullResponseEndFrame` and provides
|
||||
the complete response via an event handler.
|
||||
|
||||
The aggregator provides an "on_completion" event that fires when a full
|
||||
completion is available::
|
||||
|
||||
@aggregator.event_handler("on_completion")
|
||||
async def on_completion(
|
||||
aggregator: LLMFullResponseAggregator,
|
||||
completion: str,
|
||||
completed: bool,
|
||||
):
|
||||
# Handle the completion
|
||||
pass
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the LLM full response aggregator.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional arguments passed to parent FrameProcessor.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
self._aggregation = ""
|
||||
@@ -86,6 +118,12 @@ class LLMFullResponseAggregator(FrameProcessor):
|
||||
self._register_event_handler("on_completion")
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames and aggregate LLM text content.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartInterruptionFrame):
|
||||
@@ -116,83 +154,123 @@ class LLMFullResponseAggregator(FrameProcessor):
|
||||
|
||||
|
||||
class BaseLLMResponseAggregator(FrameProcessor):
|
||||
"""This is the base class for all LLM response aggregators. These
|
||||
aggregators process incoming frames and aggregate content until they are
|
||||
ready to push the aggregation. In the case of a user, an aggregation might
|
||||
be a full transcription received from the STT service.
|
||||
"""Base class for all LLM response aggregators.
|
||||
|
||||
The LLM response aggregators also keep a store (e.g. a message list or an
|
||||
LLM context) of the current conversation, that is, it stores the messages
|
||||
said by the user or by the bot.
|
||||
These aggregators process incoming frames and aggregate content until they are
|
||||
ready to push the aggregation downstream. They maintain conversation state
|
||||
and handle message flow between different components in the pipeline.
|
||||
|
||||
The aggregators keep a store (e.g. message list or LLM context) of the current
|
||||
conversation, storing messages from both users and the bot.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the base LLM response aggregator.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional arguments passed to parent FrameProcessor.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def messages(self) -> List[dict]:
|
||||
"""Returns the messages from the current conversation."""
|
||||
"""Get the messages from the current conversation.
|
||||
|
||||
Returns:
|
||||
List of message dictionaries representing the conversation history.
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def role(self) -> str:
|
||||
"""Returns the role (e.g. user, assistant...) for this aggregator."""
|
||||
"""Get the role for this aggregator.
|
||||
|
||||
Returns:
|
||||
The role string (e.g. "user", "assistant") for this aggregator.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def add_messages(self, messages):
|
||||
"""Add the given messages to the conversation."""
|
||||
"""Add the given messages to the conversation.
|
||||
|
||||
Args:
|
||||
messages: Messages to append to the conversation history.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def set_messages(self, messages):
|
||||
"""Reset the conversation with the given messages."""
|
||||
"""Reset the conversation with the given messages.
|
||||
|
||||
Args:
|
||||
messages: Messages to replace the current conversation history.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def set_tools(self, tools):
|
||||
"""Set LLM tools to be used in the current conversation."""
|
||||
"""Set LLM tools to be used in the current conversation.
|
||||
|
||||
Args:
|
||||
tools: List of tool definitions for the LLM to use.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def set_tool_choice(self, tool_choice):
|
||||
"""Set the tool choice. This should modify the LLM context."""
|
||||
"""Set the tool choice for the LLM.
|
||||
|
||||
Args:
|
||||
tool_choice: Tool choice configuration for the LLM context.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def reset(self):
|
||||
"""Reset the internals of this aggregator. This should not modify the
|
||||
internal messages.
|
||||
"""Reset the internal state of this aggregator.
|
||||
|
||||
This should clear aggregation state but not modify the conversation messages.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def handle_aggregation(self, aggregation: str):
|
||||
"""Adds the given aggregation to the aggregator. The aggregator can use
|
||||
a simple list of message or a context. It doesn't not push any frames.
|
||||
"""Add the given aggregation to the conversation store.
|
||||
|
||||
Args:
|
||||
aggregation: The aggregated text content to add to the conversation.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def push_aggregation(self):
|
||||
"""Pushes the current aggregation. For example, iN the case of context
|
||||
aggregation this might push a new context frame.
|
||||
"""Push the current aggregation downstream.
|
||||
|
||||
The specific frame type pushed depends on the aggregator implementation
|
||||
(e.g. context frame, messages frame).
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class LLMContextResponseAggregator(BaseLLMResponseAggregator):
|
||||
"""This is a base LLM aggregator that uses an LLM context to store the
|
||||
conversation. It pushes `OpenAILLMContextFrame` as an aggregation frame.
|
||||
"""Base LLM aggregator that uses an OpenAI LLM context for conversation storage.
|
||||
|
||||
This aggregator maintains conversation state using an OpenAILLMContext and
|
||||
pushes OpenAILLMContextFrame objects as aggregation frames. It provides
|
||||
common functionality for context-based conversation management.
|
||||
"""
|
||||
|
||||
def __init__(self, *, context: OpenAILLMContext, role: str, **kwargs):
|
||||
"""Initialize the context response aggregator.
|
||||
|
||||
Args:
|
||||
context: The OpenAI LLM context to use for conversation storage.
|
||||
role: The role this aggregator represents (e.g. "user", "assistant").
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._context = context
|
||||
self._role = role
|
||||
@@ -201,46 +279,99 @@ class LLMContextResponseAggregator(BaseLLMResponseAggregator):
|
||||
|
||||
@property
|
||||
def messages(self) -> List[dict]:
|
||||
"""Get messages from the LLM context.
|
||||
|
||||
Returns:
|
||||
List of message dictionaries from the context.
|
||||
"""
|
||||
return self._context.get_messages()
|
||||
|
||||
@property
|
||||
def role(self) -> str:
|
||||
"""Get the role for this aggregator.
|
||||
|
||||
Returns:
|
||||
The role string for this aggregator.
|
||||
"""
|
||||
return self._role
|
||||
|
||||
@property
|
||||
def context(self):
|
||||
"""Get the OpenAI LLM context.
|
||||
|
||||
Returns:
|
||||
The OpenAILLMContext instance used by this aggregator.
|
||||
"""
|
||||
return self._context
|
||||
|
||||
def get_context_frame(self) -> OpenAILLMContextFrame:
|
||||
"""Create a context frame with the current context.
|
||||
|
||||
Returns:
|
||||
OpenAILLMContextFrame containing the current context.
|
||||
"""
|
||||
return OpenAILLMContextFrame(context=self._context)
|
||||
|
||||
async def push_context_frame(self, direction: FrameDirection = FrameDirection.DOWNSTREAM):
|
||||
"""Push a context frame in the specified direction.
|
||||
|
||||
Args:
|
||||
direction: The direction to push the frame (upstream or downstream).
|
||||
"""
|
||||
frame = self.get_context_frame()
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
def add_messages(self, messages):
|
||||
"""Add messages to the context.
|
||||
|
||||
Args:
|
||||
messages: Messages to add to the conversation context.
|
||||
"""
|
||||
self._context.add_messages(messages)
|
||||
|
||||
def set_messages(self, messages):
|
||||
"""Set the context messages.
|
||||
|
||||
Args:
|
||||
messages: Messages to replace the current context messages.
|
||||
"""
|
||||
self._context.set_messages(messages)
|
||||
|
||||
def set_tools(self, tools: List):
|
||||
"""Set tools in the context.
|
||||
|
||||
Args:
|
||||
tools: List of tool definitions to set in the context.
|
||||
"""
|
||||
self._context.set_tools(tools)
|
||||
|
||||
def set_tool_choice(self, tool_choice: Literal["none", "auto", "required"] | dict):
|
||||
"""Set tool choice in the context.
|
||||
|
||||
Args:
|
||||
tool_choice: Tool choice configuration for the context.
|
||||
"""
|
||||
self._context.set_tool_choice(tool_choice)
|
||||
|
||||
async def reset(self):
|
||||
"""Reset the aggregation state."""
|
||||
self._aggregation = ""
|
||||
|
||||
|
||||
class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
"""This is a user LLM aggregator that uses an LLM context to store the
|
||||
conversation. It aggregates transcriptions from the STT service and it has
|
||||
logic to handle multiple scenarios where transcriptions are received between
|
||||
VAD events (`UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame`) or
|
||||
even outside or no VAD events at all.
|
||||
"""User LLM aggregator that processes speech-to-text transcriptions.
|
||||
|
||||
This aggregator handles the complex logic of aggregating user speech transcriptions
|
||||
from STT services. It manages multiple scenarios including:
|
||||
|
||||
- Transcriptions received between VAD events
|
||||
- Transcriptions received outside VAD events
|
||||
- Interim vs final transcriptions
|
||||
- User interruptions during bot speech
|
||||
- Emulated VAD for whispered or short utterances
|
||||
|
||||
The aggregator uses timeouts to handle cases where transcriptions arrive
|
||||
after VAD events or when no VAD is available.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -250,6 +381,13 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
params: Optional[LLMUserAggregatorParams] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the user context aggregator.
|
||||
|
||||
Args:
|
||||
context: The OpenAI LLM context for conversation storage.
|
||||
params: Configuration parameters for aggregation behavior.
|
||||
**kwargs: Additional arguments. Supports deprecated 'aggregation_timeout'.
|
||||
"""
|
||||
super().__init__(context=context, role="user", **kwargs)
|
||||
self._params = params or LLMUserAggregatorParams()
|
||||
if "aggregation_timeout" in kwargs:
|
||||
@@ -275,6 +413,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
self._aggregation_task = None
|
||||
|
||||
async def reset(self):
|
||||
"""Reset the aggregation state and interruption strategies."""
|
||||
await super().reset()
|
||||
self._was_bot_speaking = False
|
||||
self._seen_interim_results = False
|
||||
@@ -282,9 +421,20 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
[await s.reset() for s in self._interruption_strategies]
|
||||
|
||||
async def handle_aggregation(self, aggregation: str):
|
||||
"""Add the aggregated user text to the context.
|
||||
|
||||
Args:
|
||||
aggregation: The aggregated user text to add as a user message.
|
||||
"""
|
||||
self._context.add_message({"role": self.role, "content": aggregation})
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames for user speech aggregation and context management.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartFrame):
|
||||
@@ -320,9 +470,9 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
elif isinstance(frame, InterimTranscriptionFrame):
|
||||
await self._handle_interim_transcription(frame)
|
||||
elif isinstance(frame, LLMMessagesAppendFrame):
|
||||
self.add_messages(frame.messages)
|
||||
await self._handle_llm_messages_append(frame)
|
||||
elif isinstance(frame, LLMMessagesUpdateFrame):
|
||||
self.set_messages(frame.messages)
|
||||
await self._handle_llm_messages_update(frame)
|
||||
elif isinstance(frame, LLMSetToolsFrame):
|
||||
self.set_tools(frame.tools)
|
||||
elif isinstance(frame, LLMSetToolChoiceFrame):
|
||||
@@ -339,7 +489,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
await self.push_frame(frame)
|
||||
|
||||
async def push_aggregation(self):
|
||||
"""Pushes the current aggregation based on interruption strategies and conditions."""
|
||||
"""Push the current aggregation based on interruption strategies and conditions."""
|
||||
if len(self._aggregation) > 0:
|
||||
if self.interruption_strategies and self._bot_speaking:
|
||||
should_interrupt = await self._should_interrupt_based_on_strategies()
|
||||
@@ -373,7 +523,11 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
# await self.push_frame(OpenAILLMContextFrame(self._context))
|
||||
|
||||
async def _should_interrupt_based_on_strategies(self) -> bool:
|
||||
"""Check if interruption should occur based on configured strategies."""
|
||||
"""Check if interruption should occur based on configured strategies.
|
||||
|
||||
Returns:
|
||||
True if any interruption strategy indicates interruption should occur.
|
||||
"""
|
||||
|
||||
async def should_interrupt(strategy: BaseInterruptionStrategy):
|
||||
await strategy.append_text(self._aggregation)
|
||||
@@ -390,6 +544,16 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
async def _cancel(self, frame: CancelFrame):
|
||||
await self._cancel_aggregation_task()
|
||||
|
||||
async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
|
||||
self.add_messages(frame.messages)
|
||||
if frame.run_llm:
|
||||
await self.push_context_frame()
|
||||
|
||||
async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame):
|
||||
self.set_messages(frame.messages)
|
||||
if frame.run_llm:
|
||||
await self.push_context_frame()
|
||||
|
||||
async def _handle_input_audio(self, frame: InputAudioRawFrame):
|
||||
for s in self.interruption_strategies:
|
||||
await s.append_audio(frame.audio, frame.sample_rate)
|
||||
@@ -474,9 +638,10 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
self._aggregation_event.clear()
|
||||
|
||||
async def _maybe_emulate_user_speaking(self):
|
||||
"""Emulate user speaking if we got a transcription but it was not
|
||||
detected by VAD. Only do that if the bot is not speaking.
|
||||
"""Maybe emulate user speaking based on transcription.
|
||||
|
||||
Emulate user speaking if we got a transcription but it was not
|
||||
detected by VAD. Only do that if the bot is not speaking.
|
||||
"""
|
||||
# Check if we received a transcription but VAD was not able to detect
|
||||
# voice (e.g. when you whisper a short utterance). In that case, we need
|
||||
@@ -497,10 +662,18 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
|
||||
|
||||
|
||||
class LLMAssistantContextAggregator(LLMContextResponseAggregator):
|
||||
"""This is an assistant LLM aggregator that uses an LLM context to store the
|
||||
conversation. It aggregates text frames received between
|
||||
`LLMFullResponseStartFrame` and `LLMFullResponseEndFrame`.
|
||||
"""Assistant LLM aggregator that processes bot responses and function calls.
|
||||
|
||||
This aggregator handles the complex logic of processing assistant responses including:
|
||||
|
||||
- Text frame aggregation between response start/end markers
|
||||
- Function call lifecycle management
|
||||
- Context updates with timestamps
|
||||
- Tool execution and result handling
|
||||
- Interruption handling during responses
|
||||
|
||||
The aggregator manages function calls in progress and coordinates between
|
||||
text generation and tool execution phases of LLM responses.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -510,6 +683,13 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
|
||||
params: Optional[LLMAssistantAggregatorParams] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the assistant context aggregator.
|
||||
|
||||
Args:
|
||||
context: The OpenAI LLM context for conversation storage.
|
||||
params: Configuration parameters for aggregation behavior.
|
||||
**kwargs: Additional arguments. Supports deprecated 'expect_stripped_words'.
|
||||
"""
|
||||
super().__init__(context=context, role="assistant", **kwargs)
|
||||
self._params = params or LLMAssistantAggregatorParams()
|
||||
|
||||
@@ -534,26 +714,57 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
|
||||
"""Check if there are any function calls currently in progress.
|
||||
|
||||
Returns:
|
||||
bool: True if function calls are in progress, False otherwise
|
||||
True if function calls are in progress, False otherwise.
|
||||
"""
|
||||
return bool(self._function_calls_in_progress)
|
||||
|
||||
async def handle_aggregation(self, aggregation: str):
|
||||
"""Add the aggregated assistant text to the context.
|
||||
|
||||
Args:
|
||||
aggregation: The aggregated assistant text to add as an assistant message.
|
||||
"""
|
||||
self._context.add_message({"role": "assistant", "content": aggregation})
|
||||
|
||||
async def handle_function_call_in_progress(self, frame: FunctionCallInProgressFrame):
|
||||
"""Handle a function call that is in progress.
|
||||
|
||||
Args:
|
||||
frame: The function call in progress frame to handle.
|
||||
"""
|
||||
pass
|
||||
|
||||
async def handle_function_call_result(self, frame: FunctionCallResultFrame):
|
||||
"""Handle the result of a completed function call.
|
||||
|
||||
Args:
|
||||
frame: The function call result frame to handle.
|
||||
"""
|
||||
pass
|
||||
|
||||
async def handle_function_call_cancel(self, frame: FunctionCallCancelFrame):
|
||||
"""Handle cancellation of a function call.
|
||||
|
||||
Args:
|
||||
frame: The function call cancel frame to handle.
|
||||
"""
|
||||
pass
|
||||
|
||||
async def handle_user_image_frame(self, frame: UserImageRawFrame):
|
||||
"""Handle a user image frame associated with a function call.
|
||||
|
||||
Args:
|
||||
frame: The user image frame to handle.
|
||||
"""
|
||||
pass
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames for assistant response aggregation and function call management.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartInterruptionFrame):
|
||||
@@ -566,9 +777,9 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
|
||||
elif isinstance(frame, TextFrame):
|
||||
await self._handle_text(frame)
|
||||
elif isinstance(frame, LLMMessagesAppendFrame):
|
||||
self.add_messages(frame.messages)
|
||||
await self._handle_llm_messages_append(frame)
|
||||
elif isinstance(frame, LLMMessagesUpdateFrame):
|
||||
self.set_messages(frame.messages)
|
||||
await self._handle_llm_messages_update(frame)
|
||||
elif isinstance(frame, LLMSetToolsFrame):
|
||||
self.set_tools(frame.tools)
|
||||
elif isinstance(frame, LLMSetToolChoiceFrame):
|
||||
@@ -590,6 +801,7 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
async def push_aggregation(self):
|
||||
"""Push the current assistant aggregation with timestamp."""
|
||||
if not self._aggregation:
|
||||
return
|
||||
|
||||
@@ -606,6 +818,16 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
|
||||
timestamp_frame = OpenAILLMContextAssistantTimestampFrame(timestamp=time_now_iso8601())
|
||||
await self.push_frame(timestamp_frame)
|
||||
|
||||
async def _handle_llm_messages_append(self, frame: LLMMessagesAppendFrame):
|
||||
self.add_messages(frame.messages)
|
||||
if frame.run_llm:
|
||||
await self.push_context_frame(FrameDirection.UPSTREAM)
|
||||
|
||||
async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame):
|
||||
self.set_messages(frame.messages)
|
||||
if frame.run_llm:
|
||||
await self.push_context_frame(FrameDirection.UPSTREAM)
|
||||
|
||||
async def _handle_interruptions(self, frame: StartInterruptionFrame):
|
||||
await self.push_aggregation()
|
||||
self._started = 0
|
||||
@@ -719,6 +941,13 @@ class LLMAssistantContextAggregator(LLMContextResponseAggregator):
|
||||
|
||||
|
||||
class LLMUserResponseAggregator(LLMUserContextAggregator):
|
||||
"""User response aggregator that outputs LLMMessagesFrame instead of context frames.
|
||||
|
||||
This aggregator extends LLMUserContextAggregator but pushes LLMMessagesFrame
|
||||
objects downstream instead of OpenAILLMContextFrame objects. This is useful
|
||||
when you need message-based output rather than context-based output.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
messages: Optional[List[dict]] = None,
|
||||
@@ -726,9 +955,17 @@ class LLMUserResponseAggregator(LLMUserContextAggregator):
|
||||
params: Optional[LLMUserAggregatorParams] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the user response aggregator.
|
||||
|
||||
Args:
|
||||
messages: Initial messages for the conversation context.
|
||||
params: Configuration parameters for aggregation behavior.
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
|
||||
|
||||
async def push_aggregation(self):
|
||||
"""Push the aggregated user input as an LLMMessagesFrame."""
|
||||
if len(self._aggregation) > 0:
|
||||
await self.handle_aggregation(self._aggregation)
|
||||
|
||||
@@ -741,6 +978,13 @@ class LLMUserResponseAggregator(LLMUserContextAggregator):
|
||||
|
||||
|
||||
class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
|
||||
"""Assistant response aggregator that outputs LLMMessagesFrame instead of context frames.
|
||||
|
||||
This aggregator extends LLMAssistantContextAggregator but pushes LLMMessagesFrame
|
||||
objects downstream instead of OpenAILLMContextFrame objects. This is useful
|
||||
when you need message-based output rather than context-based output.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
messages: Optional[List[dict]] = None,
|
||||
@@ -748,9 +992,17 @@ class LLMAssistantResponseAggregator(LLMAssistantContextAggregator):
|
||||
params: Optional[LLMAssistantAggregatorParams] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the assistant response aggregator.
|
||||
|
||||
Args:
|
||||
messages: Initial messages for the conversation context.
|
||||
params: Configuration parameters for aggregation behavior.
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
super().__init__(context=OpenAILLMContext(messages), params=params, **kwargs)
|
||||
|
||||
async def push_aggregation(self):
|
||||
"""Push the aggregated assistant response as an LLMMessagesFrame."""
|
||||
if len(self._aggregation) > 0:
|
||||
await self.handle_aggregation(self._aggregation)
|
||||
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""OpenAI LLM context management for Pipecat.
|
||||
|
||||
This module provides classes for managing OpenAI-specific conversation contexts,
|
||||
including message handling, tool management, and image/audio processing capabilities.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import copy
|
||||
import io
|
||||
@@ -29,7 +35,21 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
|
||||
class CustomEncoder(json.JSONEncoder):
|
||||
"""Custom JSON encoder for handling special data types in logging.
|
||||
|
||||
Provides specialized encoding for io.BytesIO objects to display
|
||||
readable representations in log output instead of raw binary data.
|
||||
"""
|
||||
|
||||
def default(self, obj):
|
||||
"""Encode special objects for JSON serialization.
|
||||
|
||||
Args:
|
||||
obj: The object to encode.
|
||||
|
||||
Returns:
|
||||
Encoded representation of the object.
|
||||
"""
|
||||
if isinstance(obj, io.BytesIO):
|
||||
# Convert the first 8 bytes to an ASCII hex string
|
||||
return f"{obj.getbuffer()[0:8].hex()}..."
|
||||
@@ -37,25 +57,57 @@ class CustomEncoder(json.JSONEncoder):
|
||||
|
||||
|
||||
class OpenAILLMContext:
|
||||
"""Manages conversation context for OpenAI LLM interactions.
|
||||
|
||||
Handles message history, tool definitions, tool choices, and multimedia content
|
||||
for OpenAI API conversations. Provides methods for message manipulation,
|
||||
content formatting, and integration with various LLM adapters.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
messages: Optional[List[ChatCompletionMessageParam]] = None,
|
||||
tools: List[ChatCompletionToolParam] | NotGiven | ToolsSchema = NOT_GIVEN,
|
||||
tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
|
||||
):
|
||||
"""Initialize the OpenAI LLM context.
|
||||
|
||||
Args:
|
||||
messages: Initial list of conversation messages.
|
||||
tools: Available tools for the LLM to use.
|
||||
tool_choice: Tool selection strategy for the LLM.
|
||||
"""
|
||||
self._messages: List[ChatCompletionMessageParam] = messages if messages else []
|
||||
self._tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = tool_choice
|
||||
self._tools: List[ChatCompletionToolParam] | NotGiven | ToolsSchema = tools
|
||||
self._llm_adapter: Optional[BaseLLMAdapter] = None
|
||||
|
||||
def get_llm_adapter(self) -> Optional[BaseLLMAdapter]:
|
||||
"""Get the current LLM adapter.
|
||||
|
||||
Returns:
|
||||
The currently set LLM adapter, or None if not set.
|
||||
"""
|
||||
return self._llm_adapter
|
||||
|
||||
def set_llm_adapter(self, llm_adapter: BaseLLMAdapter):
|
||||
"""Set the LLM adapter for context processing.
|
||||
|
||||
Args:
|
||||
llm_adapter: The LLM adapter to use for tool conversion.
|
||||
"""
|
||||
self._llm_adapter = llm_adapter
|
||||
|
||||
@staticmethod
|
||||
def from_messages(messages: List[dict]) -> "OpenAILLMContext":
|
||||
"""Create a context from a list of message dictionaries.
|
||||
|
||||
Args:
|
||||
messages: List of message dictionaries to convert to context.
|
||||
|
||||
Returns:
|
||||
New OpenAILLMContext instance with the provided messages.
|
||||
"""
|
||||
context = OpenAILLMContext()
|
||||
|
||||
for message in messages:
|
||||
@@ -66,34 +118,81 @@ class OpenAILLMContext:
|
||||
|
||||
@property
|
||||
def messages(self) -> List[ChatCompletionMessageParam]:
|
||||
"""Get the current messages list.
|
||||
|
||||
Returns:
|
||||
List of conversation messages.
|
||||
"""
|
||||
return self._messages
|
||||
|
||||
@property
|
||||
def tools(self) -> List[ChatCompletionToolParam] | NotGiven | List[Any]:
|
||||
"""Get the tools list, converting through adapter if available.
|
||||
|
||||
Returns:
|
||||
Tools list, potentially converted by the LLM adapter.
|
||||
"""
|
||||
if self._llm_adapter:
|
||||
return self._llm_adapter.from_standard_tools(self._tools)
|
||||
return self._tools
|
||||
|
||||
@property
|
||||
def tool_choice(self) -> ChatCompletionToolChoiceOptionParam | NotGiven:
|
||||
"""Get the current tool choice setting.
|
||||
|
||||
Returns:
|
||||
The tool choice configuration.
|
||||
"""
|
||||
return self._tool_choice
|
||||
|
||||
def add_message(self, message: ChatCompletionMessageParam):
|
||||
"""Add a single message to the context.
|
||||
|
||||
Args:
|
||||
message: The message to add to the conversation history.
|
||||
"""
|
||||
self._messages.append(message)
|
||||
|
||||
def add_messages(self, messages: List[ChatCompletionMessageParam]):
|
||||
"""Add multiple messages to the context.
|
||||
|
||||
Args:
|
||||
messages: List of messages to add to the conversation history.
|
||||
"""
|
||||
self._messages.extend(messages)
|
||||
|
||||
def set_messages(self, messages: List[ChatCompletionMessageParam]):
|
||||
"""Replace all messages in the context.
|
||||
|
||||
Args:
|
||||
messages: New list of messages to replace the current history.
|
||||
"""
|
||||
self._messages[:] = messages
|
||||
|
||||
def get_messages(self) -> List[ChatCompletionMessageParam]:
|
||||
"""Get the current messages list (the live list, not a copy).
|
||||
|
||||
Returns:
|
||||
List of all messages in the conversation history.
|
||||
"""
|
||||
return self._messages
|
||||
|
||||
def get_messages_json(self) -> str:
|
||||
"""Get messages as a formatted JSON string.
|
||||
|
||||
Returns:
|
||||
JSON string representation of all messages with custom encoding.
|
||||
"""
|
||||
return json.dumps(self._messages, cls=CustomEncoder, ensure_ascii=False, indent=2)
|
||||
|
||||
def get_messages_for_logging(self) -> str:
|
||||
"""Get sanitized messages suitable for logging.
|
||||
|
||||
Removes or truncates sensitive data like image content for safe logging.
|
||||
|
||||
Returns:
|
||||
JSON string with sanitized message content for logging.
|
||||
"""
|
||||
msgs = []
|
||||
for message in self.messages:
|
||||
msg = copy.deepcopy(message)
|
||||
@@ -111,17 +210,18 @@ class OpenAILLMContext:
|
||||
def from_standard_message(self, message):
|
||||
"""Convert from OpenAI message format to OpenAI message format (passthrough).
|
||||
|
||||
OpenAI's format allows both simple string content and structured content:
|
||||
- Simple: {"role": "user", "content": "Hello"}
|
||||
- Structured: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
|
||||
OpenAI's format allows both simple string content and structured content::
|
||||
|
||||
Simple: {"role": "user", "content": "Hello"}
|
||||
Structured: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
|
||||
|
||||
Since OpenAI is our standard format, this is a passthrough function.
|
||||
|
||||
Args:
|
||||
message (dict): Message in OpenAI format
|
||||
message: Message in OpenAI format.
|
||||
|
||||
Returns:
|
||||
dict: Same message, unchanged
|
||||
Same message, unchanged.
|
||||
"""
|
||||
return message
|
||||
|
||||
@@ -133,20 +233,28 @@ class OpenAILLMContext:
|
||||
other LLM services that may need to return multiple messages.
|
||||
|
||||
Args:
|
||||
obj (dict): Message in OpenAI format with either:
|
||||
- Simple content: {"role": "user", "content": "Hello"}
|
||||
- List content: {"role": "user", "content": [{"type": "text", "text": "Hello"}]}
|
||||
obj: Message in OpenAI format with either simple string content
|
||||
or structured list content.
|
||||
|
||||
Returns:
|
||||
list: List containing the original messages, preserving whether
|
||||
the content was in simple string or structured list format
|
||||
List containing the original messages, preserving the content format.
|
||||
"""
|
||||
return [obj]
|
||||
|
||||
def get_messages_for_initializing_history(self):
|
||||
"""Get messages for initializing conversation history.
|
||||
|
||||
Returns:
|
||||
List of messages suitable for history initialization.
|
||||
"""
|
||||
return self._messages
|
||||
|
||||
def get_messages_for_persistent_storage(self):
|
||||
"""Get messages formatted for persistent storage.
|
||||
|
||||
Returns:
|
||||
List of messages converted to standard format for storage.
|
||||
"""
|
||||
messages = []
|
||||
for m in self._messages:
|
||||
standard_messages = self.to_standard_messages(m)
|
||||
@@ -154,9 +262,19 @@ class OpenAILLMContext:
|
||||
return messages
|
||||
|
||||
def set_tool_choice(self, tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven):
|
||||
"""Set the tool choice configuration.
|
||||
|
||||
Args:
|
||||
tool_choice: Tool selection strategy for the LLM.
|
||||
"""
|
||||
self._tool_choice = tool_choice
|
||||
|
||||
def set_tools(self, tools: List[ChatCompletionToolParam] | NotGiven | ToolsSchema = NOT_GIVEN):
|
||||
"""Set the available tools for the LLM.
|
||||
|
||||
Args:
|
||||
tools: List of tools available to the LLM, or NOT_GIVEN to disable tools. An empty list is treated as NOT_GIVEN.
|
||||
"""
|
||||
if tools != NOT_GIVEN and isinstance(tools, list) and len(tools) == 0:
|
||||
tools = NOT_GIVEN
|
||||
self._tools = tools
|
||||
@@ -164,6 +282,14 @@ class OpenAILLMContext:
|
||||
def add_image_frame_message(
|
||||
self, *, format: str, size: tuple[int, int], image: bytes, text: str = None
|
||||
):
|
||||
"""Add a message containing an image frame.
|
||||
|
||||
Args:
|
||||
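format: Pixel mode of the raw image bytes (e.g., 'RGB'). The image is re-encoded as JPEG, so the mode must be one JPEG can store (no alpha channel).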
|
||||
size: Image dimensions as (width, height) tuple.
|
||||
image: Raw image bytes.
|
||||
text: Optional text to include with the image.
|
||||
"""
|
||||
buffer = io.BytesIO()
|
||||
Image.frombytes(format, size, image).save(buffer, format="JPEG")
|
||||
encoded_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
|
||||
@@ -177,10 +303,30 @@ class OpenAILLMContext:
|
||||
self.add_message({"role": "user", "content": content})
|
||||
|
||||
def add_audio_frames_message(self, *, audio_frames: list[AudioRawFrame], text: str = None):
|
||||
"""Add a message containing audio frames.
|
||||
|
||||
Args:
|
||||
audio_frames: List of audio frame objects to include.
|
||||
text: Optional text to include with the audio.
|
||||
|
||||
Note:
|
||||
This method is currently a placeholder for future implementation.
|
||||
"""
|
||||
# todo: implement for OpenAI models and others
|
||||
pass
|
||||
|
||||
def create_wav_header(self, sample_rate, num_channels, bits_per_sample, data_size):
|
||||
"""Create a WAV file header for audio data.
|
||||
|
||||
Args:
|
||||
sample_rate: Audio sample rate in Hz.
|
||||
num_channels: Number of audio channels.
|
||||
bits_per_sample: Bits per audio sample.
|
||||
data_size: Size of audio data in bytes.
|
||||
|
||||
Returns:
|
||||
WAV header as a bytearray.
|
||||
"""
|
||||
# RIFF chunk descriptor
|
||||
header = bytearray()
|
||||
header.extend(b"RIFF") # ChunkID
|
||||
@@ -206,10 +352,14 @@ class OpenAILLMContext:
|
||||
|
||||
@dataclass
|
||||
class OpenAILLMContextFrame(Frame):
|
||||
"""Like an LLMMessagesFrame, but with extra context specific to the OpenAI
|
||||
"""Frame containing OpenAI-specific LLM context.
|
||||
|
||||
Like an LLMMessagesFrame, but with extra context specific to the OpenAI
|
||||
API. The context in this message is also mutable, and will be changed by the
|
||||
OpenAIContextAggregator frame processor.
|
||||
|
||||
Parameters:
|
||||
context: The OpenAI LLM context containing messages, tools, and configuration.
|
||||
"""
|
||||
|
||||
context: OpenAILLMContext
|
||||
|
||||
@@ -4,35 +4,46 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Text sentence aggregation processor for Pipecat.
|
||||
|
||||
This module provides a frame processor that accumulates text frames into
|
||||
complete sentences, only outputting when a sentence-ending pattern is detected.
|
||||
"""
|
||||
|
||||
from pipecat.frames.frames import EndFrame, Frame, InterimTranscriptionFrame, TextFrame
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.utils.string import match_endofsentence
|
||||
|
||||
|
||||
class SentenceAggregator(FrameProcessor):
|
||||
"""This frame processor aggregates text frames into complete sentences.
|
||||
"""Aggregates text frames into complete sentences.
|
||||
|
||||
This processor accumulates incoming text frames until a sentence-ending
|
||||
pattern is detected, then outputs the complete sentence as a single frame.
|
||||
Useful for ensuring downstream processors receive coherent, complete sentences
|
||||
rather than fragmented text.
|
||||
|
||||
Frame input/output::
|
||||
|
||||
Frame input/output:
|
||||
TextFrame("Hello,") -> None
|
||||
TextFrame(" world.") -> TextFrame("Hello world.")
|
||||
|
||||
Doctest: FIXME to work with asyncio
|
||||
>>> import asyncio
|
||||
>>> async def print_frames(aggregator, frame):
|
||||
... async for frame in aggregator.process_frame(frame):
|
||||
... print(frame.text)
|
||||
|
||||
>>> aggregator = SentenceAggregator()
|
||||
>>> asyncio.run(print_frames(aggregator, TextFrame("Hello,")))
|
||||
>>> asyncio.run(print_frames(aggregator, TextFrame(" world.")))
|
||||
Hello, world.
|
||||
TextFrame(" world.") -> TextFrame("Hello, world.")
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the sentence aggregator.
|
||||
|
||||
Sets up internal state for accumulating text frames into complete sentences.
|
||||
"""
|
||||
super().__init__()
|
||||
self._aggregation = ""
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames and aggregate text into complete sentences.
|
||||
|
||||
Args:
|
||||
frame: The incoming frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
# We ignore interim transcriptions at this point.
|
||||
|
||||
@@ -4,15 +4,39 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""User response aggregation for text frames.
|
||||
|
||||
This module provides an aggregator that collects user responses and outputs
|
||||
them as TextFrame objects, useful for capturing and processing user input
|
||||
in conversational pipelines.
|
||||
"""
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.processors.aggregators.llm_response import LLMUserResponseAggregator
|
||||
|
||||
|
||||
class UserResponseAggregator(LLMUserResponseAggregator):
|
||||
"""Aggregates user responses into TextFrame objects.
|
||||
|
||||
This aggregator extends LLMUserResponseAggregator to specifically handle
|
||||
user input by collecting text responses and outputting them as TextFrame
|
||||
objects when the aggregation is complete.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the user response aggregator.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional arguments passed to parent LLMUserResponseAggregator.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
async def push_aggregation(self):
|
||||
"""Push the aggregated user response as a TextFrame.
|
||||
|
||||
Creates a TextFrame from the current aggregation if it contains content,
|
||||
resets the aggregation state, and pushes the frame downstream.
|
||||
"""
|
||||
if len(self._aggregation) > 0:
|
||||
frame = TextFrame(self._aggregation.strip())
|
||||
|
||||
|
||||
@@ -4,33 +4,43 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Vision image frame aggregation for Pipecat.
|
||||
|
||||
This module provides frame aggregation functionality to combine text and image
|
||||
frames into vision frames for multimodal processing.
|
||||
"""
|
||||
|
||||
from pipecat.frames.frames import Frame, InputImageRawFrame, TextFrame, VisionImageRawFrame
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
|
||||
class VisionImageFrameAggregator(FrameProcessor):
|
||||
"""This aggregator waits for a consecutive TextFrame and an
|
||||
InputImageRawFrame. After the InputImageRawFrame arrives it will output a
|
||||
VisionImageRawFrame.
|
||||
|
||||
>>> from pipecat.frames.frames import ImageFrame
|
||||
|
||||
>>> async def print_frames(aggregator, frame):
|
||||
... async for frame in aggregator.process_frame(frame):
|
||||
... print(frame)
|
||||
|
||||
>>> aggregator = VisionImageFrameAggregator()
|
||||
>>> asyncio.run(print_frames(aggregator, TextFrame("What do you see?")))
|
||||
>>> asyncio.run(print_frames(aggregator, ImageFrame(image=bytes([]), size=(0, 0))))
|
||||
VisionImageFrame, text: What do you see?, image size: 0x0, buffer size: 0 B
|
||||
"""Aggregates consecutive text and image frames into vision frames.
|
||||
|
||||
This aggregator waits for a consecutive TextFrame and an InputImageRawFrame.
|
||||
After the InputImageRawFrame arrives it will output a VisionImageRawFrame
|
||||
combining both the text and image data for multimodal processing.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the vision image frame aggregator.
|
||||
|
||||
The aggregator starts with no cached text, waiting for the first
|
||||
TextFrame to arrive before it can create vision frames.
|
||||
"""
|
||||
super().__init__()
|
||||
self._describe_text = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames and aggregate text with images.
|
||||
|
||||
Caches TextFrames and combines them with subsequent InputImageRawFrames
|
||||
to create VisionImageRawFrames. Other frames are passed through unchanged.
|
||||
|
||||
Args:
|
||||
frame: The incoming frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TextFrame):
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Async generator processor for frame serialization and streaming."""
|
||||
|
||||
import asyncio
|
||||
from typing import Any, AsyncGenerator
|
||||
|
||||
@@ -17,12 +19,32 @@ from pipecat.serializers.base_serializer import FrameSerializer
|
||||
|
||||
|
||||
class AsyncGeneratorProcessor(FrameProcessor):
|
||||
"""A frame processor that serializes frames and provides them via async generator.
|
||||
|
||||
This processor passes frames through unchanged while simultaneously serializing
|
||||
them and making the serialized data available through an async generator interface.
|
||||
Useful for streaming frame data to external consumers while maintaining the
|
||||
normal frame processing pipeline.
|
||||
"""
|
||||
|
||||
def __init__(self, *, serializer: FrameSerializer, **kwargs):
|
||||
"""Initialize the async generator processor.
|
||||
|
||||
Args:
|
||||
serializer: The frame serializer to use for converting frames to data.
|
||||
**kwargs: Additional arguments passed to the parent FrameProcessor.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._serializer = serializer
|
||||
self._data_queue = asyncio.Queue()
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process frames by passing them through and queuing serialized data.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
await self.push_frame(frame, direction)
|
||||
@@ -35,6 +57,12 @@ class AsyncGeneratorProcessor(FrameProcessor):
|
||||
await self._data_queue.put(data)
|
||||
|
||||
async def generator(self) -> AsyncGenerator[Any, None]:
|
||||
"""Generate serialized frame data asynchronously.
|
||||
|
||||
Yields:
|
||||
Serialized frame data from the internal queue until a termination
|
||||
signal (None) is received.
|
||||
"""
|
||||
running = True
|
||||
while running:
|
||||
data = await self._data_queue.get()
|
||||
|
||||
@@ -4,12 +4,18 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Audio buffer processor for managing and synchronizing audio streams.
|
||||
|
||||
This module provides an AudioBufferProcessor that handles buffering and synchronization
|
||||
of audio from both user input and bot output sources, with support for various audio
|
||||
configurations and event-driven processing.
|
||||
"""
|
||||
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from pipecat.audio.utils import create_default_resampler, interleave_stereo_audio, mix_audio
|
||||
from pipecat.audio.utils import create_stream_resampler, interleave_stereo_audio, mix_audio
|
||||
from pipecat.frames.frames import (
|
||||
AudioRawFrame,
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
CancelFrame,
|
||||
@@ -32,23 +38,19 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
including sample rate conversion and mono/stereo output.
|
||||
|
||||
Events:
|
||||
on_audio_data: Triggered when buffer_size is reached, providing merged audio
|
||||
on_track_audio_data: Triggered when buffer_size is reached, providing separate tracks
|
||||
on_user_turn_audio_data: Triggered when user turn has ended, providing that user turn's audio
|
||||
on_bot_turn_audio_data: Triggered when bot turn has ended, providing that bot turn's audio
|
||||
|
||||
Args:
|
||||
sample_rate (Optional[int]): Desired output sample rate. If None, uses source rate
|
||||
num_channels (int): Number of channels (1 for mono, 2 for stereo). Defaults to 1
|
||||
buffer_size (int): Size of buffer before triggering events. 0 for no buffering
|
||||
enable_turn_audio (bool): Whether turn audio event handlers should be triggered
|
||||
- on_audio_data: Triggered when buffer_size is reached, providing merged audio
|
||||
- on_track_audio_data: Triggered when buffer_size is reached, providing separate tracks
|
||||
- on_user_turn_audio_data: Triggered when user turn has ended, providing that user turn's audio
|
||||
- on_bot_turn_audio_data: Triggered when bot turn has ended, providing that bot turn's audio
|
||||
|
||||
Audio handling:
|
||||
- Mono output (num_channels=1): User and bot audio are mixed
|
||||
- Stereo output (num_channels=2): User audio on left, bot audio on right
|
||||
- Automatic resampling of incoming audio to match desired sample_rate
|
||||
- Silence insertion for non-continuous audio streams
|
||||
- Buffer synchronization between user and bot audio
|
||||
|
||||
- Mono output (num_channels=1): User and bot audio are mixed
|
||||
- Stereo output (num_channels=2): User audio on left, bot audio on right
|
||||
- Automatic resampling of incoming audio to match desired sample_rate
|
||||
- Silence insertion for non-continuous audio streams
|
||||
- Buffer synchronization between user and bot audio
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -61,6 +63,21 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
enable_turn_audio: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the audio buffer processor.
|
||||
|
||||
Args:
|
||||
sample_rate: Desired output sample rate. If None, uses source rate.
|
||||
num_channels: Number of channels (1 for mono, 2 for stereo). Defaults to 1.
|
||||
buffer_size: Size of buffer before triggering events. 0 for no buffering.
|
||||
user_continuous_stream: Controls whether user audio is treated as a continuous
|
||||
stream for buffering purposes.
|
||||
|
||||
.. deprecated:: 0.0.72
|
||||
This parameter no longer has any effect and will be removed in a future version.
|
||||
|
||||
enable_turn_audio: Whether turn audio event handlers should be triggered.
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._init_sample_rate = sample_rate
|
||||
self._sample_rate = 0
|
||||
@@ -93,7 +110,8 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
|
||||
self._recording = False
|
||||
|
||||
self._resampler = create_default_resampler()
|
||||
self._input_resampler = create_stream_resampler()
|
||||
self._output_resampler = create_stream_resampler()
|
||||
|
||||
self._register_event_handler("on_audio_data")
|
||||
self._register_event_handler("on_track_audio_data")
|
||||
@@ -105,7 +123,7 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
"""Current sample rate of the audio processor.
|
||||
|
||||
Returns:
|
||||
int: The sample rate in Hz
|
||||
The sample rate in Hz.
|
||||
"""
|
||||
return self._sample_rate
|
||||
|
||||
@@ -114,7 +132,7 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
"""Number of channels in the audio output.
|
||||
|
||||
Returns:
|
||||
int: Number of channels (1 for mono, 2 for stereo)
|
||||
Number of channels (1 for mono, 2 for stereo).
|
||||
"""
|
||||
return self._num_channels
|
||||
|
||||
@@ -122,7 +140,7 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
"""Check if both user and bot audio buffers contain data.
|
||||
|
||||
Returns:
|
||||
bool: True if both buffers contain audio data
|
||||
True if both buffers contain audio data.
|
||||
"""
|
||||
return self._buffer_has_audio(self._user_audio_buffer) and self._buffer_has_audio(
|
||||
self._bot_audio_buffer
|
||||
@@ -135,7 +153,7 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
on the left channel and bot audio on the right channel.
|
||||
|
||||
Returns:
|
||||
bytes: Mixed audio data
|
||||
Mixed audio data as bytes.
|
||||
"""
|
||||
if self._num_channels == 1:
|
||||
return mix_audio(bytes(self._user_audio_buffer), bytes(self._bot_audio_buffer))
|
||||
@@ -163,7 +181,12 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
self._recording = False
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming audio frames and manage audio buffers."""
|
||||
"""Process incoming audio frames and manage audio buffers.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
# Update output sample rate if necessary.
|
||||
@@ -181,16 +204,18 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
def _update_sample_rate(self, frame: StartFrame):
|
||||
"""Update the sample rate from the start frame."""
|
||||
self._sample_rate = self._init_sample_rate or frame.audio_out_sample_rate
|
||||
self._audio_buffer_size_1s = self._sample_rate * 2
|
||||
|
||||
async def _process_recording(self, frame: Frame):
|
||||
"""Process audio frames for recording."""
|
||||
if isinstance(frame, InputAudioRawFrame):
|
||||
# Add silence if we need to.
|
||||
silence = self._compute_silence(self._last_user_frame_at)
|
||||
self._user_audio_buffer.extend(silence)
|
||||
# Add user audio.
|
||||
resampled = await self._resample_audio(frame)
|
||||
resampled = await self._resample_input_audio(frame)
|
||||
self._user_audio_buffer.extend(resampled)
|
||||
# Save time of frame so we can compute silence.
|
||||
self._last_user_frame_at = time.time()
|
||||
@@ -199,7 +224,7 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
silence = self._compute_silence(self._last_bot_frame_at)
|
||||
self._bot_audio_buffer.extend(silence)
|
||||
# Add bot audio.
|
||||
resampled = await self._resample_audio(frame)
|
||||
resampled = await self._resample_output_audio(frame)
|
||||
self._bot_audio_buffer.extend(resampled)
|
||||
# Save time of frame so we can compute silence.
|
||||
self._last_bot_frame_at = time.time()
|
||||
@@ -208,6 +233,7 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
await self._call_on_audio_data_handler()
|
||||
|
||||
async def _process_turn_recording(self, frame: Frame):
|
||||
"""Process frames for turn-based audio recording."""
|
||||
if isinstance(frame, UserStartedSpeakingFrame):
|
||||
self._user_speaking = True
|
||||
elif isinstance(frame, UserStoppedSpeakingFrame):
|
||||
@@ -226,7 +252,7 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
self._bot_turn_audio_buffer = bytearray()
|
||||
|
||||
if isinstance(frame, InputAudioRawFrame):
|
||||
resampled = await self._resample_audio(frame)
|
||||
resampled = await self._resample_input_audio(frame)
|
||||
self._user_turn_audio_buffer += resampled
|
||||
# In the case of the user, we need to keep a short buffer of audio
|
||||
# since VAD notification of when the user starts speaking comes
|
||||
@@ -238,10 +264,11 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
discarded = len(self._user_turn_audio_buffer) - self._audio_buffer_size_1s
|
||||
self._user_turn_audio_buffer = self._user_turn_audio_buffer[discarded:]
|
||||
elif self._bot_speaking and isinstance(frame, OutputAudioRawFrame):
|
||||
resampled = await self._resample_audio(frame)
|
||||
resampled = await self._resample_output_audio(frame)
|
||||
self._bot_turn_audio_buffer += resampled
|
||||
|
||||
async def _call_on_audio_data_handler(self):
|
||||
"""Call the audio data event handlers with buffered audio."""
|
||||
if not self.has_audio() or not self._recording:
|
||||
return
|
||||
|
||||
@@ -263,23 +290,36 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
self._reset_audio_buffers()
|
||||
|
||||
def _buffer_has_audio(self, buffer: bytearray) -> bool:
|
||||
"""Check if a buffer contains audio data."""
|
||||
return buffer is not None and len(buffer) > 0
|
||||
|
||||
def _reset_recording(self):
|
||||
"""Reset recording state and buffers."""
|
||||
self._reset_audio_buffers()
|
||||
self._last_user_frame_at = time.time()
|
||||
self._last_bot_frame_at = time.time()
|
||||
|
||||
def _reset_audio_buffers(self):
|
||||
"""Reset all audio buffers to empty state."""
|
||||
self._user_audio_buffer = bytearray()
|
||||
self._bot_audio_buffer = bytearray()
|
||||
self._user_turn_audio_buffer = bytearray()
|
||||
self._bot_turn_audio_buffer = bytearray()
|
||||
|
||||
async def _resample_audio(self, frame: AudioRawFrame) -> bytes:
|
||||
return await self._resampler.resample(frame.audio, frame.sample_rate, self._sample_rate)
|
||||
async def _resample_input_audio(self, frame: InputAudioRawFrame) -> bytes:
|
||||
"""Resample an input (user) audio frame to the target sample rate."""
|
||||
return await self._input_resampler.resample(
|
||||
frame.audio, frame.sample_rate, self._sample_rate
|
||||
)
|
||||
|
||||
async def _resample_output_audio(self, frame: OutputAudioRawFrame) -> bytes:
|
||||
"""Resample an output (bot) audio frame to the target sample rate."""
|
||||
return await self._output_resampler.resample(
|
||||
frame.audio, frame.sample_rate, self._sample_rate
|
||||
)
|
||||
|
||||
def _compute_silence(self, from_time: float) -> bytes:
|
||||
"""Compute silence to insert based on time gap."""
|
||||
quiet_time = time.time() - from_time
|
||||
# We should get audio frames very frequently. We introduce silence only
|
||||
# if there's a big enough gap of 1s.
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Consumer processor for consuming frames from ProducerProcessor queues."""
|
||||
|
||||
import asyncio
|
||||
from typing import Awaitable, Callable, Optional
|
||||
|
||||
@@ -14,11 +16,11 @@ from pipecat.utils.asyncio.watchdog_queue import WatchdogQueue
|
||||
|
||||
|
||||
class ConsumerProcessor(FrameProcessor):
|
||||
"""This class passes-through frames and also consumes frames from a
|
||||
producer's queue. When a frame from a producer queue is received it will be
|
||||
pushed to the specified direction. The frames can be transformed into a
|
||||
different type of frame before being pushed.
|
||||
"""Frame processor that consumes frames from a ProducerProcessor's queue.
|
||||
|
||||
This processor passes through frames normally while also consuming frames
|
||||
from a ProducerProcessor's queue. When frames are received from the producer
|
||||
queue, they are optionally transformed and pushed in the specified direction.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -29,6 +31,14 @@ class ConsumerProcessor(FrameProcessor):
|
||||
direction: FrameDirection = FrameDirection.DOWNSTREAM,
|
||||
**kwargs,
|
||||
):
|
||||
"""Initialize the consumer processor.
|
||||
|
||||
Args:
|
||||
producer: The producer processor to consume frames from.
|
||||
transformer: Function to transform frames before pushing. Defaults to identity_transformer.
|
||||
direction: Direction to push consumed frames. Defaults to DOWNSTREAM.
|
||||
**kwargs: Additional arguments passed to parent class.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
self._transformer = transformer
|
||||
self._direction = direction
|
||||
@@ -36,6 +46,12 @@ class ConsumerProcessor(FrameProcessor):
|
||||
self._consumer_task: Optional[asyncio.Task] = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames and handle lifecycle events.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction the frame is traveling.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartFrame):
|
||||
@@ -48,19 +64,24 @@ class ConsumerProcessor(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
async def _start(self, _: StartFrame):
|
||||
"""Start the consumer task and register with the producer."""
|
||||
if not self._consumer_task:
|
||||
self._queue: WatchdogQueue = self._producer.add_consumer()
|
||||
self._consumer_task = self.create_task(self._consumer_task_handler())
|
||||
|
||||
async def _stop(self, _: EndFrame):
|
||||
"""Stop the consumer task."""
|
||||
if self._consumer_task:
|
||||
await self.cancel_task(self._consumer_task)
|
||||
|
||||
async def _cancel(self, _: CancelFrame):
|
||||
"""Cancel the consumer task."""
|
||||
if self._consumer_task:
|
||||
self._queue.cancel()
|
||||
await self.cancel_task(self._consumer_task)
|
||||
|
||||
async def _consumer_task_handler(self):
|
||||
"""Handle consuming frames from the producer queue."""
|
||||
while True:
|
||||
frame = await self._queue.get()
|
||||
new_frame = await self._transformer(frame)
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Frame filtering processor for the Pipecat framework."""
|
||||
|
||||
from typing import Tuple, Type
|
||||
|
||||
from pipecat.frames.frames import EndFrame, Frame, SystemFrame
|
||||
@@ -11,7 +13,21 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
|
||||
class FrameFilter(FrameProcessor):
|
||||
"""A frame processor that filters frames based on their types.
|
||||
|
||||
This processor acts as a selective gate in the pipeline, allowing only
|
||||
frames of specified types to pass through. System and end frames are
|
||||
automatically allowed to pass through to maintain pipeline integrity.
|
||||
"""
|
||||
|
||||
def __init__(self, types: Tuple[Type[Frame], ...]):
|
||||
"""Initialize the frame filter.
|
||||
|
||||
Args:
|
||||
types: Tuple of frame types that should be allowed to pass through
|
||||
the filter. All other frame types (except SystemFrame and
|
||||
EndFrame) will be blocked.
|
||||
"""
|
||||
super().__init__()
|
||||
self._types = types
|
||||
|
||||
@@ -20,12 +36,19 @@ class FrameFilter(FrameProcessor):
|
||||
#
|
||||
|
||||
def _should_passthrough_frame(self, frame):
|
||||
"""Determine if a frame should pass through the filter."""
|
||||
if isinstance(frame, self._types):
|
||||
return True
|
||||
|
||||
return isinstance(frame, (EndFrame, SystemFrame))
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process an incoming frame and conditionally pass it through.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if self._should_passthrough_frame(frame):
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Function-based frame filtering for Pipecat pipelines.
|
||||
|
||||
This module provides a processor that filters frames based on a custom function,
|
||||
allowing for flexible frame filtering logic in processing pipelines.
|
||||
"""
|
||||
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
from pipecat.frames.frames import EndFrame, Frame, SystemFrame
|
||||
@@ -11,11 +17,26 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
|
||||
class FunctionFilter(FrameProcessor):
|
||||
"""A frame processor that filters frames using a custom function.
|
||||
|
||||
This processor allows frames to pass through based on the result of a
|
||||
user-provided filter function. System and end frames always pass through
|
||||
regardless of the filter result.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
filter: Callable[[Frame], Awaitable[bool]],
|
||||
direction: FrameDirection = FrameDirection.DOWNSTREAM,
|
||||
):
|
||||
"""Initialize the function filter.
|
||||
|
||||
Args:
|
||||
filter: An async function that takes a Frame and returns True if the
|
||||
frame should pass through, False otherwise.
|
||||
direction: The direction to apply filtering. Only frames moving in
|
||||
this direction will be filtered. Defaults to DOWNSTREAM.
|
||||
"""
|
||||
super().__init__()
|
||||
self._filter = filter
|
||||
self._direction = direction
|
||||
@@ -27,9 +48,18 @@ class FunctionFilter(FrameProcessor):
|
||||
# Ignore system frames, end frames and frames that are not following the
|
||||
# direction of this gate
|
||||
def _should_passthrough_frame(self, frame, direction):
|
||||
"""Check if a frame should pass through without filtering."""
|
||||
# Ignore system frames, end frames and frames that are not following the
|
||||
# direction of this gate
|
||||
return isinstance(frame, (SystemFrame, EndFrame)) or direction != self._direction
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process a frame through the filter.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction the frame is moving in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
passthrough = self._should_passthrough_frame(frame, direction)
|
||||
|
||||
@@ -4,6 +4,12 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Identity filter for transparent frame passthrough.
|
||||
|
||||
This module provides a simple passthrough filter that forwards all frames
|
||||
without modification, useful for testing and pipeline composition.
|
||||
"""
|
||||
|
||||
from pipecat.frames.frames import Frame
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
@@ -14,10 +20,14 @@ class IdentityFilter(FrameProcessor):
|
||||
This filter acts as a transparent passthrough, allowing all frames to flow
|
||||
through unchanged. It can be useful when testing `ParallelPipeline` to
|
||||
create pipelines that pass through frames (no frames should be repeated).
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the identity filter.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional arguments passed to the parent FrameProcessor.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
#
|
||||
@@ -25,6 +35,11 @@ class IdentityFilter(FrameProcessor):
|
||||
#
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process an incoming frame by passing it through unchanged."""
|
||||
"""Process an incoming frame by passing it through unchanged.
|
||||
|
||||
Args:
|
||||
frame: The frame to process and forward.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
@@ -4,14 +4,31 @@
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Null filter processor for blocking frame transmission.
|
||||
|
||||
This module provides a frame processor that blocks all frames except
|
||||
system and end frames, useful for testing or temporarily stopping
|
||||
frame flow in a pipeline.
|
||||
"""
|
||||
|
||||
from pipecat.frames.frames import EndFrame, Frame, SystemFrame
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
|
||||
class NullFilter(FrameProcessor):
|
||||
"""This filter doesn't allow passing any frames up or downstream."""
|
||||
"""A filter that blocks all frames except system and end frames.
|
||||
|
||||
This processor acts as a null filter, preventing frames from passing
|
||||
through the pipeline while still allowing essential system and end
|
||||
frames to maintain proper pipeline operation.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize the null filter.
|
||||
|
||||
Args:
|
||||
**kwargs: Additional arguments passed to parent FrameProcessor.
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
#
|
||||
@@ -19,6 +36,12 @@ class NullFilter(FrameProcessor):
|
||||
#
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
"""Process incoming frames, only allowing system and end frames through.
|
||||
|
||||
Args:
|
||||
frame: The frame to process.
|
||||
direction: The direction of frame flow in the pipeline.
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, (SystemFrame, EndFrame)):
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user