Compare commits
340 Commits
v0.0.72
...
jpt/runner
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2b1f056aa7 | ||
|
|
2be615066c | ||
|
|
1bb821a07d | ||
|
|
d8bcb81f35 | ||
|
|
3ce0ab8c6d | ||
|
|
097d786431 | ||
|
|
662f04879c | ||
|
|
7a69f57e11 | ||
|
|
5b7b4efdc9 | ||
|
|
cfa26524ca | ||
|
|
3d4ab7158d | ||
|
|
26d1ca3c98 | ||
|
|
083b32887e | ||
|
|
3391929127 | ||
|
|
ebf9bc2741 | ||
|
|
f5edde42f6 | ||
|
|
37bb7ef926 | ||
|
|
a63d1530a4 | ||
|
|
960bc9df5b | ||
|
|
e2a153ee01 | ||
|
|
300f19ad23 | ||
|
|
7955080da2 | ||
|
|
994e82c1ef | ||
|
|
b07b947352 | ||
|
|
a6527c3856 | ||
|
|
0e6874b605 | ||
|
|
9ba172c49f | ||
|
|
f710c94b6e | ||
|
|
6e3a0a2d5d | ||
|
|
9530b8b842 | ||
|
|
26c937af87 | ||
|
|
976f6168f0 | ||
|
|
0be64e0fd9 | ||
|
|
7d527c3a6b | ||
|
|
c6f6930c27 | ||
|
|
c33dfe8309 | ||
|
|
769cd1ef06 | ||
|
|
6d72f60571 | ||
|
|
e8d0712ac1 | ||
|
|
88b2c817ac | ||
|
|
f8f6c9918d | ||
|
|
8ee608bbfe | ||
|
|
fad2ba4570 | ||
|
|
f609f7eb53 | ||
|
|
ea09813a2b | ||
|
|
53abfc27a7 | ||
|
|
9c72e96a2c | ||
|
|
f66c67c4ab | ||
|
|
b623face03 | ||
|
|
698d60f3ae | ||
|
|
c9717a23a5 | ||
|
|
d981ce6e56 | ||
|
|
1bbd3bd8ab | ||
|
|
a20915caa7 | ||
|
|
28cab5a606 | ||
|
|
cfea56064d | ||
|
|
8467d87cfc | ||
|
|
b20d020bea | ||
|
|
948257c66e | ||
|
|
b54d1fb7fd | ||
|
|
ec361df0d1 | ||
|
|
b1a5cddde4 | ||
|
|
e165d38277 | ||
|
|
8ba340a8a5 | ||
|
|
d4e33663b2 | ||
|
|
d7d1b16dad | ||
|
|
0bc2ea13f2 | ||
|
|
b5d1301221 | ||
|
|
ed8f30ec71 | ||
|
|
a74a935ca0 | ||
|
|
7cfd56699b | ||
|
|
cb984237a7 | ||
|
|
c969fdddb9 | ||
|
|
9931ad2ce1 | ||
|
|
fd73feb645 | ||
|
|
ee78428a2a | ||
|
|
ae02249255 | ||
|
|
727af2e6fb | ||
|
|
8fd5576879 | ||
|
|
1f85dcee7c | ||
|
|
138890bc5c | ||
|
|
a094efc9e6 | ||
|
|
1f9e2fdecc | ||
|
|
4a2b4660bc | ||
|
|
b3ac90015a | ||
|
|
2fe06f0a4e | ||
|
|
1836a7484e | ||
|
|
25a5c5aaab | ||
|
|
24694e2558 | ||
|
|
2325edd9ba | ||
|
|
fad5713ade | ||
|
|
fe8573322f | ||
|
|
06c1255abe | ||
|
|
f108a67635 | ||
|
|
bf580d061d | ||
|
|
b005bd7b98 | ||
|
|
75f8baab33 | ||
|
|
5c3fb73cef | ||
|
|
5c3f4180b9 | ||
|
|
6cd6e7ceed | ||
|
|
1a146c2a64 | ||
|
|
eaeb9e6efa | ||
|
|
2e84c91748 | ||
|
|
650d45c1f4 | ||
|
|
f4f65024ef | ||
|
|
1200aa4fb8 | ||
|
|
6762363685 | ||
|
|
b2ead325c4 | ||
|
|
4e24b915cc | ||
|
|
b610ee26ba | ||
|
|
2b867f1613 | ||
|
|
7b8fe565c7 | ||
|
|
a246862910 | ||
|
|
106809f3fd | ||
|
|
f0d8499f7e | ||
|
|
332ca3d55e | ||
|
|
a48f5d5796 | ||
|
|
f04f047428 | ||
|
|
4e61fd33ea | ||
|
|
61ac77be72 | ||
|
|
c093eb5b63 | ||
|
|
98e24131bd | ||
|
|
7becce9e8c | ||
|
|
3cdaeb719a | ||
|
|
8daaea5969 | ||
|
|
dc47516e14 | ||
|
|
0fcc4f822f | ||
|
|
c0ed061ff5 | ||
|
|
d98b6b418d | ||
|
|
deea29b5e8 | ||
|
|
0bdbc83ed9 | ||
|
|
6c591f0990 | ||
|
|
b55b9c257b | ||
|
|
5156c21d14 | ||
|
|
a9d824753b | ||
|
|
3c6a208101 | ||
|
|
b1032a1ca4 | ||
|
|
931f34fccd | ||
|
|
f2509adec1 | ||
|
|
285b82eb65 | ||
|
|
74da197304 | ||
|
|
0f727248d2 | ||
|
|
a6de16f92f | ||
|
|
fc09854d7f | ||
|
|
2959029151 | ||
|
|
e590441b7b | ||
|
|
dc41ec7cb1 | ||
|
|
43049c865c | ||
|
|
c4a9fc7f88 | ||
|
|
faf4026cf4 | ||
|
|
f53f45a6cd | ||
|
|
e04e876f44 | ||
|
|
a84e7e30da | ||
|
|
6eed6ff779 | ||
|
|
1375211610 | ||
|
|
4e9369a702 | ||
|
|
f9e8748a96 | ||
|
|
20d6bf267a | ||
|
|
b573f9dab2 | ||
|
|
7ed4fe50d4 | ||
|
|
6f66ec1727 | ||
|
|
c7e758fc36 | ||
|
|
14c22234bb | ||
|
|
d565e9ae53 | ||
|
|
4951c97eab | ||
|
|
9b38f3e2fa | ||
|
|
dbc76389d8 | ||
|
|
c27f838444 | ||
|
|
ce84485e26 | ||
|
|
6cf254e2f9 | ||
|
|
02b63c28a5 | ||
|
|
57c6ce7ffa | ||
|
|
2f3272ea2f | ||
|
|
f5c2d57e4b | ||
|
|
baa878272d | ||
|
|
093285868e | ||
|
|
6c9d058ec2 | ||
|
|
5df7be6892 | ||
|
|
2deca816ae | ||
|
|
b8d2fceced | ||
|
|
7596d71460 | ||
|
|
096067b097 | ||
|
|
ec09505f6b | ||
|
|
251ea756c8 | ||
|
|
8f6544efe2 | ||
|
|
6045a8ad8c | ||
|
|
b184d62634 | ||
|
|
1a8d512abb | ||
|
|
a62be8ea32 | ||
|
|
c230d94ff0 | ||
|
|
e7b02773f5 | ||
|
|
ed83248a6b | ||
|
|
af8b4901d4 | ||
|
|
64c8230960 | ||
|
|
bf664534cc | ||
|
|
274a04e535 | ||
|
|
cb81f3d50e | ||
|
|
30a3b24287 | ||
|
|
8aacf71956 | ||
|
|
72d503d3a3 | ||
|
|
453a904290 | ||
|
|
368bff4fb4 | ||
|
|
4ae045d704 | ||
|
|
8c71939425 | ||
|
|
a437c2d365 | ||
|
|
a1784e3237 | ||
|
|
abee0f853c | ||
|
|
e9d358ed17 | ||
|
|
c5d54d06bb | ||
|
|
c16eed7ca2 | ||
|
|
76388a10b5 | ||
|
|
38bcc033a2 | ||
|
|
5af563cd91 | ||
|
|
3de271161c | ||
|
|
c19f9bc43a | ||
|
|
ef85d245ed | ||
|
|
25749bd4c0 | ||
|
|
e19c5464fe | ||
|
|
5c2ea3b804 | ||
|
|
c27348d470 | ||
|
|
de5f9c9217 | ||
|
|
f9086ee3a2 | ||
|
|
43298a9026 | ||
|
|
d80e228c6f | ||
|
|
2902362886 | ||
|
|
1cd303ad7f | ||
|
|
f590a476e7 | ||
|
|
e71cb3ba68 | ||
|
|
510a9af2e5 | ||
|
|
5328f84df4 | ||
|
|
18817fd81b | ||
|
|
4bcc536fd2 | ||
|
|
1ab2ddd317 | ||
|
|
09aa168840 | ||
|
|
05753fb207 | ||
|
|
715e3f8543 | ||
|
|
9c9d4b35a4 | ||
|
|
2ee935f784 | ||
|
|
58aedc88a4 | ||
|
|
0e60385871 | ||
|
|
a4188f7986 | ||
|
|
c7cbfe7a4f | ||
|
|
f1c9f5040b | ||
|
|
79e51051c7 | ||
|
|
a63d0da528 | ||
|
|
4fd8df208f | ||
|
|
44d3bd30fa | ||
|
|
6e6e932370 | ||
|
|
baccf50417 | ||
|
|
7b1071b30d | ||
|
|
bd7ca94196 | ||
|
|
1ec1aa76e9 | ||
|
|
77c369c3c7 | ||
|
|
9171d4b040 | ||
|
|
e02b95fca5 | ||
|
|
d45a07b5e5 | ||
|
|
0cdcfcee8d | ||
|
|
324546b4e7 | ||
|
|
c8ee67a636 | ||
|
|
b87c57c951 | ||
|
|
721f662bbe | ||
|
|
fccd48bfff | ||
|
|
5310d903ec | ||
|
|
8cbce555e4 | ||
|
|
f6112713e8 | ||
|
|
cc637f4dea | ||
|
|
7f76a14c54 | ||
|
|
58675f4d5a | ||
|
|
d50e6db312 | ||
|
|
de74284a8e | ||
|
|
4c9a295b28 | ||
|
|
0968f36d3e | ||
|
|
fd570b0377 | ||
|
|
68ea5ee570 | ||
|
|
f891140a74 | ||
|
|
5ed2d7ac2b | ||
|
|
a297e4208e | ||
|
|
b713527da0 | ||
|
|
224d2cedc8 | ||
|
|
55cfea776f | ||
|
|
d7a2078e0b | ||
|
|
a3e540eb32 | ||
|
|
e01c20be84 | ||
|
|
ce3ca418c2 | ||
|
|
15b9a5faf6 | ||
|
|
3afa30894f | ||
|
|
0ecfa827e6 | ||
|
|
e1b0db75eb | ||
|
|
b0c773189f | ||
|
|
3064326834 | ||
|
|
c67e50fe34 | ||
|
|
9d45e3eca1 | ||
|
|
43a24d15f6 | ||
|
|
cafbda1668 | ||
|
|
86c26fd64c | ||
|
|
0c20668008 | ||
|
|
92df8dc43c | ||
|
|
9d5f5844b8 | ||
|
|
2cf31884d0 | ||
|
|
19354c6f2d | ||
|
|
0b2079ad41 | ||
|
|
5f18c3af70 | ||
|
|
0a40285d43 | ||
|
|
5b1c328541 | ||
|
|
37929533af | ||
|
|
3b92113680 | ||
|
|
46b52cb9bb | ||
|
|
f0bcc9d9ba | ||
|
|
1cac028bfe | ||
|
|
4956886819 | ||
|
|
c720cfc7c7 | ||
|
|
8fcef5628f | ||
|
|
1cf0b35ac1 | ||
|
|
c54084b7a4 | ||
|
|
e3fe040017 | ||
|
|
ae5e3e2dc4 | ||
|
|
77378d2779 | ||
|
|
4106f0dabe | ||
|
|
2ed1ed6821 | ||
|
|
6d3a38842d | ||
|
|
7360f79413 | ||
|
|
8d55e13750 | ||
|
|
737e8e79c9 | ||
|
|
4d977fede0 | ||
|
|
8070e156d8 | ||
|
|
43c6f1f5cd | ||
|
|
f53f5445ba | ||
|
|
7263d11ee4 | ||
|
|
f2d5b9ad69 | ||
|
|
40c7e3c52c | ||
|
|
ee5fea4221 | ||
|
|
db7b60cfe9 | ||
|
|
51b79bd6a1 | ||
|
|
95fe762776 | ||
|
|
2968c846ce | ||
|
|
e27da96cdc | ||
|
|
d86502e79a | ||
|
|
59c7744590 | ||
|
|
949971dea9 | ||
|
|
cd4a893c65 |
6
.github/workflows/format.yaml
vendored
6
.github/workflows/format.yaml
vendored
@@ -17,7 +17,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
ruff-format:
|
||||
name: "Formatting checker"
|
||||
name: "Code quality checks"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -39,8 +39,8 @@ jobs:
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff format --diff
|
||||
- name: Ruff import linter
|
||||
- name: Ruff linter (all rules)
|
||||
id: ruff-check
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff check --select I
|
||||
ruff check
|
||||
|
||||
2
.github/workflows/publish.yaml
vendored
2
.github/workflows/publish.yaml
vendored
@@ -5,7 +5,7 @@ on:
|
||||
inputs:
|
||||
gitref:
|
||||
type: string
|
||||
description: "what git ref to build"
|
||||
description: "what git tag to build (e.g. v0.0.74)"
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
|
||||
@@ -4,5 +4,5 @@ repos:
|
||||
hooks:
|
||||
- id: ruff
|
||||
language_version: python3
|
||||
args: [ --select, I, ]
|
||||
args: [--fix]
|
||||
- id: ruff-format
|
||||
|
||||
290
CHANGELOG.md
290
CHANGELOG.md
@@ -5,6 +5,296 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new field `handle_sigterm` to `PipelineRunner`. It defaults to `False`.
|
||||
This field handles SIGTERM signals. The `handle_sigint` field still defaults
|
||||
to `True`, but now it handles only SIGINT signals.
|
||||
|
||||
- Added foundational example `14u-function-calling-ollama.py` for Ollama
|
||||
function calling.
|
||||
|
||||
- Added `LocalSmartTurnAnalyzerV2`, which supports local on-device inference
|
||||
with the new `smart-turn-v2` turn detection model.
|
||||
|
||||
- Added `set_log_level` to `DailyTransport`, allowing setting the logging level
|
||||
for Daily's internal logging system.
|
||||
|
||||
### Changed
|
||||
|
||||
- Play delayed messages from `ElevenLabsTTSService` if they still belong to the
|
||||
current context.
|
||||
|
||||
- Dependency compatibility improvements: Relaxed version constraints for core
|
||||
dependencies to support broader version ranges while maintaining stability:
|
||||
|
||||
- `aiohttp`, `Markdown`, `nltk`, `numpy`, `Pillow`, `pydantic`, `openai`,
|
||||
`numba`: Now support up to the next major version (e.g. `numpy>=1.26.4,<3`)
|
||||
- `pyht`: Relaxed to `>=0.1.6` to resolve `grpcio` conflicts with
|
||||
`nvidia-riva-client`
|
||||
- `fastapi`: Updated to support versions `>=0.115.6,<0.117.0`
|
||||
- `torch`/`torchaudio`: Changed from exact pinning (`==2.5.0`) to compatible
|
||||
range (`~=2.5.0`)
|
||||
- `aws_sdk_bedrock_runtime`: Added Python 3.12+ constraint via environment
|
||||
marker
|
||||
- `numba`: Reduced minimum version to `0.60.0` for better compatibility
|
||||
|
||||
- Changed `NeuphonicHttpTTSService` to use a POST based request instead of the
|
||||
`pyneuphonic` package. This removes a package requirement, allowing Neuphonic
|
||||
to work with more services.
|
||||
|
||||
- Updated the `deepgram` optional dependency to 4.7.0, which downgrades the
|
||||
`tasks cancelled error` to a debug log. This removes the log from appearing
|
||||
in Pipecat logs upon leaving.
|
||||
|
||||
- Upgraded the `websockets` implementation to the new asyncio implementation.
|
||||
Along with this change, we're updating support for versions >=13.1.0 and
|
||||
<15.0.0. All services have been updated to use the asyncio implementation.
|
||||
|
||||
- Updated `MiniMaxHttpTTSService` with a `base_url` arg where you can specify
|
||||
the Global endpoint (default) or Mainland China.
|
||||
|
||||
- Replaced regex-based sentence detection in `match_endofsentence` with NLTK's
|
||||
punkt_tab tokenizer for more reliable sentence boundary detection.
|
||||
|
||||
- Changed the `livekit` optional dependency for `tenacity` to
|
||||
`tenacity>=8.2.3,<10.0.0` in order to support the `google-genai` package.
|
||||
|
||||
- For `LmntTTSService`, changed the default `model` to `blizzard`, LMNT's
|
||||
recommended model.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a dependency issue for uv users where an `llvmlite` version required python 3.9.
|
||||
|
||||
- Fixed an issue in `MiniMaxHttpTTSService` where the `pitch` param was the
|
||||
incorrect type.
|
||||
|
||||
- Fixed an issue with OpenTelemetry tracing where the `enable_tracing` flag did
|
||||
not disable the internal tracing decorator functions.
|
||||
|
||||
- Fixed an issue in `OLLamaLLMService` where kwargs were not passed correctly
|
||||
to the parent class.
|
||||
|
||||
- Fixed an issue in `ElevenLabsTTSService` where the word/timestamp pairs were
|
||||
calculating word boundaries incorrectly.
|
||||
|
||||
- Fixed an issue where, in some edge cases, the `EmulateUserStartedSpeakingFrame`
|
||||
could be created even if we didn't have a transcription.
|
||||
|
||||
- Fixed an issue in `GoogleLLMContext` where it would inject the
|
||||
`system_message` as a "user" message into cases where it was not meant to;
|
||||
it was only meant to do that when there were no "regular" (non-function-call)
|
||||
messages in the context, to ensure that inference would run properly.
|
||||
|
||||
- Fixed an issue in `LiveKitTransport` where the `on_audio_track_subscribed` was never emitted.
|
||||
|
||||
## [0.0.76] - 2025-07-11
|
||||
|
||||
### Added
|
||||
|
||||
- Added `SpeechControlParamsFrame`, a new `SystemFrame` that notifies
|
||||
downstream processors of the VAD and Turn analyzer params. This frame is
|
||||
pushed by the `BaseInputTransport` at Start and any time a
|
||||
`VADParamsUpdateFrame` is received.
|
||||
|
||||
### Changed
|
||||
|
||||
- Two package dependencies have been updated:
|
||||
- `numpy` now supports 1.26.0 and newer
|
||||
- `transformers` now supports 4.48.0 and newer
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with RTVI's handling of `append-to-context`.
|
||||
|
||||
- Fixed an issue where using audio input with a sample rate requiring resampling
|
||||
could result in empty audio being passed to STT services, causing errors.
|
||||
|
||||
- Fixed the VAD analyzer to process the full audio buffer as long as it contains
|
||||
more than the minimum required bytes per iteration, instead of only analyzing
|
||||
the first chunk.
|
||||
|
||||
- Fixed an issue in ParallelPipeline that caused errors when attempting to drain
|
||||
the queues.
|
||||
|
||||
- Fixed an issue with emulated VAD timeout inconsistency in
|
||||
`LLMUserContextAggregator`. Previously, emulated VAD scenarios (where
|
||||
transcription is received without VAD detection) used a hardcoded
|
||||
`aggregation_timeout` (default 0.5s) instead of matching the VAD's
|
||||
`stop_secs` parameter (default 0.8s). This created different user experiences
|
||||
between real VAD and emulated VAD scenarios. Now, emulated VAD timeouts
|
||||
automatically synchronize with the VAD's `stop_secs` parameter.
|
||||
|
||||
- Fix a pipeline freeze when using AWS Nova Sonic, which would occur if the
|
||||
user started early, while the bot was still working through
|
||||
`trigger_assistant_response()`.
|
||||
|
||||
## [0.0.75] - 2025-07-08
|
||||
|
||||
### Added
|
||||
|
||||
- Added an `aggregate_sentences` arg in `CartesiaTTSService`,
|
||||
`ElevenLabsTTSService`, `NeuphonicTTSService` and `RimeTTSService`, where the
|
||||
default value is True. When `aggregate_sentences` is True, the `TTSService`
|
||||
aggregates the LLM streamed tokens into sentences by default. Note: setting
|
||||
the value to False requires a custom processor before the `TTSService` to
|
||||
aggregate LLM tokens.
|
||||
|
||||
- Added `kwargs` to the `OLLamaLLMService` to allow for configuration args to
|
||||
be passed to Ollama.
|
||||
|
||||
- Added call hang-up error handling in `TwilioFrameSerializer`, which handles
|
||||
the case where the user has hung up before the `TwilioFrameSerializer` hangs
|
||||
up the call.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `RTVIObserver` and `RTVIProcessor` to match the new RTVI 1.0.0 protocol.
|
||||
This includes:
|
||||
|
||||
- Deprecating support for all messages related to service configuration and
|
||||
actions.
|
||||
- Adding support for obtaining and logging data about client, including its
|
||||
RTVI version and optionally included system information (OS/browser/etc.)
|
||||
- Adding support for handling the new `client-message` RTVI message through
|
||||
either a `on_client_message` event handler or listening for a new
|
||||
`RTVIClientMessageFrame`
|
||||
- Adding support for responding to a `client-message` with a `server-response`
|
||||
via either a direct call on the `RTVIProcessor` or via pushing a new
|
||||
`RTVIServerResponseFrame`
|
||||
- Adding built-in support for handling the new `append-to-context` RTVI message
|
||||
which allows a client to add to the user or assistant llm context. No extra
|
||||
code is required for supporting this behavior.
|
||||
- Updating all JavaScript and React client RTVI examples to use versions 1.0.0
|
||||
of the clients.
|
||||
|
||||
Get started migrating to RTVI protocol 1.0.0 by following the migration guide:
|
||||
https://docs.pipecat.ai/client/migration-guide
|
||||
|
||||
- Refactored `AWSBedrockLLMService` and `AWSPollyTTSService` to work
|
||||
asynchronously using `aioboto3` instead of the `boto3` library.
|
||||
|
||||
- The `UserIdleProcessor` now handles the scenario where function calls take
|
||||
longer than the idle timeout duration. This allows you to use the
|
||||
`UserIdleProcessor` in conjunction with function calls that take a while to
|
||||
return a result.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Updated the `NeuphonicTTSService` to work with the updated websocket API.
|
||||
|
||||
- Fixed an issue with `RivaSTTService` where the watchdog feature was causing
|
||||
an error on initialization.
|
||||
|
||||
### Performance
|
||||
|
||||
- Remove unnecessary push task in each `FrameProcessor`.
|
||||
|
||||
## [0.0.74] - 2025-07-03
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new STT service, `SpeechmaticsSTTService`. This service provides
|
||||
real-time speech-to-text transcription using the Speechmatics API. It supports
|
||||
partial and final transcriptions, multiple languages, various audio formats,
|
||||
and speaker diarization.
|
||||
|
||||
- Added `normalize` and `model_id` to `FishAudioTTSService`.
|
||||
|
||||
- Added `http_options` argument to `GoogleLLMService`.
|
||||
|
||||
- Added `run_llm` field to `LLMMessagesAppendFrame` and `LLMMessagesUpdateFrame`
|
||||
frames. If true, a context frame will be pushed triggering the LLM to respond.
|
||||
|
||||
- Added a new `SOXRStreamAudioResampler` for processing audio in chunks or
|
||||
streams. If you write your own processor and need to use an audio resampler,
|
||||
use the new `create_stream_resampler()`.
|
||||
|
||||
- Added new `DailyParams.audio_in_user_tracks` to allow receiving one track per
|
||||
user (default) or a single track from the room (all participants mixed).
|
||||
|
||||
- Added support for providing "direct" functions, which don't need an
|
||||
accompanying `FunctionSchema` or function definition dict. Instead, metadata
|
||||
(i.e. `name`, `description`, `properties`, and `required`) are automatically
|
||||
extracted from a combination of the function signature and docstring.
|
||||
|
||||
Usage:
|
||||
|
||||
```python
|
||||
# "Direct" function
|
||||
# `params` must be the first parameter
|
||||
async def do_something(params: FunctionCallParams, foo: int, bar: str = ""):
|
||||
"""
|
||||
Do something interesting.
|
||||
|
||||
Args:
|
||||
foo (int): The foo to do something interesting with.
|
||||
bar (string): The bar to do something interesting with.
|
||||
"""
|
||||
|
||||
result = await process(foo, bar)
|
||||
await params.result_callback({"result": result})
|
||||
|
||||
# ...
|
||||
|
||||
llm.register_direct_function(do_something)
|
||||
|
||||
# ...
|
||||
|
||||
tools = ToolsSchema(standard_tools=[do_something])
|
||||
```
|
||||
|
||||
- `user_id` is now populated in the `TranscriptionFrame` and
|
||||
`InterimTranscriptionFrame` when using a transport that provides a `user_id`,
|
||||
like `DailyTransport` or `LiveKitTransport`.
|
||||
|
||||
- Added `watchdog_coroutine()`. This is a watchdog helper for coroutines. So,
|
||||
if you have a coroutine that is waiting for a result and that takes a long
|
||||
time, you will need to wrap it with `watchdog_coroutine()` so the watchdog
|
||||
timers are reset regularly.
|
||||
|
||||
- Added `session_token` parameter to `AWSNovaSonicLLMService`.
|
||||
|
||||
- Added Gemini Multimodal Live File API for uploading, fetching, listing, and
|
||||
deleting files. See `26f-gemini-multimodal-live-files-api.py` for example usage.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated all the services to use the new `SOXRStreamAudioResampler`, ensuring smooth
|
||||
transitions and eliminating clicks.
|
||||
|
||||
- Upgraded `daily-python` to 0.19.4.
|
||||
|
||||
- Updated `google` optional dependency to use `google-genai` version `1.24.0`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where audio would get stuck in the queue when an interrupt occurs
|
||||
during Azure TTS synthesis.
|
||||
|
||||
- Fixed a race condition that occurs in Python 3.10+ where the task could miss
|
||||
the `CancelledError` and continue running indefinitely, freezing the pipeline.
|
||||
|
||||
- Fixed an `AWSNovaSonicLLMService` issue introduced in 0.0.72.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- In `FishAudioTTSService`, deprecated `model` and replaced with
|
||||
`reference_id`. This change is to better align with Fish Audio's variable
|
||||
naming and to reduce confusion about what functionality the variable
|
||||
controls.
|
||||
|
||||
## [0.0.73] - 2025-06-26
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue introduced in 0.0.72 that would cause `ElevenLabsTTSService`,
|
||||
`GladiaSTTService`, `NeuphonicTTSService` and `OpenAIRealtimeBetaLLMService`
|
||||
to throw an error.
|
||||
|
||||
## [0.0.72] - 2025-06-26
|
||||
|
||||
### Added
|
||||
|
||||
108
CONTRIBUTING.md
108
CONTRIBUTING.md
@@ -43,8 +43,8 @@ We follow Google-style docstrings with these specific conventions:
|
||||
|
||||
**Regular Classes:**
|
||||
|
||||
- Class docstring describes the class purpose and documents all `__init__` parameters in an `Args:` section
|
||||
- No separate `__init__` docstring needed
|
||||
- Class docstring describes the class purpose and key functionality
|
||||
- `__init__` method has its own docstring with complete `Args:` section documenting all parameters
|
||||
- All public methods must have docstrings with `Args:` and `Returns:` sections as appropriate
|
||||
|
||||
**Dataclasses:**
|
||||
@@ -60,6 +60,39 @@ We follow Google-style docstrings with these specific conventions:
|
||||
|
||||
- Must have docstrings explaining what subclasses should implement
|
||||
|
||||
**`__init__.py` Files:**
|
||||
|
||||
- **Skip docstrings** for pure import/re-export modules
|
||||
- **Add brief docstrings** for top-level packages or those with initialization logic
|
||||
|
||||
**Enums:**
|
||||
|
||||
- Class docstring describes the enumeration purpose
|
||||
- Use `Parameters:` section to document each enum value and its meaning
|
||||
- No `__init__` docstring (Enums don't have custom constructors)
|
||||
|
||||
**Code Examples in Docstrings:**
|
||||
|
||||
- Use `Examples:` as a section header for multiple examples
|
||||
- Use descriptive text followed by double colons (`::`) for each example
|
||||
- **Always include a blank line after the `::`**
|
||||
- Indent all code consistently within each block
|
||||
- Separate multiple examples with blank lines for readability
|
||||
|
||||
**Lists and Bullets in Docstrings:**
|
||||
|
||||
- Use dashes (`-`) for bullet points, not asterisks (`*`)
|
||||
- **Add a blank line before bullet lists** when they follow a colon
|
||||
- Use section headers like "Supported features:" or "Behavior:" before lists
|
||||
- For complex nested information, consider using paragraph format instead
|
||||
|
||||
**Deprecations:**
|
||||
|
||||
- Use `warnings.warn()` in code for runtime deprecation warnings
|
||||
- Add `.. deprecated::` directive in docstrings for documentation visibility
|
||||
- Include version information and describe current status
|
||||
- Describe parameters in present tense, use directive to indicate deprecation status
|
||||
|
||||
#### Examples:
|
||||
|
||||
```python
|
||||
@@ -67,14 +100,34 @@ We follow Google-style docstrings with these specific conventions:
|
||||
class MyService(BaseService):
|
||||
"""Description of what the service does.
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
param2: Description of param2. Defaults to True.
|
||||
**kwargs: Additional arguments passed to parent.
|
||||
Provides detailed explanation of the service's functionality,
|
||||
key features, and usage patterns.
|
||||
|
||||
Supported features:
|
||||
|
||||
- Feature one with detailed explanation
|
||||
- Feature two with additional context
|
||||
- Feature three for advanced use cases
|
||||
"""
|
||||
|
||||
def __init__(self, param1: str, param2: bool = True, **kwargs):
|
||||
# No docstring - parameters documented above
|
||||
def __init__(self, param1: str, old_param: str = None, **kwargs):
|
||||
"""Initialize the service.
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
old_param: Controls legacy behavior.
|
||||
|
||||
.. deprecated:: 1.2.0
|
||||
This parameter no longer has any effect and will be removed in version 2.0.
|
||||
|
||||
**kwargs: Additional arguments passed to parent.
|
||||
"""
|
||||
if old_param is not None:
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"Parameter 'old_param' is deprecated and will be removed in version 2.0.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@property
|
||||
@@ -97,20 +150,41 @@ class MyService(BaseService):
|
||||
"""
|
||||
pass
|
||||
|
||||
# Dataclass
|
||||
# Dataclass with code examples
|
||||
@dataclass
|
||||
class ConfigParams:
|
||||
"""Configuration parameters for the service.
|
||||
class MessageFrame:
|
||||
"""Frame containing messages in OpenAI format.
|
||||
|
||||
Supports both simple and content list message formats.
|
||||
|
||||
Example::
|
||||
|
||||
[
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"}
|
||||
]
|
||||
|
||||
Parameters:
|
||||
host: The host address.
|
||||
port: The port number. Defaults to 8080.
|
||||
timeout: Connection timeout in seconds.
|
||||
messages: List of messages in OpenAI format.
|
||||
"""
|
||||
|
||||
host: str
|
||||
port: int = 8080
|
||||
timeout: float = 30.0
|
||||
messages: List[dict]
|
||||
|
||||
# Enum class
|
||||
class Status(Enum):
|
||||
"""Status codes for processing operations.
|
||||
|
||||
Parameters:
|
||||
PENDING: Operation is queued but not started.
|
||||
RUNNING: Operation is currently in progress.
|
||||
COMPLETED: Operation finished successfully.
|
||||
FAILED: Operation encountered an error.
|
||||
"""
|
||||
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
```
|
||||
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
26
README.md
26
README.md
@@ -51,19 +51,19 @@ You can connect to Pipecat from any platform using our official SDKs:
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova) [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Category | Services |
|
||||
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
|
||||
@@ -1,13 +1,20 @@
|
||||
build~=1.2.2
|
||||
coverage~=7.6.12
|
||||
coverage~=7.9.1
|
||||
grpcio-tools~=1.67.1
|
||||
pip-tools~=7.4.1
|
||||
pre-commit~=4.0.1
|
||||
pyright~=1.1.400
|
||||
pytest~=8.3.4
|
||||
pytest-asyncio~=0.25.3
|
||||
pre-commit~=4.2.0
|
||||
pyright~=1.1.402
|
||||
pytest~=8.4.1
|
||||
pytest-asyncio~=1.0.0
|
||||
pytest-aiohttp==1.1.0
|
||||
ruff~=0.11.13
|
||||
setuptools~=70.0.0
|
||||
setuptools_scm~=8.1.0
|
||||
python-dotenv~=1.0.1
|
||||
ruff~=0.12.1
|
||||
setuptools~=78.1.1
|
||||
setuptools_scm~=8.3.1
|
||||
python-dotenv~=1.1.1
|
||||
|
||||
# For running examples
|
||||
uvicorn
|
||||
python-dotenv
|
||||
fastapi
|
||||
aiohttp
|
||||
aiortc
|
||||
169
docs/api/conf.py
169
docs/api/conf.py
@@ -26,18 +26,20 @@ extensions = [
|
||||
"sphinx.ext.intersphinx",
|
||||
]
|
||||
|
||||
suppress_warnings = [
|
||||
"autodoc.mocked_object",
|
||||
]
|
||||
|
||||
# Napoleon settings
|
||||
napoleon_google_docstring = True
|
||||
napoleon_numpy_docstring = False
|
||||
napoleon_include_init_with_doc = False
|
||||
napoleon_include_init_with_doc = True
|
||||
|
||||
# AutoDoc settings
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"member-order": "bysource",
|
||||
"undoc-members": True,
|
||||
"exclude-members": "__weakref__,__init__",
|
||||
"no-index": True,
|
||||
"undoc-members": False,
|
||||
"exclude-members": "__weakref__,model_config",
|
||||
"show-inheritance": True,
|
||||
}
|
||||
|
||||
@@ -72,20 +74,16 @@ autodoc_mock_imports = [
|
||||
"langchain",
|
||||
"lmnt",
|
||||
"noisereduce",
|
||||
"openai",
|
||||
"openpipe",
|
||||
"simli",
|
||||
"soundfile",
|
||||
"soniox",
|
||||
"pipecat_ai_krisp",
|
||||
"pyaudio",
|
||||
"_tkinter",
|
||||
"tkinter",
|
||||
"daily",
|
||||
"daily_python",
|
||||
"pydantic.BaseModel",
|
||||
"pydantic.Field",
|
||||
"pydantic._internal._model_construction",
|
||||
"pydantic._internal._fields",
|
||||
# Moondream dependencies
|
||||
"torch",
|
||||
"transformers",
|
||||
@@ -168,85 +166,54 @@ autodoc_mock_imports = [
|
||||
"mcp.client.stdio",
|
||||
"mcp.ClientSession",
|
||||
"mcp.StdioServerParameters",
|
||||
# gstreamer
|
||||
"gi",
|
||||
"gi.require_version",
|
||||
"gi.repository",
|
||||
# Protobuf mocks
|
||||
"pipecat.frames.protobufs.frames_pb2",
|
||||
"pipecat.serializers.protobuf",
|
||||
"google.protobuf",
|
||||
"google.protobuf.descriptor",
|
||||
"google.protobuf.descriptor_pool",
|
||||
"google.protobuf.runtime_version",
|
||||
"google.protobuf.symbol_database",
|
||||
"google.protobuf.internal.builder",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
autodoc_typehints = "description"
|
||||
autodoc_typehints = "signature" # Show type hints in the signature only, not in the docstring
|
||||
html_show_sphinx = False
|
||||
|
||||
|
||||
def verify_modules():
|
||||
"""Verify that required modules are available."""
|
||||
required_modules = {
|
||||
"services": [
|
||||
"assemblyai",
|
||||
"aws",
|
||||
"cartesia",
|
||||
"deepgram",
|
||||
"google",
|
||||
"lmnt",
|
||||
"riva",
|
||||
"simli",
|
||||
],
|
||||
"serializers": ["livekit"],
|
||||
"vad": ["silero", "vad_analyzer"],
|
||||
"transports": {
|
||||
"services": ["daily", "livekit"],
|
||||
"local": ["audio", "tk"],
|
||||
"network": ["fastapi_websocket", "websocket_server"],
|
||||
},
|
||||
}
|
||||
def import_core_modules():
|
||||
"""Import core pipecat modules for autodoc to discover."""
|
||||
core_modules = [
|
||||
"pipecat",
|
||||
"pipecat.frames",
|
||||
"pipecat.pipeline",
|
||||
"pipecat.processors",
|
||||
"pipecat.services",
|
||||
"pipecat.transports",
|
||||
"pipecat.audio",
|
||||
"pipecat.adapters",
|
||||
"pipecat.clocks",
|
||||
"pipecat.metrics",
|
||||
"pipecat.observers",
|
||||
"pipecat.serializers",
|
||||
"pipecat.sync",
|
||||
"pipecat.transcriptions",
|
||||
"pipecat.utils",
|
||||
]
|
||||
|
||||
# Skip importing modules that are in autodoc_mock_imports
|
||||
skipped_modules = set(autodoc_mock_imports)
|
||||
|
||||
missing = []
|
||||
for category, modules in required_modules.items():
|
||||
if isinstance(modules, dict):
|
||||
# Handle nested structure
|
||||
for subcategory, submodules in modules.items():
|
||||
for module in submodules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if (
|
||||
f"pipecat.{category}.{subcategory}.{module}" in skipped_modules
|
||||
or module in skipped_modules
|
||||
):
|
||||
logger.info(
|
||||
f"Skipping import of mocked module: pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.info(
|
||||
f"Successfully imported pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{subcategory}.{module} - {str(e)}"
|
||||
)
|
||||
else:
|
||||
# Handle flat structure
|
||||
for module in modules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if f"pipecat.{category}.{module}" in skipped_modules or module in skipped_modules:
|
||||
logger.info(f"Skipping import of mocked module: pipecat.{category}.{module}")
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{module}")
|
||||
logger.info(f"Successfully imported pipecat.{category}.{module}")
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{module} - {str(e)}"
|
||||
)
|
||||
|
||||
if missing:
|
||||
logger.warning(f"Some optional modules are not available: {missing}")
|
||||
for module_name in core_modules:
|
||||
try:
|
||||
__import__(module_name)
|
||||
logger.info(f"Successfully imported {module_name}")
|
||||
except ImportError as e:
|
||||
logger.warning(f"Failed to import {module_name}: {e}")
|
||||
|
||||
|
||||
def clean_title(title: str) -> str:
|
||||
@@ -258,39 +225,7 @@ def clean_title(title: str) -> str:
|
||||
parts = title.split(".")
|
||||
title = parts[-1]
|
||||
|
||||
# Special cases for service names and common acronyms
|
||||
special_cases = {
|
||||
"ai": "AI",
|
||||
"aws": "AWS",
|
||||
"api": "API",
|
||||
"vad": "VAD",
|
||||
"assemblyai": "AssemblyAI",
|
||||
"deepgram": "Deepgram",
|
||||
"elevenlabs": "ElevenLabs",
|
||||
"openai": "OpenAI",
|
||||
"openpipe": "OpenPipe",
|
||||
"playht": "PlayHT",
|
||||
"xtts": "XTTS",
|
||||
"lmnt": "LMNT",
|
||||
"stt": "STT",
|
||||
"tts": "TTS",
|
||||
"llm": "LLM",
|
||||
}
|
||||
|
||||
# Check if the entire title is a special case
|
||||
if title.lower() in special_cases:
|
||||
return special_cases[title.lower()]
|
||||
|
||||
# Otherwise, capitalize each word
|
||||
words = title.split("_")
|
||||
cleaned_words = []
|
||||
for word in words:
|
||||
if word.lower() in special_cases:
|
||||
cleaned_words.append(special_cases[word.lower()])
|
||||
else:
|
||||
cleaned_words.append(word.capitalize())
|
||||
|
||||
return " ".join(cleaned_words)
|
||||
return title
|
||||
|
||||
|
||||
def setup(app):
|
||||
@@ -315,9 +250,8 @@ def setup(app):
|
||||
|
||||
excludes = [
|
||||
str(project_root / "src/pipecat/pipeline/to_be_updated"),
|
||||
str(project_root / "src/pipecat/processors/gstreamer"),
|
||||
str(project_root / "src/pipecat/services/to_be_updated"),
|
||||
str(project_root / "src/pipecat/vad"), # deprecated
|
||||
str(project_root / "src/pipecat/examples"),
|
||||
str(project_root / "src/pipecat/tests"),
|
||||
"**/test_*.py",
|
||||
"**/tests/*.py",
|
||||
]
|
||||
@@ -358,5 +292,4 @@ def setup(app):
|
||||
logger.error(f"Error generating API documentation: {e}", exc_info=True)
|
||||
|
||||
|
||||
# Run module verification
|
||||
verify_modules()
|
||||
import_core_modules()
|
||||
|
||||
@@ -1,57 +1,17 @@
|
||||
Pipecat API Reference Docs
|
||||
==========================
|
||||
Pipecat API Reference
|
||||
=====================
|
||||
|
||||
Welcome to Pipecat's API reference documentation!
|
||||
Welcome to the Pipecat API reference.
|
||||
|
||||
Pipecat is an open source framework for building voice and multimodal assistants.
|
||||
It provides a flexible pipeline architecture for connecting various AI services,
|
||||
audio processing, and transport layers.
|
||||
Use the navigation on the left to browse modules, or search using the search box.
|
||||
|
||||
**New to Pipecat?** Check out the `main documentation <https://docs.pipecat.ai>`_ for tutorials, guides, and client SDK information.
|
||||
|
||||
Quick Links
|
||||
-----------
|
||||
|
||||
* `GitHub Repository <https://github.com/pipecat-ai/pipecat>`_
|
||||
* `Website <https://pipecat.ai>`_
|
||||
|
||||
API Reference
|
||||
-------------
|
||||
|
||||
Core Components
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Frames <pipecat.frames>`
|
||||
* :mod:`Processors <pipecat.processors>`
|
||||
* :mod:`Pipeline <pipecat.pipeline>`
|
||||
|
||||
Audio Processing
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Audio <pipecat.audio>`
|
||||
|
||||
Services
|
||||
~~~~~~~~
|
||||
|
||||
* :mod:`Services <pipecat.services>`
|
||||
|
||||
Transport & Serialization
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Transports <pipecat.transports>`
|
||||
* :mod:`Local <pipecat.transports.local>`
|
||||
* :mod:`Network <pipecat.transports.network>`
|
||||
* :mod:`Services <pipecat.transports.services>`
|
||||
* :mod:`Serializers <pipecat.serializers>`
|
||||
|
||||
Utilities
|
||||
~~~~~~~~~
|
||||
|
||||
* :mod:`Adapters <pipecat.adapters>`
|
||||
* :mod:`Clocks <pipecat.clocks>`
|
||||
* :mod:`Metrics <pipecat.metrics>`
|
||||
* :mod:`Observers <pipecat.observers>`
|
||||
* :mod:`Sync <pipecat.sync>`
|
||||
* :mod:`Transcriptions <pipecat.transcriptions>`
|
||||
* :mod:`Utils <pipecat.utils>`
|
||||
* `Join our Community <https://discord.gg/pipecat>`_
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
@@ -71,11 +31,4 @@ Utilities
|
||||
Sync <api/pipecat.sync>
|
||||
Transcriptions <api/pipecat.transcriptions>
|
||||
Transports <api/pipecat.transports>
|
||||
Utils <api/pipecat.utils>
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
Utils <api/pipecat.utils>
|
||||
@@ -46,6 +46,8 @@ pipecat-ai[sambanova]
|
||||
pipecat-ai[silero]
|
||||
pipecat-ai[simli]
|
||||
pipecat-ai[soundfile]
|
||||
pipecat-ai[soniox]
|
||||
pipecat-ai[speechmatics]
|
||||
pipecat-ai[tavus]
|
||||
pipecat-ai[together]
|
||||
# pipecat-ai[ultravox] # Mocked
|
||||
|
||||
@@ -109,6 +109,13 @@ MINIMAX_GROUP_ID=...
|
||||
# Sarvam AI
|
||||
SARVAM_API_KEY=...
|
||||
|
||||
# Soniox
|
||||
SONIOX_API_KEY=
|
||||
|
||||
# Speechmatics
|
||||
SPEECHMATICS_API_KEY=...
|
||||
|
||||
|
||||
# SambaNova
|
||||
SAMBANOVA_API_KEY=...
|
||||
|
||||
|
||||
60
examples/aws-strands/README.md
Normal file
60
examples/aws-strands/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# AWS Strands Examples
|
||||
|
||||
This folder contains two Python examples demonstrating how to use Pipecat with the AWS Strands agent.
|
||||
|
||||
## Overview
|
||||
|
||||
These examples show how to delegate complex, multi-step tasks to a Strands agent, which can reason step-by-step and call tools to accomplish user requests.
|
||||
|
||||
These examples are intentionally simplified for demonstration, using mock API calls. They work best if you ask the bot:
|
||||
|
||||
> What's the weather where the Golden Gate Bridge is?
|
||||
|
||||
## Example Scripts
|
||||
|
||||
### `black-box.py`
|
||||
|
||||
A minimal example that demonstrates how to use the Strands agent with Pipecat. The agent can handle multi-step queries by calling tools, but does not explain its reasoning out loud.
|
||||
|
||||
### `explain-thinking.py`
|
||||
|
||||
An enhanced example where the Strands agent explains each step of its reasoning in clear, simple language as it works through a multi-step task.
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. **Clone the repository and navigate to this example:**
|
||||
|
||||
```bash
|
||||
git clone https://github.com/pipecat-ai/pipecat.git
|
||||
cd pipecat/examples/aws-strands
|
||||
```
|
||||
|
||||
2. **Set up a virtual environment:**
|
||||
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
3. **Install dependencies:**
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
4. **Configure environment variables:**
|
||||
|
||||
Copy the provided `env.example` file to `.env` and fill in the necessary credentials:
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
# Then edit .env with your preferred editor
|
||||
```
|
||||
|
||||
5. **Run an example:**
|
||||
|
||||
```bash
|
||||
python black-box.py
|
||||
# or
|
||||
python explain-thinking.py
|
||||
```
|
||||
206
examples/aws-strands/black-box.py
Normal file
206
examples/aws-strands/black-box.py
Normal file
@@ -0,0 +1,206 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from strands import Agent, tool
|
||||
from strands.models import BedrockModel
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
"""This example demonstrates how to use the Strands agent with Pipecat.
|
||||
|
||||
You can delegate complex, multi-step tasks to the Strands agent, which can cycle through LLM-based reasoning and tool calls to accomplish the task.
|
||||
|
||||
Try asking: "What's the weather where the Golden Gate Bridge is?"
|
||||
"""
|
||||
|
||||
# Strands agent tools
|
||||
|
||||
|
||||
@tool
|
||||
def get_location_name_from_landmark(landmark: str) -> str:
|
||||
"""
|
||||
Get the location name from a landmark.
|
||||
|
||||
Args:
|
||||
landmark (str): The name of the landmark, e.g. "Golden Gate Bridge".
|
||||
"""
|
||||
# Simulate fetching location
|
||||
return "San Francisco, CA"
|
||||
|
||||
|
||||
@tool
|
||||
def get_lat_long_from_location_name(location: str) -> dict:
|
||||
"""
|
||||
Get the latitude and longitude for a location name.
|
||||
|
||||
Args:
|
||||
location (str): The city and state, e.g. "San Francisco, CA".
|
||||
"""
|
||||
# Simulate fetching lat/long from a geocoding service
|
||||
return {"lat": 37.7749, "long": -122.4194}
|
||||
|
||||
|
||||
@tool
|
||||
def get_current_weather_from_lat_long(lat: float, long: float) -> dict:
|
||||
"""
|
||||
Get the current weather for a specific latitude and longitude.
|
||||
|
||||
Args:
|
||||
lat (float): The latitude of the location.
|
||||
long (float): The longitude of the location.
|
||||
"""
|
||||
# Simulate fetching weather data from a weather service
|
||||
return {"conditions": "nice", "temperature": "75"}
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
strands_agent = Agent(
|
||||
model=BedrockModel(
|
||||
model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0", max_tokens=64000
|
||||
),
|
||||
tools=[
|
||||
get_location_name_from_landmark,
|
||||
get_lat_long_from_location_name,
|
||||
get_current_weather_from_lat_long,
|
||||
],
|
||||
system_prompt="""
|
||||
You are a helpful personal assistant who can look up information about places and weather.
|
||||
|
||||
Your key capabilities:
|
||||
1. Look up where landmarks are located.
|
||||
2. Find latitude and longitude for a location.
|
||||
3. Look up the current weather for a specific latitude and longitude.
|
||||
|
||||
Explain each step of your reasoning in clear, simple, and concise language. Your responses will be converted to audio, so avoid special characters and numbered lists.
|
||||
""",
|
||||
)
|
||||
|
||||
async def handle_location_or_weather_related_queries(params: FunctionCallParams, query: str):
|
||||
"""
|
||||
Handle location or weather related queries.
|
||||
|
||||
Args:
|
||||
query (str): The user's query, e.g. "What's the weather where the Golden Gate Bridge is?".
|
||||
"""
|
||||
# Run in a background thread
|
||||
# (Otherwise the agent blocks the event loop; one effect of that is that we don't hear
|
||||
# "let me check on that" until the agent finishes)
|
||||
loop = asyncio.get_running_loop()
|
||||
result = await loop.run_in_executor(None, strands_agent, query)
|
||||
await params.result_callback(result.message)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
llm.register_direct_function(handle_location_or_weather_related_queries)
|
||||
|
||||
@llm.event_handler("on_function_calls_started")
|
||||
async def on_function_calls_started(service, function_calls):
|
||||
await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
|
||||
|
||||
tools = ToolsSchema(standard_tools=[handle_location_or_weather_related_queries])
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. Start by suggesting that the user ask about the weather where the Golden Gate Bridge is.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: the example runner is imported only when this file is
    # executed directly, then handed the bot coroutine and the transport factories.
    from pipecat.examples.run import main as launch_example

    launch_example(run_example, transport_params=transport_params)
|
||||
8
examples/aws-strands/env.example
Normal file
8
examples/aws-strands/env.example
Normal file
@@ -0,0 +1,8 @@
|
||||
OPENAI_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
DEEPGRAM_API_KEY=
|
||||
DAILY_API_KEY=
|
||||
DAILY_SAMPLE_ROOM_URL=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_REGION=
|
||||
249
examples/aws-strands/explain-thinking.py
Normal file
249
examples/aws-strands/explain-thinking.py
Normal file
@@ -0,0 +1,249 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from strands import Agent, tool
|
||||
from strands.models import BedrockModel
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
"""This example demonstrates how to use the Strands agent with Pipecat in a way where the agent explains its reasoning step-by-step.
|
||||
|
||||
You can delegate complex, multi-step tasks to the Strands agent, which can cycle through LLM-based reasoning and tool calls to accomplish the task.
|
||||
|
||||
Try asking: "What's the weather where the Golden Gate Bridge is?"
|
||||
"""
|
||||
|
||||
|
||||
# Strands agent tools
|
||||
|
||||
|
||||
@tool
def get_location_name_from_landmark(landmark: str) -> str:
    """
    Get the location name from a landmark.

    Args:
        landmark (str): The name of the landmark, e.g. "Golden Gate Bridge".
    """
    # Demo stub: the landmark argument is not consulted. The pause stands in for a
    # slow lookup, and the same fixed location is returned every time.
    simulated_latency_seconds = 3
    time.sleep(simulated_latency_seconds)
    location_name = "San Francisco, CA"
    return location_name
|
||||
|
||||
|
||||
@tool
def get_lat_long_from_location_name(location: str) -> dict:
    """
    Get the latitude and longitude for a location name.

    Args:
        location (str): The city and state, e.g. "San Francisco, CA".
    """
    # Demo stub: the location argument is not consulted. The pause stands in for a
    # slow geocoding service, and the same fixed coordinates are returned every time.
    simulated_latency_seconds = 3
    time.sleep(simulated_latency_seconds)
    coordinates = {"lat": 37.7749, "long": -122.4194}
    return coordinates
|
||||
|
||||
|
||||
@tool
def get_current_weather_from_lat_long(lat: float, long: float) -> dict:
    """
    Get the current weather for a specific latitude and longitude.

    Args:
        lat (float): The latitude of the location.
        long (float): The longitude of the location.
    """
    # Demo stub: the coordinates are not consulted. The pause stands in for a slow
    # weather service, and the same fixed report is returned every time.
    simulated_latency_seconds = 3
    time.sleep(simulated_latency_seconds)
    weather_report = {"conditions": "nice", "temperature": "75"}
    return weather_report
|
||||
|
||||
|
||||
# Each entry is a zero-argument factory rather than a ready-made params object,
# so nothing (e.g. SileroVADAnalyzer) is constructed up front. The factory is
# called once the desired transport has been selected.
transport_params = dict(
    daily=lambda: DailyParams(
        vad_analyzer=SileroVADAnalyzer(),
        audio_out_enabled=True,
        audio_in_enabled=True,
    ),
    twilio=lambda: FastAPIWebsocketParams(
        vad_analyzer=SileroVADAnalyzer(),
        audio_out_enabled=True,
        audio_in_enabled=True,
    ),
    webrtc=lambda: TransportParams(
        vad_analyzer=SileroVADAnalyzer(),
        audio_out_enabled=True,
        audio_in_enabled=True,
    ),
)
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run the voice bot that speaks the Strands agent's intermediate messages.

    Builds a transport -> STT -> LLM -> TTS -> transport pipeline. The OpenAI LLM
    can call ``handle_location_or_weather_related_queries``, which runs the
    Strands agent in a worker thread. Assistant text the agent emits before its
    final message is queued to TTS as it arrives; the final message is returned
    to the LLM as the function-call result.

    Args:
        transport (BaseTransport): Transport providing the pipeline input and output.
        _ (argparse.Namespace): Parsed command-line arguments (unused).
        handle_sigint (bool): Forwarded to ``PipelineRunner``.
    """
    logger.info("Starting bot")

    # Captured up front so the Strands callback, which is invoked while the agent
    # runs in an executor thread, can hand work back to this loop.
    loop = asyncio.get_running_loop()

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    # True once the agent has reported an "end_turn" stop: the next assistant
    # message is then the final answer, which is returned as the function result
    # instead of being spoken here.
    next_strands_message_is_last = False
    strands_messages_queue = asyncio.Queue()

    def strands_callback_handler(**kwargs):
        """
        Handle events from the Strands agent.
        """
        nonlocal next_strands_message_is_last
        if "event" in kwargs:
            event_obj = kwargs["event"]
            if event_obj and "messageStop" in event_obj:
                message_stop = event_obj["messageStop"]
                if message_stop and "stopReason" in message_stop:
                    stop_reason = message_stop["stopReason"]
                    if stop_reason == "end_turn":
                        next_strands_message_is_last = True
        elif "message" in kwargs:
            message_obj = kwargs["message"]
            if message_obj and "content" in message_obj and "role" in message_obj:
                role = message_obj["role"]
                content = message_obj["content"]
                if role == "assistant" and isinstance(content, list):
                    for content_obj in content:
                        if isinstance(content_obj, dict) and "text" in content_obj:
                            message = content_obj["text"]
                            if not next_strands_message_is_last:
                                # asyncio.Queue is not thread-safe and this handler
                                # runs outside the event-loop thread, so schedule
                                # the put on the loop instead of calling it here.
                                loop.call_soon_threadsafe(
                                    strands_messages_queue.put_nowait, message
                                )

    async def process_strands_messages():
        """Forward queued agent messages to TTS, one at a time, until cancelled."""
        while True:
            message = await strands_messages_queue.get()
            await tts.queue_frame(TTSSpeakFrame(message))
            strands_messages_queue.task_done()

    # Keep a reference: the event loop only holds weak references to tasks, so an
    # unreferenced task may be garbage-collected before it finishes.
    strands_messages_task = asyncio.create_task(process_strands_messages())

    strands_agent = Agent(
        model=BedrockModel(
            model_id="us.anthropic.claude-3-7-sonnet-20250219-v1:0", max_tokens=64000
        ),
        tools=[
            get_location_name_from_landmark,
            get_lat_long_from_location_name,
            get_current_weather_from_lat_long,
        ],
        system_prompt="""
        You are a helpful personal assistant who can look up information about places and weather.

        Your key capabilities:
        1. Look up where landmarks are located.
        2. Find latitude and longitude for a location.
        3. Look up the current weather for a specific latitude and longitude.

        Explain each step of your reasoning in clear, simple, and concise language. Your responses will be converted to audio, so avoid special characters and numbered lists.
        """,
        callback_handler=strands_callback_handler,
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    async def handle_location_or_weather_related_queries(params: FunctionCallParams, query: str):
        """
        Handle location or weather related queries.

        Args:
            query (str): The user's query, e.g. "What's the weather where the Golden Gate Bridge is?".
        """
        nonlocal next_strands_message_is_last
        # Start every agent run with the flag cleared. Otherwise the "end_turn" seen
        # at the end of the previous run would suppress all intermediate messages
        # of this one.
        next_strands_message_is_last = False
        # Run in a background thread
        # (Otherwise the agent blocks the event loop; one effect of that is that we don't hear
        # the agent's "thinking" messages until the agent finishes)
        result = await loop.run_in_executor(None, strands_agent, query)
        await params.result_callback(result.message)

    llm.register_direct_function(handle_location_or_weather_related_queries)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    tools = ToolsSchema(standard_tools=[handle_location_or_weather_related_queries])

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way. Start by suggesting that the user ask about the weather where the Golden Gate Bridge is.",
        },
    ]

    context = OpenAILLMContext(messages, tools)
    context_aggregator = llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            context_aggregator.user(),
            llm,
            tts,
            transport.output(),
            context_aggregator.assistant(),
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)

    try:
        await runner.run(task)
    finally:
        # The forwarder loops forever; stop it once the pipeline has finished so it
        # does not outlive this bot session.
        strands_messages_task.cancel()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: the example runner is imported only when this file is
    # executed directly, then handed the bot coroutine and the transport factories.
    from pipecat.examples.run import main as launch_example

    launch_example(run_example, transport_params=transport_params)
|
||||
@@ -2,4 +2,5 @@ fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[webrtc,daily,deepgram,cartesia]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
strands-agents
|
||||
@@ -4364,9 +4364,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -6081,9 +6081,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
|
||||
@@ -2,4 +2,4 @@ aiofiles
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia]
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia,soundfile]
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10"
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* RTVI Client Implementation
|
||||
* Pipecat Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
@@ -16,7 +16,7 @@
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import { RTVIClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
/**
|
||||
@@ -26,7 +26,7 @@ import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
class ChatbotClient {
|
||||
constructor() {
|
||||
// Initialize client state
|
||||
this.rtviClient = null;
|
||||
this.pcClient = null;
|
||||
this.setupDOMElements();
|
||||
this.initializeClientAndTransport();
|
||||
this.setupEventListeners();
|
||||
@@ -59,7 +59,7 @@ class ChatbotClient {
|
||||
this.disconnectBtn.addEventListener('click', () => this.disconnect());
|
||||
|
||||
// Populate device selector
|
||||
this.rtviClient.getAllMics().then((mics) => {
|
||||
this.pcClient.getAllMics().then((mics) => {
|
||||
console.log('Available mics:', mics);
|
||||
mics.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
@@ -71,16 +71,16 @@ class ChatbotClient {
|
||||
this.deviceSelector.addEventListener('change', (event) => {
|
||||
const selectedDeviceId = event.target.value;
|
||||
console.log('Selected device ID:', selectedDeviceId);
|
||||
this.rtviClient.updateMic(selectedDeviceId);
|
||||
this.pcClient.updateMic(selectedDeviceId);
|
||||
});
|
||||
|
||||
// Handle mic mute/unmute toggle
|
||||
const micToggleBtn = document.getElementById('mic-toggle-btn');
|
||||
|
||||
micToggleBtn.addEventListener('click', () => {
|
||||
let micEnabled = this.rtviClient.isMicEnabled;
|
||||
let micEnabled = this.pcClient.isMicEnabled;
|
||||
micToggleBtn.textContent = micEnabled ? 'Unmute Mic' : 'Mute Mic';
|
||||
this.rtviClient.enableMic(!micEnabled);
|
||||
this.pcClient.enableMic(!micEnabled);
|
||||
// Add logic to mute/unmute the mic
|
||||
if (micEnabled) {
|
||||
console.log('Mic muted');
|
||||
@@ -93,23 +93,12 @@ class ChatbotClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up the RTVI client and Daily transport
|
||||
* Set up the Pipecat client and Daily transport
|
||||
*/
|
||||
async initializeClientAndTransport() {
|
||||
// Initialize the RTVI client with a DailyTransport and our configuration
|
||||
this.rtviClient = new RTVIClient({
|
||||
// Initialize the Pipecat client with a DailyTransport and our configuration
|
||||
this.pcClient = new PipecatClient({
|
||||
transport: new DailyTransport(),
|
||||
params: {
|
||||
// REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
baseUrl:
|
||||
'https://<Modal workspace>--pipecat-modal-bot-launcher.modal.run',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
requestData: {
|
||||
bot_name: 'openai',
|
||||
},
|
||||
},
|
||||
enableMic: true, // Enable microphone for user input
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
@@ -176,8 +165,8 @@ class ChatbotClient {
|
||||
// Set up listeners for media track events
|
||||
this.setupTrackListeners();
|
||||
|
||||
await this.rtviClient.initDevices();
|
||||
window.client = this.rtviClient;
|
||||
await this.pcClient.initDevices();
|
||||
window.client = this.pcClient;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -212,10 +201,10 @@ class ChatbotClient {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.rtviClient) return;
|
||||
if (!this.pcClient) return;
|
||||
|
||||
// Get current tracks from the client
|
||||
const tracks = this.rtviClient.tracks();
|
||||
const tracks = this.pcClient.tracks();
|
||||
|
||||
// Set up any available bot tracks
|
||||
if (tracks.bot?.audio) {
|
||||
@@ -231,10 +220,10 @@ class ChatbotClient {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.rtviClient) return;
|
||||
if (!this.pcClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local) {
|
||||
if (track.kind === 'audio') {
|
||||
@@ -253,7 +242,7 @@ class ChatbotClient {
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
if (participant.local) {
|
||||
this.log('Local mic muted');
|
||||
return;
|
||||
@@ -311,21 +300,27 @@ class ChatbotClient {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
*/
|
||||
async connect() {
|
||||
try {
|
||||
const botSelector = document.getElementById('bot-selector');
|
||||
const selectedBot = botSelector.value;
|
||||
this.rtviClient.params.requestData.bot_name = selectedBot;
|
||||
|
||||
// Initialize audio/video devices
|
||||
this.log('Initializing devices...');
|
||||
await this.rtviClient.initDevices();
|
||||
await this.pcClient.initDevices();
|
||||
|
||||
// Connect to the bot
|
||||
this.log(`Connecting to bot: ${selectedBot}`);
|
||||
await this.rtviClient.connect();
|
||||
await this.pcClient.connect({
|
||||
// REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
endpoint:
|
||||
'https://<your-workspace>--pipecat-modal-fastapi-app.modal.run/connect',
|
||||
requestData: {
|
||||
bot_name: selectedBot,
|
||||
},
|
||||
});
|
||||
|
||||
this.log('Connection complete');
|
||||
} catch (error) {
|
||||
@@ -336,9 +331,9 @@ class ChatbotClient {
|
||||
this.updateStatus('Error');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.rtviClient) {
|
||||
if (this.pcClient) {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
await this.pcClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError.message}`);
|
||||
}
|
||||
@@ -350,10 +345,10 @@ class ChatbotClient {
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.rtviClient) {
|
||||
if (this.pcClient) {
|
||||
try {
|
||||
// Disconnect the RTVI client
|
||||
await this.rtviClient.disconnect();
|
||||
// Disconnect the Pipecat client
|
||||
await this.pcClient.disconnect();
|
||||
|
||||
// Clean up audio
|
||||
if (this.botAudio.srcObject) {
|
||||
|
||||
@@ -301,7 +301,7 @@ def fastapi_app():
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Include the endpoints from endpoints.py
|
||||
# Include the endpoints from this file
|
||||
web_app.include_router(router)
|
||||
|
||||
return web_app
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==0.71.3
|
||||
modal==1.0.5
|
||||
fastapi[all]
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"name": "my-daily-app",
|
||||
"version": "0.1.0",
|
||||
"dependencies": {
|
||||
"axios": "^1.6.0",
|
||||
"axios": "^1.11.0",
|
||||
"next": "^14.0.0",
|
||||
"pino": "^8.15.0",
|
||||
"react": "^18.2.0",
|
||||
@@ -215,10 +215,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.26.tgz",
|
||||
"integrity": "sha512-vO//GJ/YBco+H7xdQhzJxF7ub3SUwft76jwaeOyVVQFHCi5DCnkP16WHB+JBylo4vOKPoZBlR94Z8xBxNBdNJA==",
|
||||
"license": "MIT"
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.30.tgz",
|
||||
"integrity": "sha512-KBiBKrDY6kxTQWGzKjQB7QirL3PiiOkV7KW98leHFjtVRKtft76Ra5qSA/SL75xT44dp6hOcqiiJ6iievLOYug=="
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
"version": "14.2.25",
|
||||
@@ -231,13 +230,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.26.tgz",
|
||||
"integrity": "sha512-zDJY8gsKEseGAxG+C2hTMT0w9Nk9N1Sk1qV7vXYz9MEiyRoF5ogQX2+vplyUMIfygnjn9/A04I6yrUTRTuRiyQ==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.30.tgz",
|
||||
"integrity": "sha512-EAqfOTb3bTGh9+ewpO/jC59uACadRHM6TSA9DdxJB/6gxOpyV+zrbqeXiFTDy9uV6bmipFDkfpAskeaDcO+7/g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -247,13 +245,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.26.tgz",
|
||||
"integrity": "sha512-U0adH5ryLfmTDkahLwG9sUQG2L0a9rYux8crQeC92rPhi3jGQEY47nByQHrVrt3prZigadwj/2HZ1LUUimuSbg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.30.tgz",
|
||||
"integrity": "sha512-TyO7Wz1IKE2kGv8dwQ0bmPL3s44EKVencOqwIY69myoS3rdpO1NPg5xPM5ymKu7nfX4oYJrpMxv8G9iqLsnL4A==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -263,13 +260,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-SINMl1I7UhfHGM7SoRiw0AbwnLEMUnJ/3XXVmhyptzriHbWvPPbbm0OEVG24uUKhuS1t0nvN/DBvm5kz6ZIqpg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-I5lg1fgPJ7I5dk6mr3qCH1hJYKJu1FsfKSiTKoYwcuUf53HWTrEkwmMI0t5ojFKeA6Vu+SfT2zVy5NS0QLXV4Q==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -279,13 +275,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-s6JaezoyJK2DxrwHWxLWtJKlqKqTdi/zaYigDXUJ/gmx/72CrzdVZfMvUc6VqnZ7YEvRijvYo+0o4Z9DencduA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-8GkNA+sLclQyxgzCDs2/2GSwBc92QLMrmYAmoP2xehe5MUKBLB2cgo34Yu242L1siSkwQkiV4YLdCnjwc/Micw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -295,13 +290,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-FEXeUQi8/pLr/XI0hKbe0tgbLmHFRhgXOUiPScz2hk0hSmbGiU8aUqVslj/6C6KA38RzXnWoJXo4FMo6aBxjzg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-8Ly7okjssLuBoe8qaRCcjGtcMsv79hwzn/63wNeIkzJVFVX06h5S737XNr7DZwlsbTBDOyI6qbL2BJB5n6TV/w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -311,13 +305,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-BUsomaO4d2DuXhXhgQCVt2jjX4B4/Thts8nDoIruEJkhE5ifeQFtvW5c9JkdOtYvE5p2G0hcwQ0UbRaQmQwaVg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-dBmV1lLNeX4mR7uI7KNVHsGQU+OgTG5RGFPi3tBJpsKPvOPtg9poyav/BYWrB3GPQL4dW5YGGgalwZ79WukbKQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -327,13 +320,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-5auwsMVzT7wbB2CZXQxDctpWbdEnEW/e66DyXO1DcgHxIyhP06awu+rHKshZE+lPLIGiwtjo7bsyeuubewwxMw==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-6MMHi2Qc1Gkq+4YLXAgbYslE1f9zMGBikKMdmQRHXjkGPot1JY3n5/Qrbg40Uvbi8//wYnydPnyvNhI1DMUW1g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -343,13 +335,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-ia32-msvc": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-GQWg/Vbz9zUGi9X80lOeGsz1rMH/MtFO/XqigDznhhhTfDlDoynCM6982mPCbSlxJ/aveZcKtTlwfAjwhyxDpg==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-pVZMnFok5qEX4RT59mK2hEVtJX+XFfak+/rjHpyFh7juiT52r177bfFKhnlafm0UOSldhXjj32b+LZIOdswGTg==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -359,13 +350,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-2rdB3T1/Gp7bv1eQTTm9d1Y1sv9UuJ2LAwOE0Pe2prHKe32UNscj7YS13fRB37d0GAiGNR+Y7ZcW8YjDI8Ns0w==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-4KCo8hMZXMjpTzs3HOqOGYYwAXymXIy7PEPAXNEcEOyKqkjiDlECumrWziy+JEF0Oi4ILHGxzgQ3YiMGG2t/Lg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -620,11 +610,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -1176,13 +1165,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.8.4",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.8.4.tgz",
|
||||
"integrity": "sha512-eBSYY4Y68NNlHbHBMdeDmKNtDgXWhQsJcGqzO3iLUM0GraQFSS9cVgPX5I9b3lbdFKyYoAEGAZF1DwhTaljNAw==",
|
||||
"version": "1.11.0",
|
||||
"resolved": "https://registry.npmjs.org/axios/-/axios-1.11.0.tgz",
|
||||
"integrity": "sha512-1Lx3WLFQWm3ooKDYZD1eXmoGO9fxYQjrycfHFC8P0sCfQVXyROp0p9PFWBehewBOdCwHc+f/b8I0fMto5eSfwA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"follow-redirects": "^1.15.6",
|
||||
"form-data": "^4.0.0",
|
||||
"form-data": "^4.0.4",
|
||||
"proxy-from-env": "^1.1.0"
|
||||
}
|
||||
},
|
||||
@@ -1224,11 +1213,10 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -2448,14 +2436,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/form-data": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.2.tgz",
|
||||
"integrity": "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w==",
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
|
||||
"integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"asynckit": "^0.4.0",
|
||||
"combined-stream": "^1.0.8",
|
||||
"es-set-tostringtag": "^2.1.0",
|
||||
"hasown": "^2.0.2",
|
||||
"mime-types": "^2.1.12"
|
||||
},
|
||||
"engines": {
|
||||
@@ -2614,11 +2603,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -3613,12 +3601,11 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.26.tgz",
|
||||
"integrity": "sha512-b81XSLihMwCfwiUVRRja3LphLo4uBBMZEzBBWMaISbKTwOmq3wPknIETy/8000tr7Gq4WmbuFYPS7jOYIf+ZJw==",
|
||||
"license": "MIT",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.30.tgz",
|
||||
"integrity": "sha512-+COdu6HQrHHFQ1S/8BBsCag61jZacmvbuL2avHvQFbWa2Ox7bE+d8FyNgxRLjXQ5wtPyQwEmk85js/AuaG2Sbg==",
|
||||
"dependencies": {
|
||||
"@next/env": "14.2.26",
|
||||
"@next/env": "14.2.30",
|
||||
"@swc/helpers": "0.5.5",
|
||||
"busboy": "1.6.0",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -3633,15 +3620,15 @@
|
||||
"node": ">=18.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "14.2.26",
|
||||
"@next/swc-darwin-x64": "14.2.26",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.26",
|
||||
"@next/swc-linux-arm64-musl": "14.2.26",
|
||||
"@next/swc-linux-x64-gnu": "14.2.26",
|
||||
"@next/swc-linux-x64-musl": "14.2.26",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.26",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.26",
|
||||
"@next/swc-win32-x64-msvc": "14.2.26"
|
||||
"@next/swc-darwin-arm64": "14.2.30",
|
||||
"@next/swc-darwin-x64": "14.2.30",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.30",
|
||||
"@next/swc-linux-arm64-musl": "14.2.30",
|
||||
"@next/swc-linux-x64-gnu": "14.2.30",
|
||||
"@next/swc-linux-x64-musl": "14.2.30",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.30",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.30",
|
||||
"@next/swc-win32-x64-msvc": "14.2.30"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"axios": "^1.6.0",
|
||||
"axios": "^1.11.0",
|
||||
"next": "^14.0.0",
|
||||
"pino": "^8.15.0",
|
||||
"react": "^18.2.0",
|
||||
|
||||
@@ -103,7 +103,7 @@ export default async function handler(req, res) {
|
||||
const sip_config = {
|
||||
display_name: From,
|
||||
sip_mode: 'dial-in',
|
||||
num_endpoints: call_transfer !== null ? 2 : 1,
|
||||
num_endpoints: (call_transfer !== undefined && call_transfer !== null) ? 2 : 1,
|
||||
codecs: {"audio": ["OPUS"]},
|
||||
};
|
||||
daily_room_properties.sip = sip_config;
|
||||
|
||||
@@ -90,7 +90,7 @@ async def main(transport: DailyTransport):
|
||||
logger.info("Participant left: {}", participant)
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
runner = PipelineRunner(handle_sigint=False, force_gc=True)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ Try the hosted version of the demo here: https://pcc-smart-turn.vercel.app/.
|
||||
4. Run the server:
|
||||
|
||||
```bash
|
||||
LOCAL=1 python server.py
|
||||
LOCAL_RUN=1 python server.py
|
||||
```
|
||||
|
||||
### Run the client
|
||||
|
||||
1289
examples/fal-smart-turn/client/package-lock.json
generated
1289
examples/fal-smart-turn/client/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -9,9 +9,9 @@
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/client-react": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10",
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/client-react": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0",
|
||||
"next": "15.3.1",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import './globals.css';
|
||||
import { RTVIProvider } from '@/providers/RTVIProvider';
|
||||
import { PipecatProvider } from '@/providers/PipecatProvider';
|
||||
|
||||
export const metadata = {
|
||||
title: 'Pipecat React Client',
|
||||
@@ -20,7 +20,7 @@ export default function RootLayout({
|
||||
<link rel="icon" href="/favicon.svg" type="image/svg+xml" />
|
||||
</head>
|
||||
<body>
|
||||
<RTVIProvider>{children}</RTVIProvider>
|
||||
<PipecatProvider>{children}</PipecatProvider>
|
||||
</body>
|
||||
</html>
|
||||
);
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
'use client';
|
||||
|
||||
import {
|
||||
RTVIClientAudio,
|
||||
RTVIClientVideo,
|
||||
useRTVIClientTransportState,
|
||||
PipecatClientAudio,
|
||||
PipecatClientVideo,
|
||||
usePipecatClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
import { ConnectButton } from '../components/ConnectButton';
|
||||
import { StatusDisplay } from '../components/StatusDisplay';
|
||||
import { DebugDisplay } from '../components/DebugDisplay';
|
||||
|
||||
function BotVideo() {
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const isConnected = transportState !== 'disconnected';
|
||||
|
||||
return (
|
||||
<div className="bot-container">
|
||||
<div className="video-container">
|
||||
{isConnected && <RTVIClientVideo participant="bot" fit="cover" />}
|
||||
{isConnected && <PipecatClientVideo participant="bot" fit="cover" />}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -35,7 +35,7 @@ export default function Home() {
|
||||
</div>
|
||||
|
||||
<DebugDisplay />
|
||||
<RTVIClientAudio />
|
||||
<PipecatClientAudio />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
import {
|
||||
useRTVIClient,
|
||||
useRTVIClientTransportState,
|
||||
usePipecatClient,
|
||||
usePipecatClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
|
||||
// Get the API base URL from environment variables
|
||||
// Default to "/api" if not specified
|
||||
// "/api" is the default for Next.js API routes and used
|
||||
// for the Pipecat Cloud deployed agent
|
||||
const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || '/api';
|
||||
|
||||
export function ConnectButton() {
|
||||
const client = useRTVIClient();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const client = usePipecatClient();
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const isConnected = ['connected', 'ready'].includes(transportState);
|
||||
|
||||
const handleClick = async () => {
|
||||
@@ -18,7 +24,10 @@ export function ConnectButton() {
|
||||
if (isConnected) {
|
||||
await client.disconnect();
|
||||
} else {
|
||||
await client.connect();
|
||||
await client.connect({
|
||||
endpoint: `${API_BASE_URL}/connect`,
|
||||
requestData: { foo: 'bar' },
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Connection error:', error);
|
||||
|
||||
@@ -6,7 +6,7 @@ import {
|
||||
TranscriptData,
|
||||
BotLLMTextData,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { useRTVIClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import { usePipecatClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import './DebugDisplay.css';
|
||||
|
||||
interface SmartTurnResultData {
|
||||
@@ -20,7 +20,7 @@ interface SmartTurnResultData {
|
||||
|
||||
export function DebugDisplay() {
|
||||
const debugLogRef = useRef<HTMLDivElement>(null);
|
||||
const client = useRTVIClient();
|
||||
const client = usePipecatClient();
|
||||
|
||||
const log = useCallback((message: string) => {
|
||||
if (!debugLogRef.current) return;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { useRTVIClientTransportState } from '@pipecat-ai/client-react';
|
||||
import { usePipecatClientTransportState } from '@pipecat-ai/client-react';
|
||||
|
||||
export function StatusDisplay() {
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const transportState = usePipecatClientTransportState();
|
||||
|
||||
return (
|
||||
<div className="status">
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
'use client';
|
||||
|
||||
import { PipecatClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { PipecatClientProvider } from '@pipecat-ai/client-react';
|
||||
import { PropsWithChildren, useEffect, useState } from 'react';
|
||||
|
||||
export function PipecatProvider({ children }: PropsWithChildren) {
|
||||
const [client, setClient] = useState<PipecatClient | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const pcClient = new PipecatClient({
|
||||
transport: new DailyTransport(),
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
setClient(pcClient);
|
||||
}, []);
|
||||
|
||||
if (!client) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return (
|
||||
<PipecatClientProvider client={client}>{children}</PipecatClientProvider>
|
||||
);
|
||||
}
|
||||
@@ -1,43 +0,0 @@
|
||||
'use client';
|
||||
|
||||
import { RTVIClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { RTVIClientProvider } from '@pipecat-ai/client-react';
|
||||
import { PropsWithChildren, useEffect, useState } from 'react';
|
||||
|
||||
// Get the API base URL from environment variables
|
||||
// Default to "/api" if not specified
|
||||
// "/api" is the default for Next.js API routes and used
|
||||
// for the Pipecat Cloud deployed agent
|
||||
const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || '/api';
|
||||
|
||||
console.log('Using API base URL:', API_BASE_URL);
|
||||
|
||||
export function RTVIProvider({ children }: PropsWithChildren) {
|
||||
const [client, setClient] = useState<RTVIClient | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const transport = new DailyTransport();
|
||||
|
||||
const rtviClient = new RTVIClient({
|
||||
transport,
|
||||
params: {
|
||||
baseUrl: API_BASE_URL,
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
requestData: { foo: 'bar' },
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
setClient(rtviClient);
|
||||
}, []);
|
||||
|
||||
if (!client) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return <RTVIClientProvider client={client}>{children}</RTVIClientProvider>;
|
||||
}
|
||||
@@ -45,7 +45,7 @@ from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Check if we're in local development mode
|
||||
LOCAL = os.getenv("LOCAL")
|
||||
LOCAL = os.getenv("LOCAL_RUN")
|
||||
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
@@ -20,7 +20,7 @@ from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.transports.services.daily import DailyLogLevel, DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -43,6 +43,7 @@ async def main():
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
transport.set_log_level(DailyLogLevel.Info)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
|
||||
153
examples/foundational/07a-interruptible-speechmatics.py
Normal file
153
examples/foundational/07a-interruptible-speechmatics.py
Normal file
@@ -0,0 +1,153 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.base_llm import BaseOpenAILLMService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
"""Run example using Speechmatics STT.
|
||||
|
||||
This example will use diarization within our STT service and output the words spoken by
|
||||
each individual speaker and wrap them with XML tags for the LLM to process. Note the
|
||||
instructions in the system context for the LLM. This greatly improves the conversation
|
||||
experience by allowing the LLM to understand who is speaking in a multi-party call.
|
||||
|
||||
If you do not wish to use diarization, then set the `enable_speaker_diarization` parameter
|
||||
to `False` or omit it altogether. The `text_format` will only be used if diarization is enabled.
|
||||
|
||||
By default, this example will use our ENHANCED operating point, which is optimized for
|
||||
high accuracy. You can change this by setting the `operating_point` parameter to a different
|
||||
value.
|
||||
|
||||
For more information on operating points, see the Speechmatics documentation:
|
||||
https://docs.speechmatics.com/rt-api-ref
|
||||
"""
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SpeechmaticsSTTService(
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
language=Language.EN,
|
||||
enable_speaker_diarization=True,
|
||||
text_format="<{speaker_id}>{text}</{speaker_id}>",
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
model="eleven_turbo_v2_5",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
params=BaseOpenAILLMService.InputParams(temperature=0.75),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a helpful British assistant called Alfred. "
|
||||
"Your goal is to demonstrate your capabilities in a succinct way. "
|
||||
"Your output will be converted to audio so don't include special characters in your answers. "
|
||||
"Always include punctuation in your responses. "
|
||||
"Give very short replies - do not give longer replies unless strictly necessary. "
|
||||
"Respond to what the user said in a concise, funny, creative and helpful way. "
|
||||
"Use `<Sn/>` tags to identify different speakers - do not use tags in your replies."
|
||||
),
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Say a short hello to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
109
examples/foundational/07aa-interruptible-soniox.py
Normal file
109
examples/foundational/07aa-interruptible-soniox.py
Normal file
@@ -0,0 +1,109 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.soniox.stt import SonioxSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SonioxSTTService(
|
||||
api_key=os.getenv("SONIOX_API_KEY"),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -35,7 +35,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: TransportParams(
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
@@ -61,7 +61,12 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash",
|
||||
# turn on thinking if you want it
|
||||
# params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),)
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
@@ -214,7 +214,12 @@ transport_params = {
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash",
|
||||
# turn on thinking if you want it
|
||||
# params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),
|
||||
)
|
||||
|
||||
tts = GoogleTTSService(
|
||||
voice_id="en-US-Chirp3-HD-Charon",
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
@@ -50,60 +51,63 @@ transport_params = {
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
# Create an HTTP session
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = NeuphonicHttpTTSService(
|
||||
api_key=os.getenv("NEUPHONIC_API_KEY"),
|
||||
voice_id="fc854436-2dac-4d21-aa69-ae17b54e98eb", # Emily
|
||||
)
|
||||
tts = NeuphonicHttpTTSService(
|
||||
api_key=os.getenv("NEUPHONIC_API_KEY"),
|
||||
voice_id="fc854436-2dac-4d21-aa69-ae17b54e98eb", # Emily
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
await runner.run(task)
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
89
examples/foundational/13h-speechmatics-transcription.py
Normal file
89
examples/foundational/13h-speechmatics-transcription.py
Normal file
@@ -0,0 +1,89 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_in_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True),
|
||||
"webrtc": lambda: TransportParams(audio_in_enabled=True),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
"""Run example using Speechmatics STT.
|
||||
|
||||
This example will use diarization within our STT service and output the words spoken by
|
||||
each individual speaker and wrap them with XML tags.
|
||||
|
||||
If you do not wish to use diarization, then set the `enable_speaker_diarization` parameter
|
||||
to `False` or omit it altogether. The `text_format` will only be used if diarization is enabled.
|
||||
|
||||
By default, this example will use our ENHANCED operating point, which is optimized for
|
||||
high accuracy. You can change this by setting the `operating_point` parameter to a different
|
||||
value.
|
||||
|
||||
For more information on operating points, see the Speechmatics documentation:
|
||||
https://docs.speechmatics.com/rt-api-ref
|
||||
"""
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SpeechmaticsSTTService(
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
language=Language.EN,
|
||||
enable_speaker_diarization=True,
|
||||
text_format="<{speaker_id}>{text}</{speaker_id}>",
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
81
examples/foundational/13i-soniox-transcription.py
Normal file
81
examples/foundational/13i-soniox-transcription.py
Normal file
@@ -0,0 +1,81 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.soniox.stt import SonioxSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
    """Print the text of every TranscriptionFrame that passes through."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Print transcriptions and forward every frame unchanged.

        Args:
            frame: The frame travelling through the pipeline.
            direction: The direction the frame is travelling in.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, TranscriptionFrame):
            print(f"Transcription: {frame.text}")

        # Forward everything (not just transcriptions) so this processor does
        # not swallow frames that later stages or the task rely on.
        await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# Transport name -> zero-argument factory. Each factory builds its params
# object (and its SileroVADAnalyzer) only when it is called.
transport_params = dict(
    daily=lambda: DailyParams(
        audio_in_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    twilio=lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    webrtc=lambda: TransportParams(
        audio_in_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
)
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run the Soniox transcription example on the given transport.

    Audio from the transport input is sent through SonioxSTTService and the
    resulting transcriptions are printed by TranscriptionLogger.

    Args:
        transport: Transport providing the audio input.
        _: Parsed command-line arguments (unused).
        handle_sigint: Forwarded to PipelineRunner.
    """
    logger.info(f"Starting bot")

    stt = SonioxSTTService(
        api_key=os.getenv("SONIOX_API_KEY"),
    )

    tl = TranscriptionLogger()

    pipeline = Pipeline([transport.input(), stt, tl])

    task = PipelineTask(pipeline)

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info(f"Client disconnected")

    # NOTE(review): other examples cancel the task in "on_client_disconnected";
    # confirm that "on_client_closed" fires on every supported transport.
    @transport.event_handler("on_client_closed")
    async def on_client_closed(transport, client):
        logger.info(f"Client closed connection")
        await task.cancel()

    # Pass the caller's flag through instead of hard-coding False, so the
    # parameter is no longer ignored.
    runner = PipelineRunner(handle_sigint=handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: the example runner is imported only when this file
    # is executed directly, then handed the bot coroutine and transport table.
    from pipecat.examples.run import main as launch_example

    launch_example(run_example, transport_params=transport_params)
|
||||
@@ -42,7 +42,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: TransportParams(
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
146
examples/foundational/14t-function-calling-direct.py
Normal file
146
examples/foundational/14t-function-calling-direct.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def get_current_weather(params: FunctionCallParams, location: str, format: str):
    """
    Get the current weather.

    Args:
        location (str): The city and state, e.g. "San Francisco, CA".
        format (str): The temperature unit to use. Must be either "celsius" or "fahrenheit". Infer this from the user's location.
    """
    # NOTE(review): docstring kept verbatim; registering this as a direct
    # function presumably derives the tool description from it — confirm.
    # Fixed demo payload: `location` and `format` are accepted but not read.
    weather_report = {"conditions": "nice", "temperature": "75"}
    await params.result_callback(weather_report)
|
||||
|
||||
|
||||
async def get_restaurant_recommendation(params: FunctionCallParams, location: str):
    """
    Get a restaurant recommendation.

    Args:
        location (str): The city and state, e.g. "San Francisco, CA".
    """
    # NOTE(review): docstring kept verbatim; registering this as a direct
    # function presumably derives the tool description from it — confirm.
    # Fixed demo payload: `location` is accepted but not read.
    recommendation = {"name": "The Golden Dragon"}
    await params.result_callback(recommendation)
|
||||
|
||||
|
||||
# Transport name -> zero-argument factory. Storing factories rather than
# instances means objects such as SileroVADAnalyzer are only created for the
# transport whose factory is actually called.
transport_params = dict(
    daily=lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    twilio=lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    webrtc=lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
)
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run a voice bot whose tools are registered as direct functions.

    Builds a Deepgram STT -> OpenAI LLM -> Cartesia TTS pipeline on the given
    transport, registers the two tool functions with the LLM, and runs the
    pipeline task until it finishes or is cancelled on client disconnect.

    Args:
        transport: Transport providing audio input and output.
        _: Parsed command-line arguments (unused).
        handle_sigint: Forwarded to PipelineRunner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    # The same two callables are registered with the LLM and advertised to it
    # through the tools schema.
    direct_functions = (get_current_weather, get_restaurant_recommendation)
    for tool_fn in direct_functions:
        llm.register_direct_function(tool_fn)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Speak a short filler line when function calls start.
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    tools = ToolsSchema(standard_tools=list(direct_functions))

    system_message = {
        "role": "system",
        "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
    }

    context = OpenAILLMContext([system_message], tools)
    context_aggregator = llm.create_context_aggregator(context)

    processors = [
        transport.input(),
        stt,
        context_aggregator.user(),
        llm,
        tts,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation with the current context.
        opening_frame = context_aggregator.user().get_context_frame()
        await task.queue_frames([opening_frame])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)
    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: the example runner is imported only when this file
    # is executed directly, then handed the bot coroutine and transport table.
    from pipecat.examples.run import main as launch_example

    launch_example(run_example, transport_params=transport_params)
|
||||
162
examples/foundational/14u-function-calling-ollama.py
Normal file
162
examples/foundational/14u-function-calling-ollama.py
Normal file
@@ -0,0 +1,162 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.ollama.llm import OLLamaLLMService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
    """Send a fixed weather result to the function call's result callback.

    Args:
        params: Function-call parameters; only `result_callback` is used.
    """
    canned_weather = {"conditions": "nice", "temperature": "75"}
    await params.result_callback(canned_weather)
|
||||
|
||||
|
||||
async def fetch_restaurant_recommendation(params: FunctionCallParams):
    """Send a fixed restaurant name to the function call's result callback.

    Args:
        params: Function-call parameters; only `result_callback` is used.
    """
    canned_recommendation = {"name": "The Golden Dragon"}
    await params.result_callback(canned_recommendation)
|
||||
|
||||
|
||||
# Transport name -> zero-argument factory. Storing factories rather than
# instances means objects such as SileroVADAnalyzer are only created for the
# transport whose factory is actually called.
transport_params = dict(
    daily=lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    twilio=lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
    webrtc=lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    ),
)
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run a function-calling voice bot backed by a local Ollama model.

    Builds a Deepgram STT -> Ollama LLM -> Cartesia TTS pipeline on the given
    transport, registers two tool handlers by name, describes them to the
    model with FunctionSchema objects, and runs the pipeline task until it
    finishes or is cancelled on client disconnect.

    Args:
        transport: Transport providing audio input and output.
        _: Parsed command-line arguments (unused).
        handle_sigint: Forwarded to PipelineRunner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = OLLamaLLMService(model="llama3.2")  # Update to the model you're running locally

    # Tool name -> handler. The names match the FunctionSchema names below.
    tool_handlers = (
        ("get_current_weather", fetch_weather_from_api),
        ("get_restaurant_recommendation", fetch_restaurant_recommendation),
    )
    for tool_name, tool_handler in tool_handlers:
        llm.register_function(tool_name, tool_handler)

    @llm.event_handler("on_function_calls_started")
    async def on_function_calls_started(service, function_calls):
        # Speak a short filler line when function calls start.
        await tts.queue_frame(TTSSpeakFrame("Let me check on that."))

    weather_function = FunctionSchema(
        name="get_current_weather",
        description="Get the current weather",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
            "format": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "The temperature unit to use. Infer this from the user's location.",
            },
        },
        required=["location", "format"],
    )
    restaurant_function = FunctionSchema(
        name="get_restaurant_recommendation",
        description="Get a restaurant recommendation",
        properties={
            "location": {
                "type": "string",
                "description": "The city and state, e.g. San Francisco, CA",
            },
        },
        required=["location"],
    )
    tools = ToolsSchema(standard_tools=[weather_function, restaurant_function])

    system_message = {
        "role": "system",
        "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
    }

    context = OpenAILLMContext([system_message], tools)
    context_aggregator = llm.create_context_aggregator(context)

    processors = [
        transport.input(),
        stt,
        context_aggregator.user(),
        llm,
        tts,
        transport.output(),
        context_aggregator.assistant(),
    ]
    pipeline = Pipeline(processors)

    task_params = PipelineParams(
        enable_metrics=True,
        enable_usage_metrics=True,
    )
    task = PipelineTask(pipeline, params=task_params)

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation with the current context.
        opening_frame = context_aggregator.user().get_context_frame()
        await task.queue_frames([opening_frame])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)
    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: the example runner is imported only when this file
    # is executed directly, then handed the bot coroutine and transport table.
    from pipecat.examples.run import main as launch_example

    launch_example(run_example, transport_params=transport_params)
|
||||
@@ -33,7 +33,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: TransportParams(
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
@@ -55,7 +55,7 @@ transport_params = {
|
||||
# endpointing, for now.
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
|
||||
),
|
||||
"twilio": lambda: TransportParams(
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
# set stop_secs to something roughly similar to the internal setting
|
||||
|
||||
242
examples/foundational/26f-gemini-multimodal-live-files-api.py
Normal file
242
examples/foundational/26f-gemini-multimodal-live-files-api.py
Normal file
@@ -0,0 +1,242 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.gemini_multimodal_live.gemini import (
|
||||
GeminiMultimodalLiveLLMService,
|
||||
)
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# Transport name -> zero-argument factory. Storing factories rather than
# instances means objects such as SileroVADAnalyzer are only created for the
# transport whose factory is actually called.
transport_params = dict(
    daily=lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_in_enabled=False,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
    ),
    twilio=lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_in_enabled=False,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
    ),
    webrtc=lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_in_enabled=False,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
    ),
)
|
||||
|
||||
|
||||
# Path of a user-supplied file to upload. Left empty to make
# create_sample_file() generate a temporary default document instead.
sample_file_path = ""


async def create_sample_file():
    """Create a sample text file for testing the File API.

    Returns:
        The module-level `sample_file_path` when it is non-empty; otherwise
        the path of a newly written temporary ``.txt`` file. The temporary
        file is created with ``delete=False``, so the caller must remove it.
    """
    # A path has already been chosen: use it as-is.
    if sample_file_path:
        return sample_file_path

    content = """# Sample Document for Gemini File API Test

This is a test document to demonstrate the Gemini File API functionality.

## Key Information:
- This document was created for testing purposes
- It contains information about AI assistants
- The document should be analyzed by Gemini
- The secret phrase for the test is "Pineapple Pizza"

## AI Assistant Capabilities:
1. Natural language processing
2. File analysis and understanding
3. Context-aware conversations
4. Multi-modal interactions

## Conclusion:
This document serves as a test case for the Gemini File API integration with Pipecat.
The AI should be able to reference and discuss the contents of this file.
"""

    # delete=False keeps the file on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
        f.write(content)
        return f.name
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run the Gemini Multimodal Live File API example.

    Uploads a text file through the LLM service's file API, starts a
    conversation whose context references the uploaded file, and cleans up
    once the pipeline stops. If the upload fails, the bot starts with a plain
    text context instead.

    Args:
        transport: Transport providing audio input and output.
        _: Parsed command-line arguments (unused).
        handle_sigint: Forwarded to PipelineRunner.
    """
    logger.info(f"Starting File API bot")

    # A non-empty module-level `sample_file_path` means create_sample_file()
    # returns that path rather than writing a temp file. Such a file belongs
    # to the user and must not be deleted during cleanup.
    user_supplied_file = bool(sample_file_path)

    # Create a sample file to upload
    file_path = await create_sample_file()
    logger.info(f"Created sample file: {file_path}")

    system_instruction = """
    You are a helpful AI assistant with access to a document that has been uploaded for analysis.

    The document contains test information.
    You should be able to:
    - Reference and discuss the contents of the uploaded document
    - Answer questions about what's in the document
    - Use the information from the document in our conversation

    Your output will be converted to audio so don't include special characters in your answers.
    Be friendly and demonstrate your ability to work with the uploaded file.
    """

    # Initialize Gemini service with File API support
    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=system_instruction,
        voice_id="Charon",  # Aoede, Charon, Fenrir, Kore, Puck
        transcribe_user_audio=True,
    )

    # Upload the sample file to Gemini File API
    logger.info("Uploading file to Gemini File API...")
    file_info = None
    try:
        file_info = await llm.file_api.upload_file(
            file_path, display_name="Sample Test Document"
        )
        logger.info(f"File uploaded successfully: {file_info['file']['name']}")

        # Get file URI and mime type
        file_uri = file_info["file"]["uri"]
        mime_type = "text/plain"

        # Create context with file reference
        context = OpenAILLMContext(
            [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Greet the user and let them know you have access to a document they can ask you about. Mention that you can discuss its contents.",
                        },
                        {
                            "type": "file_data",
                            "file_data": {"mime_type": mime_type, "file_uri": file_uri},
                        },
                    ],
                }
            ]
        )

        logger.info("File reference added to conversation context")

    except Exception as e:
        logger.error(f"Error uploading file: {e}")
        # Continue with a basic context if file upload fails
        context = OpenAILLMContext(
            [
                {
                    "role": "user",
                    "content": "Greet the user and explain that there was an issue with file upload, but you're ready to help with other tasks.",
                }
            ]
        )

    # Create context aggregator
    context_aggregator = llm.create_context_aggregator(context)

    # Build the pipeline
    pipeline = Pipeline(
        [
            transport.input(),
            context_aggregator.user(),
            llm,
            transport.output(),
            context_aggregator.assistant(),
        ]
    )

    # Configure the pipeline task
    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    # Handle client connection event
    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
        # Kick off the conversation using standard context frame
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    # Handle client disconnection events
    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info(f"Client disconnected")

    # NOTE(review): other examples cancel the task in "on_client_disconnected";
    # confirm that "on_client_closed" fires on every supported transport.
    @transport.event_handler("on_client_closed")
    async def on_client_closed(transport, client):
        logger.info(f"Client closed connection")
        await task.cancel()

    # Run the pipeline, passing the caller's flag through instead of
    # hard-coding False.
    runner = PipelineRunner(handle_sigint=handle_sigint)
    try:
        await runner.run(task)
    finally:
        # Clean up even when the runner raises or is cancelled.
        if file_info:
            try:
                await llm.file_api.delete_file(file_info["file"]["name"])
                logger.info("Cleaned up uploaded file from Gemini")
            except Exception as e:
                logger.error(f"Error cleaning up file: {e}")

        # Remove only the temp file this example created; never delete a file
        # the user passed in.
        if not user_supplied_file:
            try:
                os.unlink(file_path)
                logger.info("Cleaned up temporary file")
            except Exception as e:
                logger.error(f"Error removing temporary file: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    from pipecat.examples.run import main

    # Strip before testing so whitespace-only input counts as "no path"
    # instead of being announced as an upload and then silently ignored.
    upload_example_file = input("""

Please pass in a TEXT filepath to test upload.
NOTE: Files are stored on Google's servers for 48 hours.

Press Enter to use a default test file.

text filepath : """).strip()
    if upload_example_file:
        print(f"Uploading file: {upload_example_file}")
        # Read by create_sample_file() to skip generating the default document.
        sample_file_path = upload_example_file
    else:
        print(f"Using default file")

    main(run_example, transport_params=transport_params)
|
||||
@@ -0,0 +1,165 @@
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import Frame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.gemini_multimodal_live.gemini import GeminiMultimodalLiveLLMService
|
||||
from pipecat.services.google.frames import LLMSearchResponseFrame
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# Transport name -> zero-argument factory. Storing factories rather than
# instances means objects such as SileroVADAnalyzer are only created for the
# transport whose factory is actually called.
transport_params = dict(
    daily=lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_in_enabled=False,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
    ),
    twilio=lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_in_enabled=False,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
    ),
    webrtc=lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
        video_in_enabled=False,
        vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
    ),
)
|
||||
|
||||
SYSTEM_INSTRUCTION = """
|
||||
You are a helpful AI assistant that actively uses Google Search to provide up-to-date, accurate information.
|
||||
|
||||
IMPORTANT: For ANY question about current events, news, recent developments, real-time information, or anything that might have changed recently, you MUST use the google_search tool to get the latest information.
|
||||
|
||||
You should use Google Search for:
|
||||
- Current news and events
|
||||
- Recent developments in any field
|
||||
- Today's weather, stock prices, or other real-time data
|
||||
- Any question that starts with "what's happening", "latest", "recent", "current", "today", etc.
|
||||
- When you're not certain about recent information
|
||||
|
||||
Always be proactive about using search when the user asks about anything that could benefit from real-time information.
|
||||
|
||||
Your output will be converted to audio so don't include special characters in your answers.
|
||||
|
||||
Respond to what the user said in a creative and helpful way, always using search for current information.
|
||||
"""
|
||||
|
||||
|
||||
class GroundingMetadataProcessor(FrameProcessor):
    """Processor to capture and display grounding metadata from Gemini Live API."""

    def __init__(self):
        super().__init__()
        # Number of LLMSearchResponseFrame instances seen so far.
        self._grounding_count = 0

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Log grounding metadata for search-response frames and forward every frame.

        Args:
            frame: The frame travelling through the pipeline.
            direction: The direction the frame is travelling in.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, LLMSearchResponseFrame):
            self._log_grounding(frame)

        # Always push the frame downstream
        await self.push_frame(frame, direction)

    def _log_grounding(self, frame):
        """Log the search text, rendered content and origins of one search response."""
        self._grounding_count += 1
        logger.info(f"\n\n🔍 GROUNDING METADATA RECEIVED #{self._grounding_count}\n")

        # NOTE(review): search_result is guarded here like the other fields,
        # assuming it may be None/empty — confirm against LLMSearchResponseFrame.
        search_text = frame.search_result or ""
        logger.info(f"📝 Search Result Text: {search_text[:200]}...")

        if frame.rendered_content:
            logger.info(f"🔗 Rendered Content: {frame.rendered_content}")

        if frame.origins:
            logger.info(f"📍 Number of Origins: {len(frame.origins)}")
            for i, origin in enumerate(frame.origins):
                logger.info(f"  Origin {i + 1}: {origin.site_title} - {origin.site_uri}")
                if origin.results:
                    logger.info(f"    Results: {len(origin.results)} items")
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run the Gemini Live grounding-metadata test bot.

    Configures GeminiMultimodalLiveLLMService with the google_search and
    code_execution custom tools and places a GroundingMetadataProcessor
    directly after the LLM so search-response frames are logged.

    Args:
        transport: Transport providing audio input and output.
        _: Parsed command-line arguments (unused).
        handle_sigint: Forwarded to PipelineRunner.
    """
    logger.info(f"Starting Gemini Live Grounding Metadata Test Bot")

    # Create tools using ToolsSchema with custom tools for Gemini
    tools = ToolsSchema(
        standard_tools=[],  # No standard function declarations needed
        custom_tools={AdapterType.GEMINI: [{"google_search": {}}, {"code_execution": {}}]},
    )

    llm = GeminiMultimodalLiveLLMService(
        api_key=os.getenv("GOOGLE_API_KEY"),
        system_instruction=SYSTEM_INSTRUCTION,
        voice_id="Charon",  # Aoede, Charon, Fenrir, Kore, Puck
        transcribe_user_audio=True,
        tools=tools,
    )

    # Create a processor to capture grounding metadata
    grounding_processor = GroundingMetadataProcessor()

    messages = [
        {
            "role": "user",
            "content": "Please introduce yourself and let me know that you can help with current information by searching the web. Ask me what current information I'd like to know about.",
        },
    ]

    # Set up conversation context and management
    context = OpenAILLMContext(messages)
    context_aggregator = llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),
            context_aggregator.user(),
            llm,
            grounding_processor,  # Add our grounding processor here
            transport.output(),
            context_aggregator.assistant(),
        ]
    )

    task = PipelineTask(pipeline)

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected")
        # Kick off the conversation.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info(f"Client disconnected")

    # NOTE(review): other examples cancel the task in "on_client_disconnected";
    # confirm that "on_client_closed" fires on every supported transport.
    @transport.event_handler("on_client_closed")
    async def on_client_closed(transport, client):
        logger.info(f"Client closed connection")
        await task.cancel()

    # Pass the caller's flag through instead of hard-coding False, so the
    # parameter is no longer ignored.
    runner = PipelineRunner(handle_sigint=handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: the example runner is imported only when this file
    # is executed directly, then handed the bot coroutine and transport table.
    from pipecat.examples.run import main as launch_example

    launch_example(run_example, transport_params=transport_params)
|
||||
@@ -11,7 +11,7 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn import LocalSmartTurnAnalyzer
|
||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v2 import LocalSmartTurnAnalyzerV2
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -37,7 +37,7 @@ load_dotenv(override=True)
|
||||
# # Hugging Face uses LFS to store large model files, including .mlpackage
|
||||
# git lfs install
|
||||
# # Clone the repo with the smart_turn_classifier.mlpackage
|
||||
# git clone https://huggingface.co/pipecat-ai/smart-turn
|
||||
# git clone https://huggingface.co/pipecat-ai/smart-turn-v2
|
||||
#
|
||||
# Then set the env variable:
|
||||
# export LOCAL_SMART_TURN_MODEL_PATH=./smart-turn
|
||||
@@ -52,7 +52,7 @@ transport_params = {
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzer(
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV2(
|
||||
smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams()
|
||||
),
|
||||
),
|
||||
@@ -60,7 +60,7 @@ transport_params = {
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzer(
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV2(
|
||||
smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams()
|
||||
),
|
||||
),
|
||||
@@ -68,7 +68,7 @@ transport_params = {
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=LocalSmartTurnAnalyzer(
|
||||
turn_analyzer=LocalSmartTurnAnalyzerV2(
|
||||
smart_turn_model_path=smart_turn_model_path, params=SmartTurnParams()
|
||||
),
|
||||
),
|
||||
|
||||
133
examples/foundational/39c-mcp-run-http.py
Normal file
133
examples/foundational/39c-mcp-run-http.py
Normal file
@@ -0,0 +1,133 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from mcp.client.session_group import StreamableHttpParameters
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.mcp_service import MCPClient
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
def _audio_params(params_cls):
    """Build ``params_cls`` with audio in/out enabled and a new Silero VAD analyzer."""
    return params_cls(
        audio_in_enabled=True,
        audio_out_enabled=True,
        vad_analyzer=SileroVADAnalyzer(),
    )


# Transport name -> zero-argument factory. Nothing (including the
# SileroVADAnalyzer) is constructed until the factory for the selected
# transport is actually called.
transport_params = {
    "daily": lambda: _audio_params(DailyParams),
    "twilio": lambda: _audio_params(FastAPIWebsocketParams),
    "webrtc": lambda: _audio_params(TransportParams),
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
    """Run a voice bot whose LLM can call tools served by GitHub's MCP endpoint.

    Builds a Deepgram STT -> Google LLM -> Cartesia TTS pipeline around
    ``transport``, registers the MCP server's tools with the LLM, and runs the
    pipeline with a ``PipelineRunner``. The task is cancelled when the client
    disconnects.

    Args:
        transport: Transport supplying the pipeline's input and output.
        _: Parsed command-line arguments (unused).
        handle_sigint: Forwarded to ``PipelineRunner``.

    Returns:
        None. Returns early, after logging, if the MCP client cannot be created.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash")

    try:
        # GitHub MCP docs: https://github.com/github/github-mcp-server
        # Enable GitHub Copilot on your GitHub account. Free tier is ok. (https://github.com/settings/copilot)
        # Generate a personal access token. It must be a fine-grained token; classic tokens are not supported. (https://github.com/settings/personal-access-tokens)
        # Set the permissions you want to use (e.g. "all repositories", "profile: read/write", etc.)
        mcp = MCPClient(
            server_params=StreamableHttpParameters(
                url="https://api.githubcopilot.com/mcp/",
                headers={"Authorization": f"Bearer {os.getenv('GITHUB_PERSONAL_ACCESS_TOKEN')}"},
            )
        )
    except Exception:
        logger.error("error setting up mcp")
        logger.exception("error trace:")
        # Without a client, `mcp` is unbound and the register_tools() call
        # below would raise UnboundLocalError, hiding the real failure. Stop here.
        return

    tools = await mcp.register_tools(llm)

    system = """
    You are a helpful LLM in a WebRTC call.
    Your goal is to answer questions about the user's GitHub repositories and account.
    You have access to a number of tools provided by Github. Use any and all tools to help users.
    Your output will be converted to audio so don't include special characters in your answers.
    Don't overexplain what you are doing.
    Just respond with short sentences when you are carrying out tool calls.
    """

    messages = [{"role": "system", "content": system}]

    context = OpenAILLMContext(messages, tools)
    context_aggregator = llm.create_context_aggregator(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            stt,
            context_aggregator.user(),  # User spoken responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses and tool context
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info(f"Client connected: {client}")
        # Kick off the conversation.
        await task.queue_frames([context_aggregator.user().get_context_frame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=handle_sigint)

    await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported here so the example runner is only loaded when this file is
    # executed as a script.
    import pipecat.examples.run as example_runner

    example_runner.main(run_example, transport_params=transport_params)
|
||||
@@ -102,6 +102,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
||||
access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
region=os.getenv("AWS_REGION"), # as of 2025-05-06, us-east-1 is the only supported region
|
||||
session_token=os.getenv("AWS_SESSION_TOKEN"),
|
||||
voice_id="tiffany", # matthew, tiffany, amy
|
||||
# you could choose to pass instruction here rather than via context
|
||||
# system_instruction=system_instruction
|
||||
|
||||
@@ -10,8 +10,8 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.interruptions.min_words_interruption_strategy import MinWordsInterruptionStrategy
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import MinWordsInterruptionStrategy
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
|
||||
@@ -20,11 +20,10 @@ import {
|
||||
} from '@pipecat-ai/client-js';
|
||||
import {
|
||||
ProtobufFrameSerializer,
|
||||
WebSocketTransport
|
||||
} from "@pipecat-ai/websocket-transport";
|
||||
WebSocketTransport,
|
||||
} from '@pipecat-ai/websocket-transport';
|
||||
|
||||
class RecordingSerializer extends ProtobufFrameSerializer {
|
||||
|
||||
private lastTimestamp: number | null = null;
|
||||
private recordingAudioToSend: boolean = false;
|
||||
private _recordedAudio: { data: ArrayBuffer; delay: number }[] = [];
|
||||
@@ -40,7 +39,11 @@ class RecordingSerializer extends ProtobufFrameSerializer {
|
||||
}
|
||||
|
||||
// @ts-ignore
|
||||
serializeAudio(data: ArrayBuffer, sampleRate: number, numChannels: number): Uint8Array | null {
|
||||
serializeAudio(
|
||||
data: ArrayBuffer,
|
||||
sampleRate: number,
|
||||
numChannels: number
|
||||
): Uint8Array | null {
|
||||
if (this.recordingAudioToSend) {
|
||||
const now = Date.now();
|
||||
// Compute delay since last packet
|
||||
@@ -55,13 +58,13 @@ class RecordingSerializer extends ProtobufFrameSerializer {
|
||||
}
|
||||
|
||||
public get recordedAudio() {
|
||||
return this._recordedAudio
|
||||
return this._recordedAudio;
|
||||
}
|
||||
}
|
||||
|
||||
class WebsocketClientApp {
|
||||
private ENABLE_RECORDING_MODE = false
|
||||
private RECORDING_TIME_MS = 10000
|
||||
private ENABLE_RECORDING_MODE = false;
|
||||
private RECORDING_TIME_MS = 10000;
|
||||
|
||||
private rtviClient: RTVIClient | null = null;
|
||||
private connectBtn: HTMLButtonElement | null = null;
|
||||
@@ -71,7 +74,7 @@ class WebsocketClientApp {
|
||||
private botAudio: HTMLAudioElement;
|
||||
|
||||
private declare websocketTransport: WebSocketTransport;
|
||||
private sendRecordedAudio: boolean = false
|
||||
private sendRecordedAudio: boolean = false;
|
||||
private declare recordingSerializer: RecordingSerializer;
|
||||
|
||||
private playBtn: HTMLButtonElement | null = null;
|
||||
@@ -91,8 +94,12 @@ class WebsocketClientApp {
|
||||
* Set up references to DOM elements and create necessary media elements
|
||||
*/
|
||||
private setupDOMElements(): void {
|
||||
this.connectBtn = document.getElementById('connect-btn') as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById('disconnect-btn') as HTMLButtonElement;
|
||||
this.connectBtn = document.getElementById(
|
||||
'connect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById(
|
||||
'disconnect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.playBtn = document.getElementById('play-btn') as HTMLButtonElement;
|
||||
@@ -105,8 +112,12 @@ class WebsocketClientApp {
|
||||
private setupEventListeners(): void {
|
||||
this.connectBtn?.addEventListener('click', () => this.connect());
|
||||
this.disconnectBtn?.addEventListener('click', () => this.disconnect());
|
||||
this.playBtn?.addEventListener('click', () => this.startSendingRecordedAudio());
|
||||
this.stopBtn?.addEventListener('click', () => this.stopSendingRecordedAudio());
|
||||
this.playBtn?.addEventListener('click', () =>
|
||||
this.startSendingRecordedAudio()
|
||||
);
|
||||
this.stopBtn?.addEventListener('click', () =>
|
||||
this.stopSendingRecordedAudio()
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -165,7 +176,9 @@ class WebsocketClientApp {
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`);
|
||||
this.log(
|
||||
`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -175,7 +188,10 @@ class WebsocketClientApp {
|
||||
*/
|
||||
private setupAudioTrack(track: MediaStreamTrack): void {
|
||||
this.log('Setting up audio track');
|
||||
if (this.botAudio.srcObject && "getAudioTracks" in this.botAudio.srcObject) {
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
|
||||
if (oldTrack?.id === track.id) return;
|
||||
}
|
||||
@@ -190,17 +206,17 @@ class WebsocketClientApp {
|
||||
try {
|
||||
const startTime = Date.now();
|
||||
|
||||
this.recordingSerializer = new RecordingSerializer()
|
||||
const transport = this.ENABLE_RECORDING_MODE ? new WebSocketTransport({serializer: this.recordingSerializer}) : new WebSocketTransport();
|
||||
this.websocketTransport = transport
|
||||
this.recordingSerializer = new RecordingSerializer();
|
||||
const ws_opts = {
|
||||
serializer: this.ENABLE_RECORDING_MODE
|
||||
? this.recordingSerializer
|
||||
: new ProtobufFrameSerializer(),
|
||||
recorderSampleRate: 8000,
|
||||
playerSampleRate: 8000,
|
||||
};
|
||||
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
transport,
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: { connect: '/connect' },
|
||||
},
|
||||
transport: new WebSocketTransport(ws_opts),
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
@@ -228,27 +244,34 @@ class WebsocketClientApp {
|
||||
onMessageError: (error) => console.error('Message error:', error),
|
||||
onError: (error) => console.error('Error:', error),
|
||||
},
|
||||
}
|
||||
};
|
||||
this.rtviClient = new RTVIClient(RTVIConfig);
|
||||
this.websocketTransport = this.rtviClient.transport;
|
||||
this.setupTrackListeners();
|
||||
|
||||
this.log('Initializing devices...');
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
this.log('Connecting to bot...');
|
||||
await this.rtviClient.connect();
|
||||
await this.rtviClient.connect({
|
||||
endpoint: 'http://localhost:7860/connect',
|
||||
});
|
||||
|
||||
const timeTaken = Date.now() - startTime;
|
||||
this.log(`Connection complete, timeTaken: ${timeTaken}`);
|
||||
|
||||
if (this.ENABLE_RECORDING_MODE) {
|
||||
this.log(`Starting to recording the next ${(this.RECORDING_TIME_MS/1000)}s of audio`);
|
||||
this.recordingSerializer.startRecording()
|
||||
await this.sleep(this.RECORDING_TIME_MS)
|
||||
this.recordingSerializer.stopRecording()
|
||||
this.log("Recording stopped");
|
||||
this.rtviClient.enableMic(false)
|
||||
this.startSendingRecordedAudio()
|
||||
this.log(
|
||||
`Starting to recording the next ${
|
||||
this.RECORDING_TIME_MS / 1000
|
||||
}s of audio`
|
||||
);
|
||||
this.recordingSerializer.startRecording();
|
||||
await this.sleep(this.RECORDING_TIME_MS);
|
||||
this.recordingSerializer.stopRecording();
|
||||
this.log('Recording stopped');
|
||||
this.rtviClient.enableMic(false);
|
||||
this.startSendingRecordedAudio();
|
||||
}
|
||||
} catch (error) {
|
||||
this.log(`Error connecting: ${(error as Error).message}`);
|
||||
@@ -270,11 +293,16 @@ class WebsocketClientApp {
|
||||
public async disconnect(): Promise<void> {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
this.stopSendingRecordedAudio()
|
||||
this.stopSendingRecordedAudio();
|
||||
await this.rtviClient.disconnect();
|
||||
this.rtviClient = null;
|
||||
if (this.botAudio.srcObject && "getAudioTracks" in this.botAudio.srcObject) {
|
||||
this.botAudio.srcObject.getAudioTracks().forEach((track) => track.stop());
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
this.botAudio.srcObject
|
||||
.getAudioTracks()
|
||||
.forEach((track) => track.stop());
|
||||
this.botAudio.srcObject = null;
|
||||
}
|
||||
} catch (error) {
|
||||
@@ -284,21 +312,21 @@ class WebsocketClientApp {
|
||||
}
|
||||
|
||||
private startSendingRecordedAudio() {
|
||||
this.sendRecordedAudio = true
|
||||
this.sendRecordedAudio = true;
|
||||
if (this.playBtn) this.playBtn.disabled = true;
|
||||
if (this.stopBtn) this.stopBtn.disabled = false;
|
||||
void this.replayAudio()
|
||||
void this.replayAudio();
|
||||
}
|
||||
|
||||
private stopSendingRecordedAudio() {
|
||||
if (this.stopBtn) this.stopBtn.disabled = true;
|
||||
if (this.playBtn) this.playBtn.disabled = false;
|
||||
this.sendRecordedAudio = false
|
||||
this.sendRecordedAudio = false;
|
||||
}
|
||||
|
||||
private async replayAudio() {
|
||||
if (this.sendRecordedAudio) {
|
||||
this.log("Sending recorded audio")
|
||||
this.log('Sending recorded audio');
|
||||
for (const chunk of this.recordingSerializer.recordedAudio) {
|
||||
await this.sleep(chunk.delay);
|
||||
this.websocketTransport.handleUserAudioStream(chunk.data);
|
||||
@@ -306,14 +334,13 @@ class WebsocketClientApp {
|
||||
const randomDelay = 1000 + Math.random() * (10000 - 500);
|
||||
await this.sleep(randomDelay);
|
||||
|
||||
void this.replayAudio()
|
||||
void this.replayAudio();
|
||||
}
|
||||
}
|
||||
|
||||
private sleep(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
declare global {
|
||||
|
||||
4
examples/freeze-test/env.example
Normal file
4
examples/freeze-test/env.example
Normal file
@@ -0,0 +1,4 @@
|
||||
SENTRY_DSN=
|
||||
DEEPGRAM_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
@@ -18,7 +18,6 @@ from fastapi import FastAPI, Request, WebSocket
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import RedirectResponse
|
||||
from loguru import logger
|
||||
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
@@ -27,11 +26,13 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InterimTranscriptionFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMMessagesFrame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
StopFrame,
|
||||
StopInterruptionFrame,
|
||||
TranscriptionFrame,
|
||||
TTSSpeakFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
@@ -47,6 +48,7 @@ from pipecat.processors.aggregators.openai_llm_context import (
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIProcessor
|
||||
from pipecat.processors.metrics.sentry import SentryMetrics
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.serializers.protobuf import ProtobufFrameSerializer
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
@@ -78,9 +80,6 @@ app.add_middleware(
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# Mount the frontend at /
|
||||
app.mount("/client", SmallWebRTCPrebuiltUI)
|
||||
|
||||
|
||||
class SimulateFreezeInput(FrameProcessor):
|
||||
def __init__(
|
||||
@@ -188,6 +187,37 @@ async def run_example(websocket_client):
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
async def handle_user_idle(user_idle: UserIdleProcessor, retry_count: int) -> bool:
    """Escalating reaction to user silence, used as the UserIdleProcessor callback.

    On retry 1 and 2 a system message is appended to ``messages`` and pushed
    to the pipeline as an ``LLMMessagesFrame``. On any other retry count a
    goodbye is spoken and an ``EndFrame`` is queued on ``task``.

    Args:
        user_idle: Processor through which the frames are pushed.
        retry_count: Idle-retry counter supplied by the caller.

    Returns:
        True after a prompt was pushed, False once the conversation is ended.
    """
    # Prompt text per retry; a retry count that is not listed ends the call.
    nudges = {
        1: "The user has been quiet. Politely and briefly ask if they're still there.",
        2: "The user is still inactive. Ask if they'd like to continue our conversation.",
    }
    nudge = nudges.get(retry_count)

    if nudge is None:
        farewell = TTSSpeakFrame("It seems like you're busy right now. Have a nice day!")
        await user_idle.push_frame(farewell)
        await task.queue_frame(EndFrame())
        return False

    messages.append({"role": "system", "content": nudge})
    await user_idle.push_frame(LLMMessagesFrame(messages))
    return True
|
||||
|
||||
user_idle = UserIdleProcessor(callback=handle_user_idle, timeout=10.0)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
@@ -222,6 +252,7 @@ async def run_example(websocket_client):
|
||||
stt,
|
||||
],
|
||||
),
|
||||
user_idle,
|
||||
rtvi,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
@@ -238,6 +269,8 @@ async def run_example(websocket_client):
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
audio_in_sample_rate=8000,
|
||||
audio_out_sample_rate=8000,
|
||||
),
|
||||
idle_timeout_secs=120,
|
||||
observers=[
|
||||
@@ -249,6 +282,10 @@ async def run_example(websocket_client):
|
||||
# LLMTextFrame: None,
|
||||
OpenAILLMContextFrame: None,
|
||||
LLMFullResponseEndFrame: None,
|
||||
UserStartedSpeakingFrame: None,
|
||||
UserStoppedSpeakingFrame: None,
|
||||
StartInterruptionFrame: None,
|
||||
StopInterruptionFrame: None,
|
||||
},
|
||||
exclude_fields={
|
||||
"result",
|
||||
|
||||
4
examples/freeze-test/requirements.txt
Normal file
4
examples/freeze-test/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[silero,websocket,openai, deepgram, cartesia, sentry]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.8"
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* RTVI Client Implementation
|
||||
* Pipecat Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
@@ -18,20 +18,22 @@
|
||||
|
||||
import {
|
||||
Participant,
|
||||
RTVIClient,
|
||||
RTVIClientOptions,
|
||||
PipecatClient,
|
||||
PipecatClientOptions,
|
||||
RTVIEvent,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import {
|
||||
DailyEventCallbacks,
|
||||
DailyTransport,
|
||||
} from '@pipecat-ai/daily-transport';
|
||||
import SoundUtils from './util/soundUtils';
|
||||
import { InstantVoiceHelper } from './util/instantVoiceHelper';
|
||||
|
||||
/**
|
||||
* InstantVoiceClient handles the connection and media management for a real-time
|
||||
* voice and video interaction with an AI bot.
|
||||
*/
|
||||
class InstantVoiceClient {
|
||||
private declare rtviClient: RTVIClient;
|
||||
private declare pcClient: PipecatClient;
|
||||
private connectBtn: HTMLButtonElement | null = null;
|
||||
private disconnectBtn: HTMLButtonElement | null = null;
|
||||
private statusSpan: HTMLElement | null = null;
|
||||
@@ -46,7 +48,7 @@ class InstantVoiceClient {
|
||||
document.body.appendChild(this.botAudio);
|
||||
this.setupDOMElements();
|
||||
this.setupEventListeners();
|
||||
this.initializeRTVIClient();
|
||||
this.initializePipecatClient();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -72,16 +74,11 @@ class InstantVoiceClient {
|
||||
this.disconnectBtn?.addEventListener('click', () => this.disconnect());
|
||||
}
|
||||
|
||||
private initializeRTVIClient(): void {
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
private initializePipecatClient(): void {
|
||||
const PipecatConfig: PipecatClientOptions = {
|
||||
transport: new DailyTransport({
|
||||
bufferLocalAudioUntilBotReady: true,
|
||||
}),
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: { connect: '/connect' },
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
@@ -113,30 +110,23 @@ class InstantVoiceClient {
|
||||
onBotTranscript: (data) => this.log(`Bot: ${data.text}`),
|
||||
onMessageError: (error) => console.error('Message error:', error),
|
||||
onError: (error) => console.error('Error:', error),
|
||||
},
|
||||
onAudioBufferingStarted: () => {
|
||||
SoundUtils.beep();
|
||||
this.updateBufferingStatus('Yes');
|
||||
this.log(
|
||||
`onMicCaptureStarted, timeTaken: ${Date.now() - this.startTime}`
|
||||
);
|
||||
},
|
||||
onAudioBufferingStopped: () => {
|
||||
this.updateBufferingStatus('No');
|
||||
this.log(
|
||||
`onMicCaptureStopped, timeTaken: ${Date.now() - this.startTime}`
|
||||
);
|
||||
},
|
||||
} as DailyEventCallbacks,
|
||||
};
|
||||
|
||||
this.rtviClient = new RTVIClient(RTVIConfig);
|
||||
this.rtviClient.registerHelper(
|
||||
'transport',
|
||||
new InstantVoiceHelper({
|
||||
callbacks: {
|
||||
onAudioBufferingStarted: () => {
|
||||
SoundUtils.beep();
|
||||
this.updateBufferingStatus('Yes');
|
||||
this.log(
|
||||
`onMicCaptureStarted, timeTaken: ${Date.now() - this.startTime}`
|
||||
);
|
||||
},
|
||||
onAudioBufferingStopped: () => {
|
||||
this.updateBufferingStatus('No');
|
||||
this.log(
|
||||
`onMicCaptureStopped, timeTaken: ${Date.now() - this.startTime}`
|
||||
);
|
||||
},
|
||||
},
|
||||
})
|
||||
);
|
||||
this.pcClient = new PipecatClient(PipecatConfig);
|
||||
this.setupTrackListeners();
|
||||
}
|
||||
|
||||
@@ -182,8 +172,8 @@ class InstantVoiceClient {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.rtviClient) return;
|
||||
const tracks = this.rtviClient.tracks();
|
||||
if (!this.pcClient) return;
|
||||
const tracks = this.pcClient.tracks();
|
||||
if (tracks.bot?.audio) {
|
||||
this.setupAudioTrack(tracks.bot.audio);
|
||||
}
|
||||
@@ -194,10 +184,10 @@ class InstantVoiceClient {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.rtviClient) return;
|
||||
if (!this.pcClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local && track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
@@ -205,7 +195,7 @@ class InstantVoiceClient {
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(
|
||||
`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`
|
||||
);
|
||||
@@ -230,22 +220,25 @@ class InstantVoiceClient {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
*/
|
||||
public async connect(): Promise<void> {
|
||||
try {
|
||||
this.startTime = Date.now();
|
||||
this.log('Connecting to bot...');
|
||||
await this.rtviClient.connect();
|
||||
await this.pcClient.connect({
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
endpoint: 'http://localhost:7860/connect',
|
||||
});
|
||||
} catch (error) {
|
||||
this.log(`Error connecting: ${(error as Error).message}`);
|
||||
this.updateStatus('Error');
|
||||
this.updateBufferingStatus('No');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.rtviClient) {
|
||||
if (this.pcClient) {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
await this.pcClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError}`);
|
||||
}
|
||||
@@ -258,7 +251,7 @@ class InstantVoiceClient {
|
||||
*/
|
||||
public async disconnect(): Promise<void> {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
await this.pcClient.disconnect();
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
import {RTVIClientHelper, RTVIClientHelperOptions, RTVIMessage} from "@pipecat-ai/client-js";
|
||||
import {DailyRTVIMessageType} from '@pipecat-ai/daily-transport';
|
||||
|
||||
/** Optional hooks invoked when an audio-buffering message is handled. */
export type InstantVoiceHelperCallbacks = {
  onAudioBufferingStarted?: () => void;
  onAudioBufferingStopped?: () => void;
};

// --- Interface and class
/** Helper options extended with the audio-buffering callbacks. */
export interface InstantVoiceHelperOptions extends RTVIClientHelperOptions {
  callbacks?: InstantVoiceHelperCallbacks;
}

/**
 * Client helper that forwards the two Daily audio-buffering messages to the
 * callbacks supplied in its options.
 */
export class InstantVoiceHelper extends RTVIClientHelper {
  protected declare _options: InstantVoiceHelperOptions;

  constructor(options: InstantVoiceHelperOptions) {
    super(options);
  }

  /**
   * Invokes the callback matching the message type, if one was provided.
   * Messages of any other type are ignored.
   */
  handleMessage(rtviMessage: RTVIMessage): void {
    const hooks = this._options.callbacks;
    const kind = rtviMessage.type;

    if (kind === DailyRTVIMessageType.AUDIO_BUFFERING_STARTED) {
      hooks?.onAudioBufferingStarted?.();
    } else if (kind === DailyRTVIMessageType.AUDIO_BUFFERING_STOPPED) {
      hooks?.onAudioBufferingStopped?.();
    }
  }

  /** Message types this helper handles. */
  getMessageTypes(): string[] {
    const handled: string[] = [
      DailyRTVIMessageType.AUDIO_BUFFERING_STARTED,
      DailyRTVIMessageType.AUDIO_BUFFERING_STOPPED,
    ];
    return handled;
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.8"
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* RTVI Client Implementation
|
||||
* Pipecat Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
@@ -16,78 +16,9 @@
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import {
|
||||
LogLevel,
|
||||
RTVIClient,
|
||||
RTVIClientHelper,
|
||||
RTVIEvent,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { LogLevel, PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
/**
 * Client helper that renders 'bot-llm-search-response' messages into a panel:
 * the search result text, a list of source links, and any rendered content
 * inside an iframe.
 */
class SearchResponseHelper extends RTVIClientHelper {
  /**
   * @param contentPanel Element whose contents are replaced on every message
   *   that carries data.
   */
  constructor(contentPanel) {
    super();
    this.contentPanel = contentPanel;
  }

  /**
   * Rebuilds the content panel from the message payload. Messages without
   * `data` leave the panel untouched.
   */
  handleMessage(rtviMessage) {
    console.log('SearchResponseHelper, received message:', rtviMessage);

    const data = rtviMessage.data;
    if (!data) return;

    // Clear existing content
    this.contentPanel.innerHTML = '';

    // Create a container for all content
    const contentContainer = document.createElement('div');
    contentContainer.className = 'content-container';

    if (data.search_result) {
      contentContainer.appendChild(this._buildSearchResult(data.search_result));
    }
    if (data.origins) {
      contentContainer.appendChild(this._buildSources(data.origins));
    }
    if (data.rendered_content) {
      contentContainer.appendChild(this._buildRenderedContent(data.rendered_content));
    }

    // Append the content container to the content panel
    this.contentPanel.appendChild(contentContainer);
  }

  /** Builds the block holding the search result as plain text. */
  _buildSearchResult(text) {
    const searchResultDiv = document.createElement('div');
    searchResultDiv.className = 'search-result';
    searchResultDiv.textContent = text;
    return searchResultDiv;
  }

  /** Builds the "Sources:" block with one link per origin. */
  _buildSources(origins) {
    const sourcesDiv = document.createElement('div');
    sourcesDiv.className = 'sources';

    const sourcesTitle = document.createElement('h3');
    sourcesTitle.className = 'sources-title';
    sourcesTitle.textContent = 'Sources:';
    sourcesDiv.appendChild(sourcesTitle);

    for (const origin of origins) {
      const sourceLink = document.createElement('a');
      sourceLink.className = 'source-link';
      sourceLink.href = origin.site_uri;
      sourceLink.target = '_blank';
      // The URL comes from the message payload; keep the opened page from
      // reaching back into this one through window.opener.
      sourceLink.rel = 'noopener noreferrer';
      sourceLink.textContent = origin.site_title;
      sourcesDiv.appendChild(sourceLink);
    }

    return sourcesDiv;
  }

  /** Builds the iframe that displays the rendered HTML content. */
  _buildRenderedContent(html) {
    const iframe = document.createElement('iframe');
    iframe.className = 'iframe-container';
    iframe.srcdoc = html;
    return iframe;
  }

  /** Message types this helper handles. */
  getMessageTypes() {
    return ['bot-llm-search-response'];
  }
}
|
||||
|
||||
/**
|
||||
* ChatbotClient handles the connection and media management for a real-time
|
||||
* voice and video interaction with an AI bot.
|
||||
@@ -95,7 +26,7 @@ class SearchResponseHelper extends RTVIClientHelper {
|
||||
class ChatbotClient {
|
||||
constructor() {
|
||||
// Initialize client state
|
||||
this.rtviClient = null;
|
||||
this.pcClient = null;
|
||||
this.setupDOMElements();
|
||||
this.setupEventListeners();
|
||||
}
|
||||
@@ -160,10 +91,10 @@ class ChatbotClient {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.rtviClient) return;
|
||||
if (!this.pcClient) return;
|
||||
|
||||
// Get current tracks from the client
|
||||
const tracks = this.rtviClient.tracks();
|
||||
const tracks = this.pcClient.tracks();
|
||||
|
||||
// Set up any available bot tracks
|
||||
if (tracks.bot?.audio) {
|
||||
@@ -176,10 +107,10 @@ class ChatbotClient {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.rtviClient) return;
|
||||
if (!this.pcClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local && track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
@@ -187,7 +118,7 @@ class ChatbotClient {
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(
|
||||
`Track stopped event: ${track.kind} from ${
|
||||
participant?.name || 'unknown'
|
||||
@@ -213,20 +144,13 @@ class ChatbotClient {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
*/
|
||||
async connect() {
|
||||
try {
|
||||
// Initialize the RTVI client with a Daily WebRTC transport and our configuration
|
||||
this.rtviClient = new RTVIClient({
|
||||
// Initialize the Pipecat client with a Daily WebRTC transport and our configuration
|
||||
this.pcClient = new PipecatClient({
|
||||
transport: new DailyTransport(),
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
},
|
||||
enableMic: true, // Enable microphone for user input
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
@@ -251,6 +175,8 @@ class ChatbotClient {
|
||||
this.setupMediaTracks();
|
||||
}
|
||||
},
|
||||
// Handle search response events
|
||||
onBotLlmSearchResponse: this.handleSearchResponse.bind(this),
|
||||
// Handle bot connection events
|
||||
onBotConnected: (participant) => {
|
||||
this.log(`Bot connected: ${JSON.stringify(participant)}`);
|
||||
@@ -281,22 +207,22 @@ class ChatbotClient {
|
||||
},
|
||||
},
|
||||
});
|
||||
//this.rtviClient.setLogLevel(LogLevel.DEBUG)
|
||||
this.rtviClient.registerHelper(
|
||||
'llm',
|
||||
new SearchResponseHelper(this.searchResultContainer)
|
||||
);
|
||||
|
||||
//this.pcClient.setLogLevel(LogLevel.DEBUG)
|
||||
|
||||
// Set up listeners for media track events
|
||||
this.setupTrackListeners();
|
||||
|
||||
// Initialize audio devices
|
||||
this.log('Initializing devices...');
|
||||
await this.rtviClient.initDevices();
|
||||
await this.pcClient.initDevices();
|
||||
|
||||
// Connect to the bot
|
||||
this.log('Connecting to bot...');
|
||||
await this.rtviClient.connect();
|
||||
await this.pcClient.connect({
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
endpoint: 'http://localhost:7860/connect',
|
||||
});
|
||||
|
||||
this.log('Connection complete');
|
||||
} catch (error) {
|
||||
@@ -306,9 +232,9 @@ class ChatbotClient {
|
||||
this.updateStatus('Error');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.rtviClient) {
|
||||
if (this.pcClient) {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
await this.pcClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError.message}`);
|
||||
}
|
||||
@@ -320,11 +246,11 @@ class ChatbotClient {
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.rtviClient) {
|
||||
if (this.pcClient) {
|
||||
try {
|
||||
// Disconnect the RTVI client
|
||||
await this.rtviClient.disconnect();
|
||||
this.rtviClient = null;
|
||||
// Disconnect the Pipecat client
|
||||
await this.pcClient.disconnect();
|
||||
this.pcClient = null;
|
||||
|
||||
// Clean up audio
|
||||
if (this.botAudio.srcObject) {
|
||||
@@ -339,6 +265,57 @@ class ChatbotClient {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handleSearchResponse(response) {
|
||||
console.log('SearchResponseHelper, received message:', response);
|
||||
// Clear existing content
|
||||
this.searchResultContainer.innerHTML = '';
|
||||
|
||||
// Create a container for all content
|
||||
const contentContainer = document.createElement('div');
|
||||
contentContainer.className = 'content-container';
|
||||
|
||||
// Add the search_result
|
||||
if (response.search_result) {
|
||||
const searchResultDiv = document.createElement('div');
|
||||
searchResultDiv.className = 'search-result';
|
||||
searchResultDiv.textContent = response.search_result;
|
||||
contentContainer.appendChild(searchResultDiv);
|
||||
}
|
||||
|
||||
// Add the sources
|
||||
if (response.origins) {
|
||||
const sourcesDiv = document.createElement('div');
|
||||
sourcesDiv.className = 'sources';
|
||||
|
||||
const sourcesTitle = document.createElement('h3');
|
||||
sourcesTitle.className = 'sources-title';
|
||||
sourcesTitle.textContent = 'Sources:';
|
||||
sourcesDiv.appendChild(sourcesTitle);
|
||||
|
||||
response.origins.forEach((origin) => {
|
||||
const sourceLink = document.createElement('a');
|
||||
sourceLink.className = 'source-link';
|
||||
sourceLink.href = origin.site_uri;
|
||||
sourceLink.target = '_blank';
|
||||
sourceLink.textContent = origin.site_title;
|
||||
sourcesDiv.appendChild(sourceLink);
|
||||
});
|
||||
|
||||
contentContainer.appendChild(sourcesDiv);
|
||||
}
|
||||
|
||||
// Add the rendered_content in an iframe
|
||||
if (response.rendered_content) {
|
||||
const iframe = document.createElement('iframe');
|
||||
iframe.className = 'iframe-container';
|
||||
iframe.srcdoc = response.rendered_content;
|
||||
contentContainer.appendChild(iframe);
|
||||
}
|
||||
|
||||
// Append the content container to the content panel
|
||||
this.searchResultContainer.appendChild(contentContainer);
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the client when the page loads
|
||||
|
||||
@@ -24,6 +24,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.utils.tracing.setup import setup_tracing
|
||||
|
||||
@@ -61,7 +62,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: TransportParams(
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai[daily,webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
opentelemetry-exporter-otlp-proto-grpc
|
||||
@@ -26,7 +26,7 @@ Create a `.env` file with your API keys to enable tracing:
|
||||
```
|
||||
ENABLE_TRACING=true
|
||||
# OTLP endpoint for Langfuse
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://cloud.langfuse.com/api/public/otel
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel
|
||||
OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic%20<base64_encoded_api_key>
|
||||
# Set to any value to enable console output for debugging
|
||||
# OTEL_CONSOLE_EXPORT=true
|
||||
|
||||
@@ -24,6 +24,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.utils.tracing.setup import setup_tracing
|
||||
|
||||
@@ -58,7 +59,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: TransportParams(
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai[daily,webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
opentelemetry-exporter-otlp-proto-http
|
||||
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.2",
|
||||
"@pipecat-ai/small-webrtc-transport": "^0.0.2"
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/small-webrtc-transport": "^1.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,217 +1,236 @@
|
||||
import { SmallWebRTCTransport } from '@pipecat-ai/small-webrtc-transport';
|
||||
import {
|
||||
SmallWebRTCTransport
|
||||
} from "@pipecat-ai/small-webrtc-transport";
|
||||
import {Participant, RTVIClient, RTVIClientOptions, Transport} from "@pipecat-ai/client-js";
|
||||
BotLLMTextData,
|
||||
Participant,
|
||||
PipecatClient,
|
||||
PipecatClientOptions,
|
||||
TranscriptData,
|
||||
TransportState,
|
||||
} from '@pipecat-ai/client-js';
|
||||
|
||||
class WebRTCApp {
|
||||
private declare connectBtn: HTMLButtonElement;
|
||||
private declare disconnectBtn: HTMLButtonElement;
|
||||
private declare muteBtn: HTMLButtonElement;
|
||||
|
||||
private declare connectBtn: HTMLButtonElement;
|
||||
private declare disconnectBtn: HTMLButtonElement;
|
||||
private declare muteBtn: HTMLButtonElement;
|
||||
private declare audioInput: HTMLSelectElement;
|
||||
private declare videoInput: HTMLSelectElement;
|
||||
private declare audioCodec: HTMLSelectElement;
|
||||
private declare videoCodec: HTMLSelectElement;
|
||||
|
||||
private declare audioInput: HTMLSelectElement;
|
||||
private declare videoInput: HTMLSelectElement;
|
||||
private declare audioCodec: HTMLSelectElement;
|
||||
private declare videoCodec: HTMLSelectElement;
|
||||
private declare videoElement: HTMLVideoElement;
|
||||
private declare audioElement: HTMLAudioElement;
|
||||
|
||||
private declare videoElement: HTMLVideoElement;
|
||||
private declare audioElement: HTMLAudioElement;
|
||||
private debugLog: HTMLElement | null = null;
|
||||
private statusSpan: HTMLElement | null = null;
|
||||
|
||||
private debugLog: HTMLElement | null = null;
|
||||
private statusSpan: HTMLElement | null = null;
|
||||
private declare smallWebRTCTransport: SmallWebRTCTransport;
|
||||
private declare pcClient: PipecatClient;
|
||||
|
||||
private declare smallWebRTCTransport: SmallWebRTCTransport;
|
||||
private declare rtviClient: RTVIClient;
|
||||
constructor() {
|
||||
this.setupDOMElements();
|
||||
this.setupDOMEventListeners();
|
||||
this.initializePipecatClient();
|
||||
void this.populateDevices();
|
||||
}
|
||||
|
||||
constructor() {
|
||||
this.setupDOMElements();
|
||||
this.setupDOMEventListeners();
|
||||
this.initializeRTVIClient()
|
||||
void this.populateDevices();
|
||||
private initializePipecatClient(): void {
|
||||
const opts: PipecatClientOptions = {
|
||||
transport: new SmallWebRTCTransport({ connectionUrl: '/api/offer' }),
|
||||
enableMic: true,
|
||||
enableCam: true,
|
||||
callbacks: {
|
||||
onTransportStateChanged: (state: TransportState) => {
|
||||
this.log(`Transport state: ${state}`);
|
||||
},
|
||||
onConnected: () => {
|
||||
this.onConnectedHandler();
|
||||
},
|
||||
onBotReady: () => {
|
||||
this.log('Bot is ready.');
|
||||
},
|
||||
onDisconnected: () => {
|
||||
this.onDisconnectedHandler();
|
||||
},
|
||||
onUserStartedSpeaking: () => {
|
||||
this.log('User started speaking.');
|
||||
},
|
||||
onUserStoppedSpeaking: () => {
|
||||
this.log('User stopped speaking.');
|
||||
},
|
||||
onBotStartedSpeaking: () => {
|
||||
this.log('Bot started speaking.');
|
||||
},
|
||||
onBotStoppedSpeaking: () => {
|
||||
this.log('Bot stopped speaking.');
|
||||
},
|
||||
onUserTranscript: (transcript: TranscriptData) => {
|
||||
if (transcript.final) {
|
||||
this.log(`User transcript: ${transcript.text}`);
|
||||
}
|
||||
},
|
||||
onBotTranscript: (data: BotLLMTextData) => {
|
||||
this.log(`Bot transcript: ${data.text}`);
|
||||
},
|
||||
onTrackStarted: (
|
||||
track: MediaStreamTrack,
|
||||
participant?: Participant
|
||||
) => {
|
||||
if (participant?.local) {
|
||||
return;
|
||||
}
|
||||
this.onBotTrackStarted(track);
|
||||
},
|
||||
onServerMessage: (msg: unknown) => {
|
||||
this.log(`Server message: ${msg}`);
|
||||
},
|
||||
},
|
||||
};
|
||||
this.pcClient = new PipecatClient(opts);
|
||||
this.smallWebRTCTransport = this.pcClient.transport as SmallWebRTCTransport;
|
||||
}
|
||||
|
||||
private setupDOMElements(): void {
|
||||
this.connectBtn = document.getElementById(
|
||||
'connect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById(
|
||||
'disconnect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.muteBtn = document.getElementById('mute-btn') as HTMLButtonElement;
|
||||
|
||||
this.audioInput = document.getElementById(
|
||||
'audio-input'
|
||||
) as HTMLSelectElement;
|
||||
this.videoInput = document.getElementById(
|
||||
'video-input'
|
||||
) as HTMLSelectElement;
|
||||
this.audioCodec = document.getElementById(
|
||||
'audio-codec'
|
||||
) as HTMLSelectElement;
|
||||
this.videoCodec = document.getElementById(
|
||||
'video-codec'
|
||||
) as HTMLSelectElement;
|
||||
|
||||
this.videoElement = document.getElementById(
|
||||
'bot-video'
|
||||
) as HTMLVideoElement;
|
||||
this.audioElement = document.getElementById(
|
||||
'bot-audio'
|
||||
) as HTMLAudioElement;
|
||||
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
}
|
||||
|
||||
private setupDOMEventListeners(): void {
|
||||
this.connectBtn.addEventListener('click', () => this.start());
|
||||
this.disconnectBtn.addEventListener('click', () => this.stop());
|
||||
this.audioInput.addEventListener('change', (e) => {
|
||||
// @ts-ignore
|
||||
let audioDevice = e.target?.value;
|
||||
this.pcClient.updateMic(audioDevice);
|
||||
});
|
||||
this.videoInput.addEventListener('change', (e) => {
|
||||
// @ts-ignore
|
||||
let videoDevice = e.target?.value;
|
||||
this.pcClient.updateCam(videoDevice);
|
||||
});
|
||||
this.muteBtn.addEventListener('click', () => {
|
||||
let isCamEnabled = this.pcClient.isCamEnabled;
|
||||
this.pcClient.enableCam(!isCamEnabled);
|
||||
this.muteBtn.textContent = isCamEnabled ? '📵' : '📷';
|
||||
});
|
||||
}
|
||||
|
||||
private log(message: string): void {
|
||||
if (!this.debugLog) return;
|
||||
const entry = document.createElement('div');
|
||||
entry.textContent = `${new Date().toISOString()} - ${message}`;
|
||||
if (message.startsWith('User: ')) {
|
||||
entry.style.color = '#2196F3';
|
||||
} else if (message.startsWith('Bot: ')) {
|
||||
entry.style.color = '#4CAF50';
|
||||
}
|
||||
this.debugLog.appendChild(entry);
|
||||
this.debugLog.scrollTop = this.debugLog.scrollHeight;
|
||||
}
|
||||
|
||||
private initializeRTVIClient(): void {
|
||||
const transport = new SmallWebRTCTransport();
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
params: {
|
||||
baseUrl: "/api/offer"
|
||||
},
|
||||
transport: transport as Transport,
|
||||
enableMic: true,
|
||||
enableCam: true,
|
||||
callbacks: {
|
||||
onTransportStateChanged: (state) => {
|
||||
this.log(`Transport state: ${state}`)
|
||||
},
|
||||
onConnected: () => {
|
||||
this.onConnectedHandler()
|
||||
},
|
||||
onBotReady: () => {
|
||||
this.log("Bot is ready.")
|
||||
},
|
||||
onDisconnected: () => {
|
||||
this.onDisconnectedHandler()
|
||||
},
|
||||
onUserStartedSpeaking: () => {
|
||||
this.log("User started speaking.")
|
||||
},
|
||||
onUserStoppedSpeaking: () => {
|
||||
this.log("User stopped speaking.")
|
||||
},
|
||||
onBotStartedSpeaking: () => {
|
||||
this.log("Bot started speaking.")
|
||||
},
|
||||
onBotStoppedSpeaking: () => {
|
||||
this.log("Bot stopped speaking.")
|
||||
},
|
||||
onUserTranscript: (transcript) => {
|
||||
if (transcript.final) {
|
||||
this.log(`User transcript: ${transcript.text}`)
|
||||
}
|
||||
},
|
||||
onBotTranscript: (transcript) => {
|
||||
this.log(`Bot transcript: ${transcript.text}`)
|
||||
},
|
||||
onTrackStarted: (track: MediaStreamTrack, participant?: Participant) => {
|
||||
if (participant?.local) {
|
||||
return
|
||||
}
|
||||
this.onBotTrackStarted(track)
|
||||
},
|
||||
onServerMessage: (msg) => {
|
||||
this.log(`Server message: ${msg}`)
|
||||
}
|
||||
},
|
||||
}
|
||||
RTVIConfig.customConnectHandler = () => Promise.resolve();
|
||||
this.rtviClient = new RTVIClient(RTVIConfig);
|
||||
this.smallWebRTCTransport = transport
|
||||
private clearAllLogs() {
|
||||
this.debugLog!.innerText = '';
|
||||
}
|
||||
|
||||
private updateStatus(status: string): void {
|
||||
if (this.statusSpan) {
|
||||
this.statusSpan.textContent = status;
|
||||
}
|
||||
this.log(`Status: ${status}`);
|
||||
}
|
||||
|
||||
private setupDOMElements(): void {
|
||||
this.connectBtn = document.getElementById('connect-btn') as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById('disconnect-btn') as HTMLButtonElement;
|
||||
this.muteBtn = document.getElementById('mute-btn') as HTMLButtonElement;
|
||||
private onConnectedHandler() {
|
||||
this.updateStatus('Connected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = true;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = false;
|
||||
}
|
||||
|
||||
this.audioInput = document.getElementById('audio-input') as HTMLSelectElement;
|
||||
this.videoInput = document.getElementById('video-input') as HTMLSelectElement;
|
||||
this.audioCodec = document.getElementById('audio-codec') as HTMLSelectElement;
|
||||
this.videoCodec = document.getElementById('video-codec') as HTMLSelectElement;
|
||||
private onDisconnectedHandler() {
|
||||
this.updateStatus('Disconnected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = false;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = true;
|
||||
}
|
||||
|
||||
this.videoElement = document.getElementById('bot-video') as HTMLVideoElement;
|
||||
this.audioElement = document.getElementById('bot-audio') as HTMLAudioElement;
|
||||
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
private onBotTrackStarted(track: MediaStreamTrack) {
|
||||
if (track.kind === 'video') {
|
||||
this.videoElement.srcObject = new MediaStream([track]);
|
||||
} else {
|
||||
this.audioElement.srcObject = new MediaStream([track]);
|
||||
}
|
||||
}
|
||||
|
||||
private setupDOMEventListeners(): void {
|
||||
this.connectBtn.addEventListener("click", () => this.start());
|
||||
this.disconnectBtn.addEventListener("click", () => this.stop());
|
||||
this.audioInput.addEventListener("change", (e) => {
|
||||
// @ts-ignore
|
||||
let audioDevice = e.target?.value
|
||||
this.rtviClient.updateMic(audioDevice)
|
||||
})
|
||||
this.videoInput.addEventListener("change", (e) => {
|
||||
// @ts-ignore
|
||||
let videoDevice = e.target?.value
|
||||
this.rtviClient.updateCam(videoDevice)
|
||||
})
|
||||
this.muteBtn.addEventListener('click', () => {
|
||||
let isCamEnabled = this.rtviClient.isCamEnabled
|
||||
this.rtviClient.enableCam(!isCamEnabled)
|
||||
this.muteBtn.textContent = isCamEnabled ? '📵' : '📷';
|
||||
});
|
||||
private async populateDevices(): Promise<void> {
|
||||
const populateSelect = (
|
||||
select: HTMLSelectElement,
|
||||
devices: MediaDeviceInfo[]
|
||||
): void => {
|
||||
let counter = 1;
|
||||
devices.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = device.deviceId;
|
||||
option.text = device.label || 'Device #' + counter;
|
||||
select.appendChild(option);
|
||||
counter += 1;
|
||||
});
|
||||
};
|
||||
|
||||
try {
|
||||
const audioDevices = await this.pcClient.getAllMics();
|
||||
populateSelect(this.audioInput, audioDevices);
|
||||
const videoDevices = await this.pcClient.getAllCams();
|
||||
populateSelect(this.videoInput, videoDevices);
|
||||
} catch (e) {
|
||||
alert(e);
|
||||
}
|
||||
}
|
||||
|
||||
private log(message: string): void {
|
||||
if (!this.debugLog) return;
|
||||
const entry = document.createElement('div');
|
||||
entry.textContent = `${new Date().toISOString()} - ${message}`;
|
||||
if (message.startsWith('User: ')) {
|
||||
entry.style.color = '#2196F3';
|
||||
} else if (message.startsWith('Bot: ')) {
|
||||
entry.style.color = '#4CAF50';
|
||||
}
|
||||
this.debugLog.appendChild(entry);
|
||||
this.debugLog.scrollTop = this.debugLog.scrollHeight;
|
||||
private async start(): Promise<void> {
|
||||
this.clearAllLogs();
|
||||
|
||||
this.connectBtn.disabled = true;
|
||||
this.updateStatus('Connecting');
|
||||
|
||||
this.smallWebRTCTransport.setAudioCodec(this.audioCodec.value);
|
||||
this.smallWebRTCTransport.setVideoCodec(this.videoCodec.value);
|
||||
try {
|
||||
await this.pcClient.connect();
|
||||
} catch (e) {
|
||||
console.log(`Failed to connect ${e}`);
|
||||
this.stop();
|
||||
}
|
||||
}
|
||||
|
||||
private clearAllLogs() {
|
||||
this.debugLog!.innerText = ''
|
||||
}
|
||||
|
||||
private updateStatus(status: string): void {
|
||||
if (this.statusSpan) {
|
||||
this.statusSpan.textContent = status;
|
||||
}
|
||||
this.log(`Status: ${status}`);
|
||||
}
|
||||
|
||||
private onConnectedHandler() {
|
||||
this.updateStatus('Connected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = true;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = false;
|
||||
}
|
||||
|
||||
private onDisconnectedHandler() {
|
||||
this.updateStatus('Disconnected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = false;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = true;
|
||||
}
|
||||
|
||||
private onBotTrackStarted(track: MediaStreamTrack) {
|
||||
if (track.kind === 'video') {
|
||||
this.videoElement.srcObject = new MediaStream([track]);
|
||||
} else {
|
||||
this.audioElement.srcObject = new MediaStream([track]);
|
||||
}
|
||||
}
|
||||
|
||||
private async populateDevices(): Promise<void> {
|
||||
const populateSelect = (select: HTMLSelectElement, devices: MediaDeviceInfo[]): void => {
|
||||
let counter = 1;
|
||||
devices.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = device.deviceId;
|
||||
option.text = device.label || ('Device #' + counter);
|
||||
select.appendChild(option);
|
||||
counter += 1;
|
||||
});
|
||||
};
|
||||
|
||||
try {
|
||||
const audioDevices = await this.rtviClient.getAllMics();
|
||||
populateSelect(this.audioInput, audioDevices);
|
||||
const videoDevices = await this.rtviClient.getAllCams();
|
||||
populateSelect(this.videoInput, videoDevices);
|
||||
} catch (e) {
|
||||
alert(e);
|
||||
}
|
||||
}
|
||||
|
||||
private async start(): Promise<void> {
|
||||
this.clearAllLogs()
|
||||
|
||||
this.connectBtn.disabled = true;
|
||||
this.updateStatus("Connecting")
|
||||
|
||||
this.smallWebRTCTransport.setAudioCodec(this.audioCodec.value)
|
||||
this.smallWebRTCTransport.setVideoCodec(this.videoCodec.value)
|
||||
try {
|
||||
await this.rtviClient.connect()
|
||||
} catch (e) {
|
||||
console.log(`Failed to connect ${e}`)
|
||||
this.stop()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private stop(): void {
|
||||
void this.rtviClient.disconnect()
|
||||
}
|
||||
private stop(): void {
|
||||
void this.pcClient.disconnect();
|
||||
}
|
||||
}
|
||||
|
||||
// Create the WebRTCConnection instance
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
pipecat-ai[daily,elevenlabs,openai,silero]
|
||||
pipecat-ai[daily,cartesia,openai,silero]
|
||||
fastapi==0.115.6
|
||||
uvicorn
|
||||
python-dotenv
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
90031FC22C616EE900408370 /* SimpleChatbotUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90031FC12C616EE900408370 /* SimpleChatbotUITests.swift */; };
|
||||
90031FC42C616EE900408370 /* SimpleChatbotUITestsLaunchTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90031FC32C616EE900408370 /* SimpleChatbotUITestsLaunchTests.swift */; };
|
||||
90031FDC2C6D5DD700408370 /* ToastModifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90031FDB2C6D5DD700408370 /* ToastModifier.swift */; };
|
||||
907C98842D37E6AF0079441F /* PipecatClientIOSDaily in Frameworks */ = {isa = PBXBuildFile; productRef = 907C98832D37E6AF0079441F /* PipecatClientIOSDaily */; };
|
||||
90ABB98E2C735ED6000D9CC7 /* MeetingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB98D2C735ED6000D9CC7 /* MeetingView.swift */; };
|
||||
90ABB9902C736A8B000D9CC7 /* WaveformView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB98F2C736A8B000D9CC7 /* WaveformView.swift */; };
|
||||
90ABB9932C73820D000D9CC7 /* MicrophoneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9922C73820D000D9CC7 /* MicrophoneView.swift */; };
|
||||
@@ -25,6 +24,8 @@
|
||||
90ABB9A32C74E1CE000D9CC7 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9A22C74E1CE000D9CC7 /* SettingsView.swift */; };
|
||||
90ABB9A62C74EA8A000D9CC7 /* SettingsPreference.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9A52C74EA8A000D9CC7 /* SettingsPreference.swift */; };
|
||||
90ABB9A82C74EAB1000D9CC7 /* SettingsManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9A72C74EAB1000D9CC7 /* SettingsManager.swift */; };
|
||||
90CC98B02E158093003C2706 /* PipecatClientIOSDaily in Frameworks */ = {isa = PBXBuildFile; productRef = 90CC98AF2E158093003C2706 /* PipecatClientIOSDaily */; };
|
||||
90CC98B62E15820B003C2706 /* PipecatClientIOSDaily in Frameworks */ = {isa = PBXBuildFile; productRef = 90CC98B52E15820B003C2706 /* PipecatClientIOSDaily */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
@@ -73,7 +74,8 @@
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
907C98842D37E6AF0079441F /* PipecatClientIOSDaily in Frameworks */,
|
||||
90CC98B62E15820B003C2706 /* PipecatClientIOSDaily in Frameworks */,
|
||||
90CC98B02E158093003C2706 /* PipecatClientIOSDaily in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@@ -218,7 +220,8 @@
|
||||
);
|
||||
name = SimpleChatbot;
|
||||
packageProductDependencies = (
|
||||
907C98832D37E6AF0079441F /* PipecatClientIOSDaily */,
|
||||
90CC98AF2E158093003C2706 /* PipecatClientIOSDaily */,
|
||||
90CC98B52E15820B003C2706 /* PipecatClientIOSDaily */,
|
||||
);
|
||||
productName = SimpleChatbot;
|
||||
productReference = 90031FA32C616EE700408370 /* SimpleChatbot.app */;
|
||||
@@ -293,7 +296,7 @@
|
||||
);
|
||||
mainGroup = 90031F9A2C616EE700408370;
|
||||
packageReferences = (
|
||||
907C98822D37E6AF0079441F /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */,
|
||||
90CC98B42E15820B003C2706 /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */,
|
||||
);
|
||||
productRefGroup = 90031FA42C616EE700408370 /* Products */;
|
||||
projectDirPath = "";
|
||||
@@ -682,20 +685,24 @@
|
||||
/* End XCConfigurationList section */
|
||||
|
||||
/* Begin XCRemoteSwiftPackageReference section */
|
||||
907C98822D37E6AF0079441F /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */ = {
|
||||
90CC98B42E15820B003C2706 /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */ = {
|
||||
isa = XCRemoteSwiftPackageReference;
|
||||
repositoryURL = "https://github.com/pipecat-ai/pipecat-client-ios-daily/";
|
||||
requirement = {
|
||||
kind = upToNextMajorVersion;
|
||||
minimumVersion = 0.3.2;
|
||||
minimumVersion = 0.3.6;
|
||||
};
|
||||
};
|
||||
/* End XCRemoteSwiftPackageReference section */
|
||||
|
||||
/* Begin XCSwiftPackageProductDependency section */
|
||||
907C98832D37E6AF0079441F /* PipecatClientIOSDaily */ = {
|
||||
90CC98AF2E158093003C2706 /* PipecatClientIOSDaily */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = 907C98822D37E6AF0079441F /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */;
|
||||
productName = PipecatClientIOSDaily;
|
||||
};
|
||||
90CC98B52E15820B003C2706 /* PipecatClientIOSDaily */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = 90CC98B42E15820B003C2706 /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */;
|
||||
productName = PipecatClientIOSDaily;
|
||||
};
|
||||
/* End XCSwiftPackageProductDependency section */
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
{
|
||||
"originHash" : "cc17f08b06def9570d775e9c6f7a8dc10d1588b98127e977c47d052abac659b7",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "daily-client-ios",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/daily-co/daily-client-ios.git",
|
||||
"state" : {
|
||||
"revision" : "15804ce495780da3ec2d05ab99736315f7bfbd24",
|
||||
"version" : "0.28.0"
|
||||
"revision" : "431938db25e5807120e89e2dc5bab1c076729f59",
|
||||
"version" : "0.31.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -14,8 +15,8 @@
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/pipecat-ai/pipecat-client-ios.git",
|
||||
"state" : {
|
||||
"revision" : "c679512e367002a1a67da85d503fec72d9b17191",
|
||||
"version" : "0.3.2"
|
||||
"revision" : "f92b5e68e56a8311f7d8ead68a7a5674843cbc40",
|
||||
"version" : "0.3.6"
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -23,10 +24,10 @@
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/pipecat-ai/pipecat-client-ios-daily/",
|
||||
"state" : {
|
||||
"revision" : "a337fe6642c52376d2f90eafcb965f5be772ce72",
|
||||
"version" : "0.3.2"
|
||||
"revision" : "8f494da903192c22c367ecf9e51248c9b651fbc6",
|
||||
"version" : "0.3.6"
|
||||
}
|
||||
}
|
||||
],
|
||||
"version" : 2
|
||||
"version" : 3
|
||||
}
|
||||
|
||||
@@ -78,10 +78,11 @@ class CallContainerModel: ObservableObject {
|
||||
self.saveCredentials(backendURL: baseUrl)
|
||||
}
|
||||
|
||||
@MainActor
|
||||
func disconnect() {
|
||||
self.rtviClientIOS?.disconnect(completion: nil)
|
||||
self.rtviClientIOS?.release()
|
||||
Task { @MainActor in
|
||||
try await self.rtviClientIOS?.disconnect()
|
||||
self.rtviClientIOS?.release()
|
||||
}
|
||||
}
|
||||
|
||||
func showError(message: String) {
|
||||
|
||||
@@ -1,40 +1,51 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>AI Chatbot</title>
|
||||
</head>
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>AI Chatbot</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="status-bar">
|
||||
<div class="status">
|
||||
Status: <span id="connection-status">Disconnected</span>
|
||||
</div>
|
||||
<div class="controls">
|
||||
<button id="connect-btn">Connect</button>
|
||||
<button id="disconnect-btn" disabled>Disconnect</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="main-content">
|
||||
<div class="bot-container">
|
||||
<div id="bot-video-container">
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="status-bar">
|
||||
<div class="status">
|
||||
Status: <span id="connection-status">Disconnected</span>
|
||||
</div>
|
||||
<audio id="bot-audio" autoplay></audio>
|
||||
<div class="controls">
|
||||
<button id="connect-btn">Connect</button>
|
||||
<button id="disconnect-btn" disabled>Disconnect</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="main-content">
|
||||
<div class="bot-container">
|
||||
<div id="bot-video-container"></div>
|
||||
<audio id="bot-audio" autoplay></audio>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="device-bar">
|
||||
<div class="device-controls">
|
||||
<select id="device-selector"></select>
|
||||
<button id="mic-toggle-btn">Unmute Mic</button>
|
||||
</div>
|
||||
<div class="text-input-container">
|
||||
<input
|
||||
type="text"
|
||||
id="text-input"
|
||||
placeholder="Type your message..." />
|
||||
<button id="send-text-btn" disabled>Send</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="debug-panel">
|
||||
<h3>Debug Info</h3>
|
||||
<div id="debug-log"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="debug-panel">
|
||||
<h3>Debug Info</h3>
|
||||
<div id="debug-log"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="module" src="/src/app.js"></script>
|
||||
<link rel="stylesheet" href="/src/style.css">
|
||||
</body>
|
||||
|
||||
</html>
|
||||
<script type="module" src="/src/app.js"></script>
|
||||
<link rel="stylesheet" href="/src/style.css" />
|
||||
</body>
|
||||
</html>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.8"
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* RTVI Client Implementation
|
||||
* Pipecat Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
@@ -16,7 +16,7 @@
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import { RTVIClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
/**
|
||||
@@ -26,9 +26,8 @@ import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
class ChatbotClient {
|
||||
constructor() {
|
||||
// Initialize client state
|
||||
this.rtviClient = null;
|
||||
this.pcClient = null;
|
||||
this.setupDOMElements();
|
||||
this.setupEventListeners();
|
||||
this.initializeClientAndTransport();
|
||||
}
|
||||
|
||||
@@ -42,6 +41,9 @@ class ChatbotClient {
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.botVideoContainer = document.getElementById('bot-video-container');
|
||||
this.deviceSelector = document.getElementById('device-selector');
|
||||
this.micToggleBtn = document.getElementById('mic-toggle-btn');
|
||||
this.sendTextBtn = document.getElementById('send-text-btn');
|
||||
|
||||
// Create an audio element for bot's voice output
|
||||
this.botAudio = document.createElement('audio');
|
||||
@@ -54,25 +56,78 @@ class ChatbotClient {
|
||||
* Set up event listeners for connect/disconnect buttons
|
||||
*/
|
||||
setupEventListeners() {
|
||||
this.connectBtn.addEventListener('click', () => this.connect());
|
||||
this.connectBtn.addEventListener('click', () => {
|
||||
console.log('click');
|
||||
this.connect();
|
||||
});
|
||||
this.disconnectBtn.addEventListener('click', () => this.disconnect());
|
||||
|
||||
// Populate device selector
|
||||
this.pcClient.getAllMics().then((mics) => {
|
||||
console.log('Available mics:', mics);
|
||||
mics.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = device.deviceId;
|
||||
option.textContent = device.label || `Microphone ${device.deviceId}`;
|
||||
this.deviceSelector.appendChild(option);
|
||||
});
|
||||
});
|
||||
this.deviceSelector.addEventListener('change', (event) => {
|
||||
const selectedDeviceId = event.target.value;
|
||||
console.log('Selected device ID:', selectedDeviceId);
|
||||
this.pcClient.updateMic(selectedDeviceId);
|
||||
});
|
||||
|
||||
// Handle mic mute/unmute toggle
|
||||
const micToggleBtn = document.getElementById('mic-toggle-btn');
|
||||
|
||||
micToggleBtn.addEventListener('click', async () => {
|
||||
if (this.pcClient.state === 'disconnected') {
|
||||
await this.pcClient.initDevices();
|
||||
} else {
|
||||
this.pcClient.enableMic(!this.pcClient.isMicEnabled);
|
||||
}
|
||||
});
|
||||
|
||||
const textInput = document.getElementById('text-input');
|
||||
|
||||
const sendTextToLLM = () => {
|
||||
this.sendTextBtn.disabled = true; // Disable button to prevent multiple clicks
|
||||
const text = textInput.value.trim();
|
||||
if (text) {
|
||||
void this.pcClient.appendToContext({
|
||||
role: 'user',
|
||||
content: text,
|
||||
run_immediately: true,
|
||||
});
|
||||
}
|
||||
textInput.value = ''; // Clear the input
|
||||
this.sendTextBtn.disabled = false; // Re-enable button after sending
|
||||
};
|
||||
|
||||
this.sendTextBtn.addEventListener('click', sendTextToLLM);
|
||||
|
||||
// Also handle Enter key in the input
|
||||
textInput.addEventListener('keypress', (e) => {
|
||||
if (e.key === 'Enter') {
|
||||
sendTextToLLM();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
updateMicToggleButton(micEnabled) {
|
||||
console.log('Mic enabled:', micEnabled, this.pcClient?.isMicEnabled);
|
||||
this.micToggleBtn.textContent = micEnabled ? 'Mute Mic' : 'Unmute Mic';
|
||||
}
|
||||
/**
|
||||
* Set up the RTVI client and Daily transport
|
||||
* Set up the Pipecat client and Daily transport
|
||||
*/
|
||||
initializeClientAndTransport() {
|
||||
// Initialize the RTVI client with a DailyTransport and our configuration
|
||||
this.rtviClient = new RTVIClient({
|
||||
async initializeClientAndTransport() {
|
||||
console.log('Initializing Pipecat client and transport...');
|
||||
// Initialize the Pipecat client with a DailyTransport and our configuration
|
||||
this.pcClient = new PipecatClient({
|
||||
transport: new DailyTransport(),
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
},
|
||||
enableMic: true, // Enable microphone for user input
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
// Handle connection state changes
|
||||
@@ -86,7 +141,9 @@ class ChatbotClient {
|
||||
this.updateStatus('Disconnected');
|
||||
this.connectBtn.disabled = false;
|
||||
this.disconnectBtn.disabled = true;
|
||||
this.sendTextBtn.disabled = true;
|
||||
this.log('Client disconnected');
|
||||
this.updateMicToggleButton(false);
|
||||
},
|
||||
// Handle transport state changes
|
||||
onTransportStateChanged: (state) => {
|
||||
@@ -106,6 +163,7 @@ class ChatbotClient {
|
||||
onBotReady: (data) => {
|
||||
this.log(`Bot ready: ${JSON.stringify(data)}`);
|
||||
this.setupMediaTracks();
|
||||
this.sendTextBtn.disabled = false;
|
||||
},
|
||||
// Transcript events
|
||||
onUserTranscript: (data) => {
|
||||
@@ -121,14 +179,20 @@ class ChatbotClient {
|
||||
onMessageError: (error) => {
|
||||
console.log('Message error:', error);
|
||||
},
|
||||
onMicUpdated: (data) => {
|
||||
console.log('Mic updated:', data);
|
||||
this.deviceSelector.value = data.deviceId;
|
||||
},
|
||||
onError: (error) => {
|
||||
console.log('Error:', JSON.stringify(error));
|
||||
},
|
||||
},
|
||||
});
|
||||
window.client = this; // Expose client globally for debugging
|
||||
|
||||
// Set up listeners for media track events
|
||||
this.setupTrackListeners();
|
||||
this.setupEventListeners();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -163,10 +227,10 @@ class ChatbotClient {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.rtviClient) return;
|
||||
if (!this.pcClient) return;
|
||||
|
||||
// Get current tracks from the client
|
||||
const tracks = this.rtviClient.tracks();
|
||||
const tracks = this.pcClient.tracks();
|
||||
|
||||
// Set up any available bot tracks
|
||||
if (tracks.bot?.audio) {
|
||||
@@ -182,27 +246,34 @@ class ChatbotClient {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.rtviClient) return;
|
||||
if (!this.pcClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
if (!participant?.local) {
|
||||
if (track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
} else if (track.kind === 'video') {
|
||||
this.setupVideoTrack(track);
|
||||
}
|
||||
} else if (track.kind === 'audio') {
|
||||
console.log(`Local audio track started: `, this.pcClient.tracks());
|
||||
// If local audio track starts, update mic
|
||||
this.updateMicToggleButton(true);
|
||||
}
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(
|
||||
`Track stopped event: ${track.kind} from ${
|
||||
participant?.name || 'unknown'
|
||||
participant ? (participant.local ? 'local' : 'bot') : 'unknown'
|
||||
}`
|
||||
);
|
||||
if (participant?.local && track.kind === 'audio') {
|
||||
// If local audio track stops, update mic toggle button
|
||||
this.updateMicToggleButton(false);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -251,17 +322,16 @@ class ChatbotClient {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
*/
|
||||
async connect() {
|
||||
try {
|
||||
// Initialize audio/video devices
|
||||
this.log('Initializing devices...');
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
// Connect to the bot
|
||||
this.log('Connecting to bot...');
|
||||
await this.rtviClient.connect();
|
||||
await this.pcClient.connect({
|
||||
endpoint: 'http://localhost:7860/connect',
|
||||
timeout: 25000,
|
||||
});
|
||||
|
||||
this.log('Connection complete');
|
||||
} catch (error) {
|
||||
@@ -271,9 +341,9 @@ class ChatbotClient {
|
||||
this.updateStatus('Error');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.rtviClient) {
|
||||
if (this.pcClient) {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
await this.pcClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError.message}`);
|
||||
}
|
||||
@@ -285,10 +355,10 @@ class ChatbotClient {
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.rtviClient) {
|
||||
if (this.pcClient) {
|
||||
try {
|
||||
// Disconnect the RTVI client
|
||||
await this.rtviClient.disconnect();
|
||||
// Disconnect the Pipecat client
|
||||
await this.pcClient.disconnect();
|
||||
|
||||
// Clean up audio
|
||||
if (this.botAudio.srcObject) {
|
||||
|
||||
@@ -10,7 +10,8 @@ body {
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.status-bar {
|
||||
.status-bar,
|
||||
.device-bar {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
@@ -20,7 +21,24 @@ body {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.controls button {
|
||||
.device-bar {
|
||||
flex-direction: column;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.controls,
|
||||
.device-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px; /* Adds spacing between elements */
|
||||
}
|
||||
|
||||
.device-controls {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
.controls button,
|
||||
.device-controls button {
|
||||
padding: 8px 16px;
|
||||
margin-left: 10px;
|
||||
border: none;
|
||||
@@ -28,6 +46,56 @@ body {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#bot-selector,
|
||||
#device-selector {
|
||||
padding: 8px 16px;
|
||||
padding-right: 40px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
background-color: #6c757d; /* Gray background */
|
||||
color: white; /* White text */
|
||||
cursor: pointer;
|
||||
appearance: none; /* Removes default browser styling for dropdowns */
|
||||
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='white'%3E%3Cpath d='M7 10l5 5 5-5z'/%3E%3C/svg%3E"); /* Custom arrow */
|
||||
background-repeat: no-repeat;
|
||||
background-position: right 8px center; /* Position the arrow */
|
||||
}
|
||||
|
||||
#bot-selector:focus,
|
||||
#device-selector:focus {
|
||||
outline: none;
|
||||
box-shadow: 0 0 4px rgba(0, 0, 0, 0.3); /* Add a subtle focus effect */
|
||||
}
|
||||
|
||||
.text-input-container {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
margin-left: 10px;
|
||||
width: 100%;
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
#text-input {
|
||||
flex: 1;
|
||||
padding: 8px 16px;
|
||||
border: 1px solid #e0e0e0;
|
||||
border-radius: 4px;
|
||||
min-width: 200px;
|
||||
}
|
||||
|
||||
#send-text-btn {
|
||||
padding: 8px 16px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
background-color: #007bff;
|
||||
color: white;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
#send-text-btn:hover {
|
||||
background-color: #0056b3;
|
||||
}
|
||||
|
||||
#connect-btn {
|
||||
background-color: #4caf50;
|
||||
color: white;
|
||||
@@ -38,6 +106,9 @@ body {
|
||||
color: white;
|
||||
}
|
||||
|
||||
#mic-toggle-btn {
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
|
||||
1394
examples/simple-chatbot/client/react/package-lock.json
generated
1394
examples/simple-chatbot/client/react/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -10,9 +10,9 @@
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/client-react": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.8",
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/client-react": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1"
|
||||
},
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
import {
|
||||
RTVIClientAudio,
|
||||
RTVIClientVideo,
|
||||
useRTVIClientTransportState,
|
||||
PipecatClientAudio,
|
||||
PipecatClientVideo,
|
||||
usePipecatClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
import { RTVIProvider } from './providers/RTVIProvider';
|
||||
import { PipecatProvider } from './providers/PipecatProvider';
|
||||
import { ConnectButton } from './components/ConnectButton';
|
||||
import { StatusDisplay } from './components/StatusDisplay';
|
||||
import { DebugDisplay } from './components/DebugDisplay';
|
||||
import './App.css';
|
||||
|
||||
function BotVideo() {
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const isConnected = transportState !== 'disconnected';
|
||||
|
||||
return (
|
||||
<div className="bot-container">
|
||||
<div className="video-container">
|
||||
{isConnected && <RTVIClientVideo participant="bot" fit="cover" />}
|
||||
{isConnected && <PipecatClientVideo participant="bot" fit="cover" />}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -35,16 +35,16 @@ function AppContent() {
|
||||
</div>
|
||||
|
||||
<DebugDisplay />
|
||||
<RTVIClientAudio />
|
||||
<PipecatClientAudio />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function App() {
|
||||
return (
|
||||
<RTVIProvider>
|
||||
<PipecatProvider>
|
||||
<AppContent />
|
||||
</RTVIProvider>
|
||||
</PipecatProvider>
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
import {
|
||||
useRTVIClient,
|
||||
useRTVIClientTransportState,
|
||||
usePipecatClient,
|
||||
usePipecatClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
|
||||
export function ConnectButton() {
|
||||
const client = useRTVIClient();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const client = usePipecatClient();
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const isConnected = ['connected', 'ready'].includes(transportState);
|
||||
|
||||
const handleClick = async () => {
|
||||
if (!client) {
|
||||
console.error('RTVI client is not initialized');
|
||||
console.error('Pipecat client is not initialized');
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ export function ConnectButton() {
|
||||
if (isConnected) {
|
||||
await client.disconnect();
|
||||
} else {
|
||||
await client.connect();
|
||||
await client.connect({ endpoint: 'http://localhost:7860/connect' });
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Connection error:', error);
|
||||
|
||||
@@ -6,12 +6,12 @@ import {
|
||||
TranscriptData,
|
||||
BotLLMTextData,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { useRTVIClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import { usePipecatClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import './DebugDisplay.css';
|
||||
|
||||
export function DebugDisplay() {
|
||||
const debugLogRef = useRef<HTMLDivElement>(null);
|
||||
const client = useRTVIClient();
|
||||
const client = usePipecatClient();
|
||||
|
||||
const log = useCallback((message: string) => {
|
||||
if (!debugLogRef.current) return;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { useRTVIClientTransportState } from '@pipecat-ai/client-react';
|
||||
import { usePipecatClientTransportState } from '@pipecat-ai/client-react';
|
||||
|
||||
export function StatusDisplay() {
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const transportState = usePipecatClientTransportState();
|
||||
|
||||
return (
|
||||
<div className="status">
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
import { type PropsWithChildren } from 'react';
|
||||
import { PipecatClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { PipecatClientProvider } from '@pipecat-ai/client-react';
|
||||
|
||||
const client = new PipecatClient({
|
||||
transport: new DailyTransport(),
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
export function PipecatProvider({ children }: PropsWithChildren) {
|
||||
return (
|
||||
<PipecatClientProvider client={client}>{children}</PipecatClientProvider>
|
||||
);
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
import { type PropsWithChildren } from 'react';
|
||||
import { RTVIClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { RTVIClientProvider } from '@pipecat-ai/client-react';
|
||||
|
||||
const transport = new DailyTransport();
|
||||
|
||||
const client = new RTVIClient({
|
||||
transport,
|
||||
params: {
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
export function RTVIProvider({ children }: PropsWithChildren) {
|
||||
return <RTVIClientProvider client={client}>{children}</RTVIClientProvider>;
|
||||
}
|
||||
104
examples/storytelling-chatbot/client/package-lock.json
generated
104
examples/storytelling-chatbot/client/package-lock.json
generated
@@ -345,9 +345,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.28.tgz",
|
||||
"integrity": "sha512-PAmWhJfJQlP+kxZwCjrVd9QnR5x0R3u0mTXTiZDgSd4h5LdXmjxCCWbN9kq6hkZBOax8Rm3xDW5HagWyJuT37g=="
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.30.tgz",
|
||||
"integrity": "sha512-KBiBKrDY6kxTQWGzKjQB7QirL3PiiOkV7KW98leHFjtVRKtft76Ra5qSA/SL75xT44dp6hOcqiiJ6iievLOYug=="
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
"version": "14.1.4",
|
||||
@@ -359,9 +359,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.28.tgz",
|
||||
"integrity": "sha512-kzGChl9setxYWpk3H6fTZXXPFFjg7urptLq5o5ZgYezCrqlemKttwMT5iFyx/p1e/JeglTwDFRtb923gTJ3R1w==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.30.tgz",
|
||||
"integrity": "sha512-EAqfOTb3bTGh9+ewpO/jC59uACadRHM6TSA9DdxJB/6gxOpyV+zrbqeXiFTDy9uV6bmipFDkfpAskeaDcO+7/g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -374,9 +374,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.28.tgz",
|
||||
"integrity": "sha512-z6FXYHDJlFOzVEOiiJ/4NG8aLCeayZdcRSMjPDysW297Up6r22xw6Ea9AOwQqbNsth8JNgIK8EkWz2IDwaLQcw==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.30.tgz",
|
||||
"integrity": "sha512-TyO7Wz1IKE2kGv8dwQ0bmPL3s44EKVencOqwIY69myoS3rdpO1NPg5xPM5ymKu7nfX4oYJrpMxv8G9iqLsnL4A==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -389,9 +389,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.28.tgz",
|
||||
"integrity": "sha512-9ARHLEQXhAilNJ7rgQX8xs9aH3yJSj888ssSjJLeldiZKR4D7N08MfMqljk77fAwZsWwsrp8ohHsMvurvv9liQ==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-I5lg1fgPJ7I5dk6mr3qCH1hJYKJu1FsfKSiTKoYwcuUf53HWTrEkwmMI0t5ojFKeA6Vu+SfT2zVy5NS0QLXV4Q==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -404,9 +404,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.28.tgz",
|
||||
"integrity": "sha512-p6gvatI1nX41KCizEe6JkF0FS/cEEF0u23vKDpl+WhPe/fCTBeGkEBh7iW2cUM0rvquPVwPWdiUR6Ebr/kQWxQ==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-8GkNA+sLclQyxgzCDs2/2GSwBc92QLMrmYAmoP2xehe5MUKBLB2cgo34Yu242L1siSkwQkiV4YLdCnjwc/Micw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -419,9 +419,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.28.tgz",
|
||||
"integrity": "sha512-nsiSnz2wO6GwMAX2o0iucONlVL7dNgKUqt/mDTATGO2NY59EO/ZKnKEr80BJFhuA5UC1KZOMblJHWZoqIJddpA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-8Ly7okjssLuBoe8qaRCcjGtcMsv79hwzn/63wNeIkzJVFVX06h5S737XNr7DZwlsbTBDOyI6qbL2BJB5n6TV/w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -434,9 +434,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.28.tgz",
|
||||
"integrity": "sha512-+IuGQKoI3abrXFqx7GtlvNOpeExUH1mTIqCrh1LGFf8DnlUcTmOOCApEnPJUSLrSbzOdsF2ho2KhnQoO0I1RDw==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-dBmV1lLNeX4mR7uI7KNVHsGQU+OgTG5RGFPi3tBJpsKPvOPtg9poyav/BYWrB3GPQL4dW5YGGgalwZ79WukbKQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -449,9 +449,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.28.tgz",
|
||||
"integrity": "sha512-l61WZ3nevt4BAnGksUVFKy2uJP5DPz2E0Ma/Oklvo3sGj9sw3q7vBWONFRgz+ICiHpW5mV+mBrkB3XEubMrKaA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-6MMHi2Qc1Gkq+4YLXAgbYslE1f9zMGBikKMdmQRHXjkGPot1JY3n5/Qrbg40Uvbi8//wYnydPnyvNhI1DMUW1g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -464,9 +464,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-ia32-msvc": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.28.tgz",
|
||||
"integrity": "sha512-+Kcp1T3jHZnJ9v9VTJ/yf1t/xmtFAc/Sge4v7mVc1z+NYfYzisi8kJ9AsY8itbgq+WgEwMtOpiLLJsUy2qnXZw==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-pVZMnFok5qEX4RT59mK2hEVtJX+XFfak+/rjHpyFh7juiT52r177bfFKhnlafm0UOSldhXjj32b+LZIOdswGTg==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
@@ -479,9 +479,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.28.tgz",
|
||||
"integrity": "sha512-1gCmpvyhz7DkB1srRItJTnmR2UwQPAUXXIg9r0/56g3O8etGmwlX68skKXJOp9EejW3hhv7nSQUJ2raFiz4MoA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-4KCo8hMZXMjpTzs3HOqOGYYwAXymXIy7PEPAXNEcEOyKqkjiDlECumrWziy+JEF0Oi4ILHGxzgQ3YiMGG2t/Lg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1317,9 +1317,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
@@ -1960,9 +1960,9 @@
|
||||
"integrity": "sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA=="
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
@@ -3391,9 +3391,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -4389,11 +4389,11 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.28.tgz",
|
||||
"integrity": "sha512-QLEIP/kYXynIxtcKB6vNjtWLVs3Y4Sb+EClTC/CSVzdLD1gIuItccpu/n1lhmduffI32iPGEK2cLLxxt28qgYA==",
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.30.tgz",
|
||||
"integrity": "sha512-+COdu6HQrHHFQ1S/8BBsCag61jZacmvbuL2avHvQFbWa2Ox7bE+d8FyNgxRLjXQ5wtPyQwEmk85js/AuaG2Sbg==",
|
||||
"dependencies": {
|
||||
"@next/env": "14.2.28",
|
||||
"@next/env": "14.2.30",
|
||||
"@swc/helpers": "0.5.5",
|
||||
"busboy": "1.6.0",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -4408,15 +4408,15 @@
|
||||
"node": ">=18.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "14.2.28",
|
||||
"@next/swc-darwin-x64": "14.2.28",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.28",
|
||||
"@next/swc-linux-arm64-musl": "14.2.28",
|
||||
"@next/swc-linux-x64-gnu": "14.2.28",
|
||||
"@next/swc-linux-x64-musl": "14.2.28",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.28",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.28",
|
||||
"@next/swc-win32-x64-msvc": "14.2.28"
|
||||
"@next/swc-darwin-arm64": "14.2.30",
|
||||
"@next/swc-darwin-x64": "14.2.30",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.30",
|
||||
"@next/swc-linux-arm64-musl": "14.2.30",
|
||||
"@next/swc-linux-x64-gnu": "14.2.30",
|
||||
"@next/swc-linux-x64-musl": "14.2.30",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.30",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.30",
|
||||
"@next/swc-win32-x64-msvc": "14.2.30"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -19,7 +19,7 @@
|
||||
"vite": "^6.0.2"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.4.0",
|
||||
"@pipecat-ai/websocket-transport": "^0.4.2"
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/websocket-transport": "^1.0.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,21 +5,22 @@
|
||||
*/
|
||||
|
||||
import {
|
||||
RTVIClient,
|
||||
RTVIClientOptions,
|
||||
RTVIEvent,
|
||||
BotLLMTextData,
|
||||
Participant,
|
||||
PipecatClient,
|
||||
PipecatClientOptions,
|
||||
RTVIEvent, RTVIMessage, TranscriptData,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import {
|
||||
WebSocketTransport,
|
||||
TwilioSerializer,
|
||||
} from "@pipecat-ai/websocket-transport";
|
||||
} from '@pipecat-ai/websocket-transport';
|
||||
|
||||
class WebsocketClientApp {
|
||||
private static STREAM_SID = 'ws_mock_stream_sid';
|
||||
private static CALL_SID = 'ws_mock_call_sid';
|
||||
|
||||
private static STREAM_SID = "ws_mock_stream_sid"
|
||||
private static CALL_SID = "ws_mock_call_sid"
|
||||
|
||||
private rtviClient: RTVIClient | null = null;
|
||||
private rtviClient: PipecatClient | null = null;
|
||||
private connectBtn: HTMLButtonElement | null = null;
|
||||
private disconnectBtn: HTMLButtonElement | null = null;
|
||||
private statusSpan: HTMLElement | null = null;
|
||||
@@ -38,8 +39,12 @@ class WebsocketClientApp {
|
||||
* Set up references to DOM elements and create necessary media elements
|
||||
*/
|
||||
private setupDOMElements(): void {
|
||||
this.connectBtn = document.getElementById('connect-btn') as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById('disconnect-btn') as HTMLButtonElement;
|
||||
this.connectBtn = document.getElementById(
|
||||
'connect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById(
|
||||
'disconnect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
}
|
||||
@@ -80,13 +85,23 @@ class WebsocketClientApp {
|
||||
}
|
||||
|
||||
private async emulateTwilioMessages() {
|
||||
const connectedMessage={"event": "connected", "protocol": "Call", "version": "1.0.0"}
|
||||
const connectedMessage = {
|
||||
event: 'connected',
|
||||
protocol: 'Call',
|
||||
version: '1.0.0',
|
||||
};
|
||||
|
||||
const websocketTransport = this.rtviClient?.transport as WebSocketTransport
|
||||
void websocketTransport?.sendRawMessage(connectedMessage)
|
||||
const websocketTransport = this.rtviClient?.transport as WebSocketTransport;
|
||||
void websocketTransport?.sendRawMessage(connectedMessage);
|
||||
|
||||
const startMessage={"event": "start", "start": {"streamSid": WebsocketClientApp.STREAM_SID, "callSid": WebsocketClientApp.CALL_SID}}
|
||||
void websocketTransport?.sendRawMessage(startMessage)
|
||||
const startMessage = {
|
||||
event: 'start',
|
||||
start: {
|
||||
streamSid: WebsocketClientApp.STREAM_SID,
|
||||
callSid: WebsocketClientApp.CALL_SID,
|
||||
},
|
||||
};
|
||||
void websocketTransport?.sendRawMessage(startMessage);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -109,7 +124,7 @@ class WebsocketClientApp {
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track: MediaStreamTrack, participant?: Participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local && track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
@@ -117,8 +132,10 @@ class WebsocketClientApp {
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`);
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track: MediaStreamTrack, participant?: Participant) => {
|
||||
this.log(
|
||||
`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -128,7 +145,10 @@ class WebsocketClientApp {
|
||||
*/
|
||||
private setupAudioTrack(track: MediaStreamTrack): void {
|
||||
this.log('Setting up audio track');
|
||||
if (this.botAudio.srcObject && "getAudioTracks" in this.botAudio.srcObject) {
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
|
||||
if (oldTrack?.id === track.id) return;
|
||||
}
|
||||
@@ -143,23 +163,19 @@ class WebsocketClientApp {
|
||||
try {
|
||||
const startTime = Date.now();
|
||||
|
||||
const transport = new WebSocketTransport({
|
||||
const ws_opts = {
|
||||
serializer: new TwilioSerializer(),
|
||||
recorderSampleRate: 8000,
|
||||
playerSampleRate: 8000
|
||||
});
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
transport,
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:8765',
|
||||
endpoints: { connect: '/' },
|
||||
},
|
||||
playerSampleRate: 8000,
|
||||
ws_url: 'http://localhost:8765/ws',
|
||||
};
|
||||
const RTVIConfig: PipecatClientOptions = {
|
||||
transport: new WebSocketTransport(ws_opts),
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
onConnected: () => {
|
||||
this.emulateTwilioMessages()
|
||||
this.emulateTwilioMessages();
|
||||
this.updateStatus('Connected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = true;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = false;
|
||||
@@ -170,27 +186,21 @@ class WebsocketClientApp {
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = true;
|
||||
this.log('Client disconnected');
|
||||
},
|
||||
onBotReady: (data) => {
|
||||
onBotReady: (data: any) => {
|
||||
this.log(`Bot ready: ${JSON.stringify(data)}`);
|
||||
this.setupMediaTracks();
|
||||
},
|
||||
onUserTranscript: (data) => {
|
||||
onUserTranscript: (data: TranscriptData) => {
|
||||
if (data.final) {
|
||||
this.log(`User: ${data.text}`);
|
||||
}
|
||||
},
|
||||
onBotTranscript: (data) => this.log(`Bot: ${data.text}`),
|
||||
onMessageError: (error) => console.error('Message error:', error),
|
||||
onError: (error) => console.error('Error:', error),
|
||||
onBotTranscript: (data: BotLLMTextData) => this.log(`Bot: ${data.text}`),
|
||||
onMessageError: (error: RTVIMessage) => console.error('Message error:', error),
|
||||
onError: (error: RTVIMessage) => console.error('Error:', error),
|
||||
},
|
||||
}
|
||||
// @ts-ignore
|
||||
RTVIConfig.customConnectHandler = () => Promise.resolve(
|
||||
{
|
||||
ws_url: "/ws",
|
||||
}
|
||||
);
|
||||
this.rtviClient = new RTVIClient(RTVIConfig);
|
||||
};
|
||||
this.rtviClient = new PipecatClient(RTVIConfig);
|
||||
this.setupTrackListeners();
|
||||
|
||||
this.log('Initializing devices...');
|
||||
@@ -223,8 +233,13 @@ class WebsocketClientApp {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
this.rtviClient = null;
|
||||
if (this.botAudio.srcObject && "getAudioTracks" in this.botAudio.srcObject) {
|
||||
this.botAudio.srcObject.getAudioTracks().forEach((track) => track.stop());
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
this.botAudio.srcObject
|
||||
.getAudioTracks()
|
||||
.forEach((track) => track.stop());
|
||||
this.botAudio.srcObject = null;
|
||||
}
|
||||
} catch (error) {
|
||||
@@ -232,7 +247,6 @@ class WebsocketClientApp {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
declare global {
|
||||
|
||||
@@ -6,10 +6,10 @@ Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/
|
||||
|
||||
1. Run the bot server. See the [server README](../README).
|
||||
|
||||
2. Navigate to the `client/javascript` directory:
|
||||
2. Navigate to the `client` directory:
|
||||
|
||||
```bash
|
||||
cd client/javascript
|
||||
cd client
|
||||
```
|
||||
|
||||
3. Install dependencies:
|
||||
|
||||
563
examples/websocket/client/package-lock.json
generated
563
examples/websocket/client/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user