Compare commits
1 Commits
hush/usage
...
mb/fix-qui
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6531858970 |
22
.github/workflows/publish.yaml
vendored
22
.github/workflows/publish.yaml
vendored
@@ -5,25 +5,25 @@ on:
|
|||||||
inputs:
|
inputs:
|
||||||
gitref:
|
gitref:
|
||||||
type: string
|
type: string
|
||||||
description: 'what git tag to build (e.g. v0.0.74)'
|
description: "what git tag to build (e.g. v0.0.74)"
|
||||||
required: true
|
required: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
name: 'Build and upload wheels'
|
name: "Build and upload wheels"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repo
|
- name: Checkout repo
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
ref: ${{ github.event.inputs.gitref }}
|
ref: ${{ github.event.inputs.gitref }}
|
||||||
|
|
||||||
- name: Install uv
|
- name: Install uv
|
||||||
uses: astral-sh/setup-uv@v3
|
uses: astral-sh/setup-uv@v3
|
||||||
with:
|
with:
|
||||||
version: 'latest'
|
version: "latest"
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
run: uv python install 3.12
|
run: uv python install 3.10
|
||||||
- name: Install development dependencies
|
- name: Install development dependencies
|
||||||
run: uv sync --group dev
|
run: uv sync --group dev
|
||||||
- name: Build project
|
- name: Build project
|
||||||
@@ -35,9 +35,9 @@ jobs:
|
|||||||
path: ./dist
|
path: ./dist
|
||||||
|
|
||||||
publish-to-pypi:
|
publish-to-pypi:
|
||||||
name: 'Publish to PyPI'
|
name: "Publish to PyPI"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [build]
|
needs: [ build ]
|
||||||
environment:
|
environment:
|
||||||
name: pypi
|
name: pypi
|
||||||
url: https://pypi.org/p/pipecat-ai
|
url: https://pypi.org/p/pipecat-ai
|
||||||
@@ -56,12 +56,12 @@ jobs:
|
|||||||
print-hash: true
|
print-hash: true
|
||||||
|
|
||||||
publish-to-test-pypi:
|
publish-to-test-pypi:
|
||||||
name: 'Publish to Test PyPI'
|
name: "Publish to Test PyPI"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [build]
|
needs: [ build ]
|
||||||
environment:
|
environment:
|
||||||
name: testpypi
|
name: testpypi
|
||||||
url: https://test.pypi.org/p/pipecat-ai
|
url: https://pypi.org/p/pipecat-ai
|
||||||
permissions:
|
permissions:
|
||||||
id-token: write
|
id-token: write
|
||||||
steps:
|
steps:
|
||||||
@@ -70,7 +70,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
name: wheels
|
name: wheels
|
||||||
path: ./dist
|
path: ./dist
|
||||||
- name: Publish to Test PyPI
|
- name: Publish to PyPI
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
uses: pypa/gh-action-pypi-publish@release/v1
|
||||||
with:
|
with:
|
||||||
verbose: true
|
verbose: true
|
||||||
|
|||||||
12
.github/workflows/publish_test.yaml
vendored
12
.github/workflows/publish_test.yaml
vendored
@@ -4,7 +4,7 @@ on: workflow_dispatch
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
name: 'Build and upload wheels'
|
name: "Build and upload wheels"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repo
|
- name: Checkout repo
|
||||||
@@ -15,9 +15,9 @@ jobs:
|
|||||||
- name: Install uv
|
- name: Install uv
|
||||||
uses: astral-sh/setup-uv@v3
|
uses: astral-sh/setup-uv@v3
|
||||||
with:
|
with:
|
||||||
version: 'latest'
|
version: "latest"
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
run: uv python install 3.12
|
run: uv python install 3.10
|
||||||
- name: Install development dependencies
|
- name: Install development dependencies
|
||||||
run: uv sync --group dev
|
run: uv sync --group dev
|
||||||
- name: Build project
|
- name: Build project
|
||||||
@@ -29,12 +29,12 @@ jobs:
|
|||||||
path: ./dist
|
path: ./dist
|
||||||
|
|
||||||
publish-to-test-pypi:
|
publish-to-test-pypi:
|
||||||
name: 'Publish to Test PyPI'
|
name: "Publish to Test PyPI"
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: [build]
|
needs: [build]
|
||||||
environment:
|
environment:
|
||||||
name: testpypi
|
name: testpypi
|
||||||
url: https://test.pypi.org/p/pipecat-ai
|
url: https://pypi.org/p/pipecat-ai
|
||||||
permissions:
|
permissions:
|
||||||
id-token: write
|
id-token: write
|
||||||
steps:
|
steps:
|
||||||
@@ -43,7 +43,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
name: wheels
|
name: wheels
|
||||||
path: ./dist
|
path: ./dist
|
||||||
- name: Publish to Test PyPI
|
- name: Publish to PyPI
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
uses: pypa/gh-action-pypi-publish@release/v1
|
||||||
with:
|
with:
|
||||||
verbose: true
|
verbose: true
|
||||||
|
|||||||
568
CHANGELOG.md
568
CHANGELOG.md
@@ -7,507 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
### Added
|
## Added
|
||||||
|
|
||||||
- The runner `--folder` argument now supports downloading files from
|
|
||||||
subdirectories.
|
|
||||||
|
|
||||||
### Fixed
|
|
||||||
|
|
||||||
- Fixed an issue where `RimeHttpTTSService` and `PiperTTSService` could generate
|
|
||||||
incorrectly 16-bit aligned audio frames, potentially leading to internal
|
|
||||||
errors or static audio.
|
|
||||||
|
|
||||||
## [0.0.90] - 2025-10-10
|
|
||||||
|
|
||||||
### Added
|
|
||||||
|
|
||||||
- Added audio filter `KrispVivaFilter` using the Krisp VIVA SDK.
|
|
||||||
|
|
||||||
- Added `--folder` argument to the runner, allowing files saved in that folder
|
|
||||||
to be downloaded from `http://HOST:PORT/file/FILE`.
|
|
||||||
|
|
||||||
- Added `GeminiLiveVertexLLMService`, for accessing Gemini Live via Google
|
|
||||||
Vertex AI.
|
|
||||||
|
|
||||||
- Added some new configuration options to `GeminiLiveLLMService`:
|
|
||||||
|
|
||||||
- `thinking`
|
|
||||||
- `enable_affective_dialog`
|
|
||||||
- `proactivity`
|
|
||||||
|
|
||||||
Note that these new configuration options require using a newer model than
|
|
||||||
the default, like "gemini-2.5-flash-native-audio-preview-09-2025". The last
|
|
||||||
two require specifying `http_options=HttpOptions(api_version="v1alpha")`.
|
|
||||||
|
|
||||||
- Added `on_pipeline_error` event to `PipelineTask`. This event will get fired
|
|
||||||
when an `ErrorFrame` is pushed (use `FrameProcessor.push_error()`).
|
|
||||||
|
|
||||||
```python
|
|
||||||
@task.event_handler("on_pipeline_error")
|
|
||||||
async def on_pipeline_error(task: PipelineTask, frame: ErrorFrame):
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
- Added a `service_tier` `InputParam` to the `BaseOpenAILLMService`. This
|
|
||||||
parameter can influence the latency of the response. For example `"priority"`
|
|
||||||
will result in faster completions, but in exchange for a higher price.
|
|
||||||
|
|
||||||
### Changed
|
|
||||||
|
|
||||||
- Updated `GeminiLiveLLMService` to use the `google-genai` library rather than
|
|
||||||
use WebSockets directly.
|
|
||||||
|
|
||||||
### Deprecated
|
|
||||||
|
|
||||||
- `LivekitFrameSerializer` is now deprecated. Use `LiveKitTransport` instead.
|
|
||||||
|
|
||||||
- `pipecat.service.openai_realtime` is now deprecated, use
|
|
||||||
`pipecat.services.openai.realtime` instead or
|
|
||||||
`pipecat.services.azure.realtime` for Azure Realtime.
|
|
||||||
|
|
||||||
- `pipecat.service.aws_nova_sonic` is now deprecated, use
|
|
||||||
`pipecat.services.aws.nova_sonic` instead.
|
|
||||||
|
|
||||||
- `GeminiMultimodalLiveLLMService` is now deprecated, use
|
|
||||||
`GeminiLiveLLMService`.
|
|
||||||
|
|
||||||
### Fixed
|
|
||||||
|
|
||||||
- Fixed a `GoogleVertexLLMService` issue that would generate an error if no
|
|
||||||
token information was returned.
|
|
||||||
|
|
||||||
- `GeminiLiveLLMService` will now end gracefully (i.e. after the bot has
|
|
||||||
finished) upon receiving an `EndFrame`.
|
|
||||||
|
|
||||||
- `GeminiLiveLLMService` will try to seamlessly reconnect when it loses its
|
|
||||||
connection.
|
|
||||||
|
|
||||||
## [0.0.89] - 2025-10-07
|
|
||||||
|
|
||||||
### Fixed
|
|
||||||
|
|
||||||
- Reverted a change introduced in 0.0.88 that was causing pipelines to be frozen
|
|
||||||
when using interruption strategies and processors that block interruption
|
|
||||||
frames (e.g. `STTMuteFilter`).
|
|
||||||
|
|
||||||
## [0.0.88] - 2025-10-07
|
|
||||||
|
|
||||||
### Added
|
|
||||||
|
|
||||||
- Added support for Nano Banana models to `GoogleLLMService`. For example, you
|
|
||||||
can now use the `gemini-2.5-flash-image` model to generate images.
|
|
||||||
|
|
||||||
- Added `HumeTTSService` for text-to-speech synthesis using Hume AI's expressive
|
|
||||||
voice models. Provides high-quality, emotionally expressive speech synthesis
|
|
||||||
with support for various voice models. Includes example in
|
|
||||||
`examples/foundational/07ad-interruptible-hume.py`. Use with:
|
|
||||||
`uv pip install pipecat-ai[hume]`.
|
|
||||||
|
|
||||||
### Changed
|
|
||||||
|
|
||||||
- Updated default `GoogleLLMService` model to `gemini-2.5-flash`.
|
|
||||||
|
|
||||||
### Deprecated
|
|
||||||
|
|
||||||
- PlayHT is shutting down their API on December 31st, 2025. As a result,
|
|
||||||
`PlayHTTTSService` and `PlayHTHttpTTSService` are deprecated and will be
|
|
||||||
removed in a future version.
|
|
||||||
|
|
||||||
### Fixed
|
|
||||||
|
|
||||||
- Fixed an issue with `AWSNovaSonicLLMService` where the client wouldn't
|
|
||||||
connect due to a breaking change in the AWS dependency chain.
|
|
||||||
|
|
||||||
- `PermissionError` is now caught if NLTK's `punkt_tab` can't be downloaded.
|
|
||||||
|
|
||||||
- Fixed an issue that would cause wrong user/assistant context ordering when
|
|
||||||
using interruption strategies.
|
|
||||||
|
|
||||||
- Fixed RTVI incoming message handling, broken in 0.0.87.
|
|
||||||
|
|
||||||
## [0.0.87] - 2025-10-02
|
|
||||||
|
|
||||||
### Added
|
|
||||||
|
|
||||||
- Added `WebsocketSTTService` base class for websocket-based STT services.
|
|
||||||
Combines STT functionality with websocket connectivity, providing automatic
|
|
||||||
error handling and reconnection capabilities with exponential backoff.
|
|
||||||
|
|
||||||
- Added `DeepgramFluxSTTService` for real-time speech recognition using
|
|
||||||
Deepgram's Flux WebSocket API. Flux understands conversational flow and
|
|
||||||
automatically handles turn-taking.
|
|
||||||
|
|
||||||
- Added RTVI messages for user/bot audio levels and system logs.
|
|
||||||
|
|
||||||
- Include OpenAI-based LLM services cached tokens to `MetricsFrame`.
|
|
||||||
|
|
||||||
### Changed
|
|
||||||
|
|
||||||
- Updated the default model for `AnthropicLLMService` to
|
|
||||||
`claude-sonnet-4-5-20250929`.
|
|
||||||
|
|
||||||
### Deprecated
|
|
||||||
|
|
||||||
- `DailyTransportMessageFrame` and `DailyTransportMessageUrgentFrame` are
|
|
||||||
deprecated, use `DailyOutputTransportMessageFrame` and
|
|
||||||
`DailyOutputTransportMessageUrgentFrame` respectively instead.
|
|
||||||
|
|
||||||
- `LiveKitTransportMessageFrame` and `LiveKitTransportMessageUrgentFrame` are
|
|
||||||
deprecated, use `LiveKitOutputTransportMessageFrame` and
|
|
||||||
`LiveKitOutputTransportMessageUrgentFrame` respectively instead.
|
|
||||||
|
|
||||||
- `TransportMessageFrame` and `TransportMessageUrgentFrame` are deprecated, use
|
|
||||||
`OutputTransportMessageFrame` and `OutputTransportMessageUrgentFrame`
|
|
||||||
respectively instead.
|
|
||||||
|
|
||||||
- `InputTransportMessageUrgentFrame` is deprecated, use
|
|
||||||
`InputTransportMessageFrame` instead.
|
|
||||||
|
|
||||||
- `DailyUpdateRemoteParticipantsFrame` is deprecated and will be removed in a
|
|
||||||
future version. Instead, create your own custom frame and handle it in the
|
|
||||||
`@transport.output().event_handler("on_after_push_frame")` event handler or a
|
|
||||||
custom processor.
|
|
||||||
|
|
||||||
## Fixed
|
|
||||||
|
|
||||||
- Fixed an issue in `AWSBedrockLLMService` where timeout exceptions weren't
|
|
||||||
being detected.
|
|
||||||
|
|
||||||
- Fixed a `PipelineTask` issue that could prevent the application from exiting if
|
|
||||||
`task.cancel()` was called when the task was already finished.
|
|
||||||
|
|
||||||
- Fixed an issue where local SmartTurn was not being run in a separate thread.
|
|
||||||
|
|
||||||
## [0.0.86] - 2025-09-24
|
|
||||||
|
|
||||||
### Added
|
|
||||||
|
|
||||||
- Added `HeyGenTransport`. This is an integration for HeyGen Interactive
|
|
||||||
Avatar. A video service that handles audio streaming and requests HeyGen to
|
|
||||||
generate avatar video responses. (see https://www.heygen.com/). When used, the
|
|
||||||
Pipecat bot joins the same virtual room as the HeyGen Avatar and the user.
|
|
||||||
|
|
||||||
- Added support to `TwilioFrameSerializer` for `region` and `edge` settings.
|
|
||||||
|
|
||||||
- Added support for using universal `LLMContext` with:
|
|
||||||
|
|
||||||
- `LLMLogObserver`
|
|
||||||
- `GatedLLMContextAggregator` (formerly `GatedOpenAILLMContextAggregator`)
|
|
||||||
- `LangchainProcessor`
|
|
||||||
- `Mem0MemoryService`
|
|
||||||
|
|
||||||
- Added `StrandsAgentProcessor` which allows you to use the Strands Agents
|
|
||||||
framework to build your voice agents.
|
|
||||||
See https://strandsagents.com
|
|
||||||
|
|
||||||
- Added `ElevenLabsSTTService` for speech-to-text transcription.
|
|
||||||
|
|
||||||
- Added a peer connection monitor to the `SmallWebRTCConnection` that
|
|
||||||
automatically disconnects if the connection fails to establish within
|
|
||||||
the timeout (1 minute by default).
|
|
||||||
|
|
||||||
- Added memory cleanup improvements to reduce memory peaks.
|
|
||||||
|
|
||||||
- Added `on_before_process_frame`, `on_after_process_frame`,
|
|
||||||
`on_before_push_frame` and `on_after_push_frame`. These are synchronous events
|
|
||||||
that get called before and after a frame is processed or pushed. Note that
|
|
||||||
these events are synchronous so they should ideally perform lightweight tasks
|
|
||||||
in order to not block the pipeline. See
|
|
||||||
`examples/foundational/45-before-and-after-events.py`.
|
|
||||||
|
|
||||||
- Added `on_before_leave` synchronous event to `DailyTransport`.
|
|
||||||
|
|
||||||
- Added `on_before_disconnect` synchronous event to `LiveKitTransport`.
|
|
||||||
|
|
||||||
- It is now possible to register synchronous event handlers. By default, all
|
|
||||||
event handlers are executed in a separate task. However, in some cases we want
|
|
||||||
to guarantee order of execution, for example, executing something before
|
|
||||||
disconnecting a transport.
|
|
||||||
|
|
||||||
```python
|
|
||||||
self._register_event_handler("on_event_name", sync=True)
|
|
||||||
```
|
|
||||||
|
|
||||||
- Added support for global location in `GoogleVertexLLMService`. The service now
|
|
||||||
supports both regional locations (e.g., "us-east4") and the "global" location
|
|
||||||
for Vertex AI endpoints. When using "global" location, the service will use
|
|
||||||
`aiplatform.googleapis.com` as the API host instead of the regional format.
|
|
||||||
|
|
||||||
- Added `on_pipeline_finished` event to `PipelineTask`. This event will get
|
|
||||||
fired when the pipeline is done running. This can be the result of a
|
|
||||||
`StopFrame`, `CancelFrame` or `EndFrame`.
|
|
||||||
|
|
||||||
```python
|
|
||||||
@task.event_handler("on_pipeline_finished")
|
|
||||||
async def on_pipeline_finished(task: PipelineTask, frame: Frame):
|
|
||||||
...
|
|
||||||
```
|
|
||||||
|
|
||||||
- Added support for new RTVI `send-text` event, along with the ability to toggle
|
|
||||||
the audio response off (skip tts) while handling the new context.
|
|
||||||
|
|
||||||
### Changed
|
|
||||||
|
|
||||||
- Updated `aiortc` to 1.13.0.
|
|
||||||
|
|
||||||
- Updated `sentry` to 2.38.0.
|
|
||||||
|
|
||||||
- `BaseOutputTransport` methods `write_audio_frame` and `write_video_frame` now
|
|
||||||
return a boolean to indicate if the transport implementation was able to write
|
|
||||||
the given frame or not.
|
|
||||||
|
|
||||||
- Updated Silero VAD model to v6.
|
|
||||||
|
|
||||||
- Updated `livekit` to 1.0.13.
|
|
||||||
|
|
||||||
- `torch` and `torchaudio` are no longer required for running Smart Turn
|
|
||||||
locally. This avoids gigabytes of dependencies being installed.
|
|
||||||
|
|
||||||
- Updated `websockets` dependency to support version 15.0. Removed deprecated
|
|
||||||
usage of `ConnectionClosed.code` and `ConnectionClosed.reason` attributes in
|
|
||||||
`AWSTranscribeSTTService` for compatibility.
|
|
||||||
|
|
||||||
- Refactored `pyproject.toml` to reduce websockets dependency repetition using
|
|
||||||
self-referencing extras. All websockets-dependent services now reference a
|
|
||||||
shared `websockets-base` extra.
|
|
||||||
|
|
||||||
### Deprecated
|
|
||||||
|
|
||||||
- `GladiaSTTService`'s `confidence` arg is deprecated. `confidence` is no
|
|
||||||
longer needed to determine which transcription or translation frames to
|
|
||||||
emit.
|
|
||||||
|
|
||||||
- `PipelineTask` events `on_pipeline_stopped`, `on_pipeline_ended` and
|
|
||||||
`on_pipeline_cancelled` are now deprecated. Use `on_pipeline_finished`
|
|
||||||
instead.
|
|
||||||
|
|
||||||
- Support for the RTVI `append-to-context` event, in lieu of the new `send-text`
|
|
||||||
event and making way for future events like `send-image`.
|
|
||||||
|
|
||||||
### Fixed
|
|
||||||
|
|
||||||
- Fixed an issue where the pipeline could freeze if a task cancellation never
|
|
||||||
completed because a third-party library swallowed asyncio.CancelledError. We
|
|
||||||
now apply a timeout to task cancellations to prevent these freezes. If the
|
|
||||||
timeout is reached, the system logs warnings and leaves dangling tasks behind,
|
|
||||||
which can help diagnose where cancellation is being blocked.
|
|
||||||
|
|
||||||
- Fixed an `AudioBufferProcessor` issue that was causing user audio to be
|
|
||||||
missing in stereo recordings causing bot and user overlaps.
|
|
||||||
|
|
||||||
- Fixed a `BaseOutputTransport` issue that could produce large saved
|
|
||||||
`AudioBufferProcessor` files when using an audio mixer.
|
|
||||||
|
|
||||||
- Fixed a `PipelineRunner` issue on Windows where setting up SIGINT and SIGTERM
|
|
||||||
was raising an exception.
|
|
||||||
|
|
||||||
- Fixed an issue where multiple handlers for an event would not run in parallel.
|
|
||||||
|
|
||||||
- Fixed `DailyTransport.sip_call_transfer()` to automatically use the session
|
|
||||||
ID from the `on_dialin_connected` event, when not explicitly provided. Now
|
|
||||||
supports cold transfers (from incoming dial-in calls) by automatically
|
|
||||||
tracking session IDs from connection events.
|
|
||||||
|
|
||||||
- Fixed a memory leak in `SmallWebRTCTransport`. In `aiortc`, when you receive
|
|
||||||
a `MediaStreamTrack` (audio or video), frames are produced asynchronously. If
|
|
||||||
the code never consumes these frames, they are queued in memory, causing a
|
|
||||||
memory leak.
|
|
||||||
|
|
||||||
- Fixed an issue in `AsyncAITTSService`, where `TTSTextFrames` were not being
|
|
||||||
pushed.
|
|
||||||
|
|
||||||
- Fixed an issue that would cause `push_interruption_task_frame_and_wait()` to
|
|
||||||
not wait if a previous interruption had already happened.
|
|
||||||
|
|
||||||
- Fixed a couple of bugs in `ServiceSwitcher`:
|
|
||||||
|
|
||||||
- Using multiple `ServiceSwitcher`s in a pipeline would result in an error.
|
|
||||||
- `ServiceSwitcherFrame`s (such as `ManuallySwitchServiceFrame`s) were having
|
|
||||||
an effect too early, essentially "jumping the queue" in terms of pipeline
|
|
||||||
frame ordering.
|
|
||||||
|
|
||||||
- Fixed a self-cancellation deadlock in `UserIdleProcessor` when returning
|
|
||||||
`False` from an idle callback. The task now terminates naturally instead of
|
|
||||||
attempting to cancel itself.
|
|
||||||
|
|
||||||
- Fixed an issue in `AudioBufferProcessor` where a recording is not created
|
|
||||||
when a bot speaks and user input is blocked.
|
|
||||||
|
|
||||||
- Fixed a `FastAPIWebsocketTransport` and `SmallWebRTCTransport` issue where
|
|
||||||
`on_client_disconnected` would be triggered when the bot ends the
|
|
||||||
conversation. That is, `on_client_disconnected` should only be triggered when
|
|
||||||
the remote client actually disconnects.
|
|
||||||
|
|
||||||
- Fixed an issue in `HeyGenVideoService` where the `BotStartedSpeakingFrame`
|
|
||||||
was blocked from moving through the Pipeline.
|
|
||||||
|
|
||||||
## [0.0.85] - 2025-09-12
|
|
||||||
|
|
||||||
### Added
|
|
||||||
|
|
||||||
- `AzureSTTService` now pushes interim transcriptions.
|
|
||||||
|
|
||||||
- Added `voice_cloning_key` to `GoogleTTSService` to support custom cloned
|
|
||||||
voices.
|
|
||||||
|
|
||||||
- Added `speaking_rate` to `GoogleTTSService.InputParams` to control the
|
|
||||||
speaking rate.
|
|
||||||
|
|
||||||
- Added a `speed` arg to `OpenAITTSService` to control the speed of the voice
|
|
||||||
response.
|
|
||||||
|
|
||||||
- Added `FrameProcessor.push_interruption_task_frame_and_wait()`. Use this
|
|
||||||
method to programmatically interrupt the bot from any part of the
|
|
||||||
pipeline. This guarantees that all the processors in the pipeline are
|
|
||||||
interrupted in order (from upstream to downstream). Internally, this works by
|
|
||||||
first pushing an `InterruptionTaskFrame` upstream until it reaches the
|
|
||||||
pipeline task. The pipeline task then generates an `InterruptionFrame`, which
|
|
||||||
flows downstream through all processors. Once the `InterruptionFrame` has
|
|
||||||
reached the processor waiting for the interruption, the function returns and
|
|
||||||
execution continues after the call. Think of it as sending an upstream request
|
|
||||||
for interruption and waiting until the acknowledgment flows back downstream.
|
|
||||||
|
|
||||||
- Added new base `TaskFrame` (which is a system frame). This is the base class
|
|
||||||
for all task frames (`EndTaskFrame`, `CancelTaskFrame`, etc.) that are meant
|
|
||||||
to be pushed upstream to reach the pipeline task.
|
|
||||||
|
|
||||||
- Expanded support for universal `LLMContext` to the AWS Bedrock LLM service.
|
|
||||||
Using the universal `LLMContext` and associated `LLMContextAggregatorPair` is
|
|
||||||
a pre-requisite for using `LLMSwitcher` to switch between LLMs at runtime.
|
|
||||||
|
|
||||||
- Added new fields to the development runner's `parse_telephony_websocket`
|
|
||||||
method in support of providing dynamic data to a bot.
|
|
||||||
|
|
||||||
- Twilio: Added a new `body` parameter, which parses the websocket message
|
|
||||||
for `customParameters`. Provide data via the `Parameter` nouns in your
|
|
||||||
TwiML to use this feature.
|
|
||||||
- Telnyx & Exotel: Both providers make the `to` and `from` phone numbers
|
|
||||||
available in the websocket messages. You can now access these numbers as
|
|
||||||
`call_data["to"]` and `call_data["from"]`.
|
|
||||||
|
|
||||||
Note: Each telephony provider offers different features. Refer to the
|
|
||||||
corresponding example in `pipecat-examples` to see how to pass custom data
|
|
||||||
to your bot.
|
|
||||||
|
|
||||||
- Added `body` to the `WebsocketRunnerArguments` as an optional parameter.
|
|
||||||
Custom `body` information can be passed from the server into the bot file via
|
|
||||||
the `bot()` method using this new parameter.
|
|
||||||
|
|
||||||
- Added video streaming support to `LiveKitTransport`.
|
|
||||||
|
|
||||||
- Added `OpenAIRealtimeLLMService` and `AzureRealtimeLLMService` which provide
|
|
||||||
access to OpenAI Realtime.
|
|
||||||
|
|
||||||
### Changed
|
|
||||||
|
|
||||||
- `pipeline.tests.utils.run_test()` now allows passing `PipelineParams` instead
|
|
||||||
of individual parameters.
|
|
||||||
|
|
||||||
### Removed
|
|
||||||
|
|
||||||
- Remove `VisionImageRawFrame` in favor of context frames (`LLMContextFrame` or
|
|
||||||
`OpenAILLMContextFrame`).
|
|
||||||
|
|
||||||
### Deprecated
|
|
||||||
|
|
||||||
- `BotInterruptionFrame` is now deprecated, use `InterruptionTaskFrame` instead.
|
|
||||||
|
|
||||||
- `StartInterruptionFrame` is now deprecated, use `InterruptionFrame` instead.
|
|
||||||
|
|
||||||
- Deprecate `VisionImageFrameAggregator` because `VisionImageRawFrame` has been
|
|
||||||
removed. See the `12*` examples for the new recommended replacement pattern.
|
|
||||||
|
|
||||||
- `NoisereduceFilter` is now deprecated and will be removed in a future
|
|
||||||
version. Use other audio filters like `KrispFilter` or `AICFilter`.
|
|
||||||
|
|
||||||
- Deprecated `OpenAIRealtimeBetaLLMService` and `AzureRealtimeBetaLLMService`.
|
|
||||||
Use `OpenAIRealtimeLLMService` and `AzureRealtimeLLMService`, respectively.
|
|
||||||
Each service will be removed in an upcoming version, 1.0.0.
|
|
||||||
|
|
||||||
### Fixed
|
|
||||||
|
|
||||||
- Fixed a `BaseOutputTransport` issue that caused incorrect detection of when
|
|
||||||
the bot stopped talking while using an audio mixer.
|
|
||||||
|
|
||||||
- Fixed a `LiveKitTransport` issue where RTVI messages were not properly
|
|
||||||
encoded.
|
|
||||||
|
|
||||||
- Add additional fixups to Mistral context messages to ensure they meet
|
|
||||||
Mistral-specific requirements, avoiding Mistral "invalid request" errors.
|
|
||||||
|
|
||||||
- Fixed `DailyTransport` transcription handling to gracefully handle missing
|
|
||||||
`rawResponse` field in transcription messages, preventing KeyError crashes.
|
|
||||||
|
|
||||||
## [0.0.84] - 2025-09-05
|
|
||||||
|
|
||||||
### Added
|
|
||||||
|
|
||||||
- Add the ability to send DTMF to `LiveKitTransport`.
|
|
||||||
|
|
||||||
- Expanded support for universal `LLMContext` to the Anthropic LLM service.
|
|
||||||
Using the universal `LLMContext` and associated `LLMContextAggregatorPair` is
|
|
||||||
a pre-requisite for using `LLMSwitcher` to switch between LLMs at runtime.
|
|
||||||
|
|
||||||
### Changed
|
|
||||||
|
|
||||||
- Updated `daily-python` to 0.19.9.
|
|
||||||
|
|
||||||
- Restored `DailyTransport`'s native DTMF support using Daily's `send_dtmf()`
|
|
||||||
method instead of generated audio tones.
|
|
||||||
|
|
||||||
### Fixed
|
|
||||||
|
|
||||||
- Fixed a `AWSBedrockLLMService` crash caused by an extra `await`.
|
|
||||||
|
|
||||||
- Fixed a `OpenAIImageGenService` issue where it was not creating
|
|
||||||
`URLImageRawFrame` correctly.
|
|
||||||
|
|
||||||
## [0.0.83] - 2025-09-03
|
|
||||||
|
|
||||||
### Added
|
|
||||||
|
|
||||||
- Added multilingual support for AsyncAI in `AsyncAITTSService` and `AsyncAIHttpTTSService`.
|
|
||||||
|
|
||||||
- New `languages`: `es`, `fr`, `de`, `it`.
|
|
||||||
|
|
||||||
- Added new frames `InputTransportMessageUrgentFrame` and
|
|
||||||
`DailyInputTransportMessageUrgentFrame` for transport messages received from
|
|
||||||
external sources.
|
|
||||||
|
|
||||||
- Added `UserSpeakingFrame`. This will be sent upstream and downstream while VAD
|
|
||||||
detects the user is speaking.
|
|
||||||
|
|
||||||
- Expanded support for universal `LLMContext` to more LLM services. Using the
|
|
||||||
universal `LLMContext` and associated `LLMContextAggregatorPair` is a
|
|
||||||
pre-requisite for using `LLMSwitcher` to switch between LLMs at runtime.
|
|
||||||
Here are the newly-supported services:
|
|
||||||
|
|
||||||
- Azure
|
|
||||||
- Cerebras
|
|
||||||
- Deepseek
|
|
||||||
- Fireworks AI
|
|
||||||
- Google Vertex AI
|
|
||||||
- Grok
|
|
||||||
- Groq
|
|
||||||
- Mistral
|
|
||||||
- NVIDIA NIM
|
|
||||||
- Ollama
|
|
||||||
- OpenPipe
|
|
||||||
- OpenRouter
|
|
||||||
- Perplexity
|
|
||||||
- Qwen
|
|
||||||
- SambaNova
|
|
||||||
- Together.ai
|
|
||||||
|
|
||||||
- Added support for WhatsApp User-initiated Calls.
|
|
||||||
|
|
||||||
- Added new audio filter `AICFilter`, speech enhancement for improving VAD/STT
|
|
||||||
performance, no ONNX dependency.
|
|
||||||
See https://ai-coustics.com/sdk/
|
|
||||||
|
|
||||||
- Added a timeout around cancel input tasks to prevent indefinite hangs when
|
|
||||||
cancellation is swallowed by third-party code.
|
|
||||||
|
|
||||||
- Added `pipecat.extensions.ivr` for automated IVR system navigation with
|
- Added `pipecat.extensions.ivr` for automated IVR system navigation with
|
||||||
configurable goals and conversation handling. Supports DTMF input, verbal
|
configurable goals and conversation handling. Supports DTMF input, verbal
|
||||||
@@ -542,34 +42,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
sending it through the transport. This makes sending DTMF generic across all
|
sending it through the transport. This makes sending DTMF generic across all
|
||||||
output transports.
|
output transports.
|
||||||
|
|
||||||
- Added new config parameters to `GladiaSTTService`.
|
- Added new config parameters to `GladiaSTTService`.
|
||||||
- PreProcessingConfig > `audio_enhancer` to enhance audio quality.
|
- PreProcessingConfig > `audio_enhancer` to enhance audio quality.
|
||||||
- CustomVocabularyItem > `pronunciations` and `language` to specify special
|
- CustomVocabularyItem > `pronunciations` and `language` to specify special pronunciations and in which language it will be pronounced.
|
||||||
pronunciations and in which language it will be pronounced.
|
|
||||||
|
|
||||||
### Changed
|
## Changed
|
||||||
|
|
||||||
- `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame` are also pushed
|
|
||||||
upstream.
|
|
||||||
|
|
||||||
- `ParallelPipeline` now waits for `CancelFrame` to finish in all branches
|
|
||||||
before pushing it downstream.
|
|
||||||
|
|
||||||
- Added `sip_codecs` to the `DailyRoomSipParams`.
|
|
||||||
|
|
||||||
- Updated the `configure()` function in `pipecat.runner.daily` to include new
|
|
||||||
args to create SIP-enabled rooms. Additionally, added new args to control the
|
|
||||||
room and token expiration durations.
|
|
||||||
|
|
||||||
- `pipecat.frames.frames.KeypadEntry` is deprecated and has been moved to
|
- `pipecat.frames.frames.KeypadEntry` is deprecated and has been moved to
|
||||||
`pipecat.audio.dtmf.types.KeypadEntry`.
|
`pipecat.audio.dtmf.types.KeypadEntry`.
|
||||||
|
|
||||||
- Updated `RimeTTSService`'s flush_audio message to conform with Rime's official
|
## Removed
|
||||||
API.
|
|
||||||
|
|
||||||
- Updated the default model for `CerebrasLLMService` to GPT-OSS-120B.
|
|
||||||
|
|
||||||
### Removed
|
|
||||||
|
|
||||||
- Remove `StopInterruptionFrame`. This was a legacy frame that was not being
|
- Remove `StopInterruptionFrame`. This was a legacy frame that was not being
|
||||||
used really anywhere and it didn't provide any useful meaning. It was only
|
used really anywhere and it didn't provide any useful meaning. It was only
|
||||||
@@ -581,41 +63,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
|
|
||||||
- Remove deprecated `DailyTransport.send_dtmf()`.
|
- Remove deprecated `DailyTransport.send_dtmf()`.
|
||||||
|
|
||||||
### Deprecated
|
## Deprecated
|
||||||
|
|
||||||
- Transports have been re-organized.
|
|
||||||
|
|
||||||
```
|
|
||||||
pipecat.transports.network.small_webrtc -> pipecat.transports.smallwebrtc.transport
|
|
||||||
pipecat.transports.network.webrtc_connection -> pipecat.transports.smallwebrtc.connection
|
|
||||||
pipecat.transports.network.websocket_client -> pipecat.transports.websocket.client
|
|
||||||
pipecat.transports.network.websocket_server -> pipecat.transports.websocket.server
|
|
||||||
pipecat.transports.network.fastapi_websocket -> pipecat.transports.websocket.fastapi
|
|
||||||
pipecat.transports.services.daily -> pipecat.transports.daily.transport
|
|
||||||
pipecat.transports.services.helpers.daily_rest -> pipecat.transports.daily.utils
|
|
||||||
pipecat.transports.services.livekit -> pipecat.transports.livekit.transport
|
|
||||||
pipecat.transports.services.tavus -> pipecat.transports.tavus.transport
|
|
||||||
```
|
|
||||||
|
|
||||||
- `pipecat.frames.frames.KeypadEntry` is deprecated; use
|
- `pipecat.frames.frames.KeypadEntry` is deprecated; use
|
||||||
`pipecat.audio.dtmf.types.KeypadEntry` instead.
|
`pipecat.audio.dtmf.types.KeypadEntry` instead.
|
||||||
|
|
||||||
### Fixed
|
## Fixed
|
||||||
|
|
||||||
- Fixed an issue where messages received from the transport were always being resent.
|
|
||||||
|
|
||||||
- Fixed `SmallWebRTCTransport` to not use `mid` to decide if the transceiver should
|
|
||||||
be `sendrecv` or not.
|
|
||||||
|
|
||||||
- Fixed an issue where Deepgram swallowed `asyncio.CancelledError` during
|
|
||||||
disconnect, preventing tasks from being cancelled.
|
|
||||||
|
|
||||||
- Fixed an issue where `PipelineTask` was not cleaning up the observers.
|
- Fixed an issue where `PipelineTask` was not cleaning up the observers.
|
||||||
|
|
||||||
### Performance
|
|
||||||
|
|
||||||
- Reduced latency and improved memory performance in `Mem0MemoryService`.
|
|
||||||
|
|
||||||
## [0.0.82] - 2025-08-28
|
## [0.0.82] - 2025-08-28
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
@@ -1500,7 +956,7 @@ quality and critical bugs impacting `ParallelPipelines` functionality.**
|
|||||||
- Added `session_token` parameter to `AWSNovaSonicLLMService`.
|
- Added `session_token` parameter to `AWSNovaSonicLLMService`.
|
||||||
|
|
||||||
- Added Gemini Multimodal Live File API for uploading, fetching, listing, and
|
- Added Gemini Multimodal Live File API for uploading, fetching, listing, and
|
||||||
deleting files. See `26f-gemini-live-files-api.py` for example usage.
|
deleting files. See `26f-gemini-multimodal-live-files-api.py` for example usage.
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
||||||
@@ -3506,7 +2962,7 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
|||||||
- Added the new modalities option and helper function to set Gemini output
|
- Added the new modalities option and helper function to set Gemini output
|
||||||
modalities.
|
modalities.
|
||||||
|
|
||||||
- Added `examples/foundational/26d-gemini-live-text.py` which is
|
- Added `examples/foundational/26d-gemini-multimodal-live-text.py` which is
|
||||||
using Gemini as TEXT modality and using another TTS provider for TTS process.
|
using Gemini as TEXT modality and using another TTS provider for TTS process.
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
@@ -3693,9 +3149,9 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
|||||||
- Added new foundational examples for `GeminiMultimodalLiveLLMService`:
|
- Added new foundational examples for `GeminiMultimodalLiveLLMService`:
|
||||||
|
|
||||||
- `26-gemini-multimodal-live.py`
|
- `26-gemini-multimodal-live.py`
|
||||||
- `26a-gemini-live-transcription.py`
|
- `26a-gemini-multimodal-live-transcription.py`
|
||||||
- `26b-gemini-live-video.py`
|
- `26b-gemini-multimodal-live-video.py`
|
||||||
- `26c-gemini-live-video.py`
|
- `26c-gemini-multimodal-live-video.py`
|
||||||
|
|
||||||
- Added `SimliVideoService`. This is an integration for Simli AI avatars.
|
- Added `SimliVideoService`. This is an integration for Simli AI avatars.
|
||||||
(see https://www.simli.com)
|
(see https://www.simli.com)
|
||||||
|
|||||||
@@ -1,336 +0,0 @@
|
|||||||
# Community Integrations Guide
|
|
||||||
|
|
||||||
Pipecat welcomes community-maintained integrations! As our ecosystem grows, we've established a process for any developer to create and maintain their own service integrations while ensuring discoverability for the Pipecat community.
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
**What we support:** Community-maintained integrations that live in separate repositories and are maintained by their authors.
|
|
||||||
|
|
||||||
**What we don't do:** The Pipecat team does not code review, test, or maintain community integrations. We provide guidance and list approved integrations for discoverability.
|
|
||||||
|
|
||||||
**Why this approach:** This allows the community to move quickly while keeping the Pipecat core team focused on maintaining the framework itself.
|
|
||||||
|
|
||||||
## Submitting your Integration
|
|
||||||
|
|
||||||
To be listed as an official community integration, follow these steps:
|
|
||||||
|
|
||||||
### Step 1: Build Your Integration
|
|
||||||
|
|
||||||
Create your integration following the patterns and examples shown in the "Integration Patterns and Examples" section below.
|
|
||||||
|
|
||||||
### Step 2: Set Up Your Repository
|
|
||||||
|
|
||||||
Your repository must contain these components:
|
|
||||||
|
|
||||||
- **Source code** - Complete implementation following Pipecat patterns
|
|
||||||
- **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational))
|
|
||||||
- **README.md** - Must include:
|
|
||||||
|
|
||||||
- Introduction and explanation of your integration
|
|
||||||
- Installation instructions
|
|
||||||
- Usage instructions with Pipecat Pipeline
|
|
||||||
- How to run your example
|
|
||||||
- Pipecat version compatibility (e.g., "Tested with Pipecat v0.0.86")
|
|
||||||
- Company attribution: If you work for the company providing the service, please mention this in your README. This helps build confidence that the integration will be actively maintained.
|
|
||||||
|
|
||||||
- **LICENSE** - Permissive license (BSD-2 like Pipecat, or equivalent open source terms)
|
|
||||||
- **Code documentation** - Source code with docstrings (we recommend following [Pipecat's docstring conventions](https://github.com/pipecat-ai/pipecat/blob/main/CONTRIBUTING.md#docstring-conventions))
|
|
||||||
- **Changelog** - Maintain a changelog for version updates
|
|
||||||
|
|
||||||
### Step 3: Join Discord
|
|
||||||
|
|
||||||
Join our Discord: https://discord.gg/pipecat
|
|
||||||
|
|
||||||
### Step 4: Submit for Listing
|
|
||||||
|
|
||||||
Submit a pull request to add your integration to our [Community Integrations documentation page](https://docs.pipecat.ai/server/services/community-integrations).
|
|
||||||
|
|
||||||
**To submit:**
|
|
||||||
|
|
||||||
1. Fork the [Pipecat docs repository](https://github.com/pipecat-ai/docs)
|
|
||||||
2. Edit the file `server/services/community-integrations.mdx`
|
|
||||||
3. Add your integration to the appropriate service category table with:
|
|
||||||
- Service name
|
|
||||||
- Link to your repository
|
|
||||||
- Maintainer GitHub username(s)
|
|
||||||
4. Include a link to your demo video (approx 30-60 seconds) in your PR description showing:
|
|
||||||
- Core functionality of your integration
|
|
||||||
- Handling of an interruption (if applicable to service type)
|
|
||||||
5. Submit your pull request
|
|
||||||
|
|
||||||
Once your PR is submitted, post in the `#community-integrations` Discord channel to let us know.
|
|
||||||
|
|
||||||
## Integration Patterns and Examples
|
|
||||||
|
|
||||||
### STT (Speech-to-Text) Services
|
|
||||||
|
|
||||||
#### Websocket-based Services
|
|
||||||
|
|
||||||
**Base class:** `STTService`
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
|
|
||||||
- [DeepgramSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/deepgram/stt.py)
|
|
||||||
- [SpeechmaticsSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/speechmatics/stt.py)
|
|
||||||
|
|
||||||
#### File-based Services
|
|
||||||
|
|
||||||
**Base class:** `SegmentedSTTService`
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
|
|
||||||
- [RivaSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/riva/stt.py)
|
|
||||||
- [FalSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/fal/stt.py)
|
|
||||||
|
|
||||||
#### Key requirements:
|
|
||||||
|
|
||||||
- STT services should push `InterimTranscriptionFrames` and `TranscriptionFrames`
|
|
||||||
- If confidence values are available, filter for values >50% confidence
|
|
||||||
|
|
||||||
### LLM (Large Language Model) Services
|
|
||||||
|
|
||||||
#### OpenAI-Compatible Services
|
|
||||||
|
|
||||||
**Base class:** `OpenAILLMService`
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
|
|
||||||
- [AzureLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/azure/llm.py)
|
|
||||||
- [GrokLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/grok/llm.py) - Shows overriding the base class where needed
|
|
||||||
|
|
||||||
#### Non-OpenAI Compatible Services
|
|
||||||
|
|
||||||
**Requires:** Full implementation
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
|
|
||||||
- [AnthropicLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/anthropic/llm.py)
|
|
||||||
- [GoogleLLMService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/llm.py)
|
|
||||||
|
|
||||||
#### Key requirements:
|
|
||||||
|
|
||||||
- **Frame sequence:** Output must follow this frame sequence pattern:
|
|
||||||
|
|
||||||
- `LLMFullResponseStartFrame` - Signals the start of an LLM response
|
|
||||||
- `LLMTextFrame` - Contains LLM content, typically streamed as tokens
|
|
||||||
- `LLMFullResponseEndFrame` - Signals the end of an LLM response
|
|
||||||
|
|
||||||
- **Context aggregation:** Implement context aggregation to collect user and assistant content:
|
|
||||||
- Aggregators come in pairs with a `user()` instance and `assistant()` instance
|
|
||||||
- Context must adhere to the `LLMContext` universal format
|
|
||||||
- Aggregators should handle adding messages, function calls, and images to the context
|
|
||||||
|
|
||||||
### TTS (Text-to-Speech) Services
|
|
||||||
|
|
||||||
#### AudioContextWordTTSService
|
|
||||||
|
|
||||||
**Use for:** Websocket-based services supporting word/timestamp alignment
|
|
||||||
|
|
||||||
**Example:**
|
|
||||||
|
|
||||||
- [CartesiaTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/cartesia/tts.py)
|
|
||||||
|
|
||||||
#### InterruptibleTTSService
|
|
||||||
|
|
||||||
**Use for:** Websocket-based services without word/timestamp alignment, requiring disconnection on interruption
|
|
||||||
|
|
||||||
**Example:**
|
|
||||||
|
|
||||||
- [SarvamTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/sarvam/tts.py)
|
|
||||||
|
|
||||||
#### WordTTSService
|
|
||||||
|
|
||||||
**Use for:** HTTP-based services supporting word/timestamp alignment
|
|
||||||
|
|
||||||
**Example:**
|
|
||||||
|
|
||||||
- [ElevenLabsHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/tts.py)
|
|
||||||
|
|
||||||
#### TTSService
|
|
||||||
|
|
||||||
**Use for:** HTTP-based services without word/timestamp alignment
|
|
||||||
|
|
||||||
**Example:**
|
|
||||||
|
|
||||||
- [GoogleHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/tts.py)
|
|
||||||
|
|
||||||
#### Key requirements:
|
|
||||||
|
|
||||||
- For websocket services, use asyncio WebSocket implementation (required for v13+ support)
|
|
||||||
- Handle idle service timeouts with keepalives
|
|
||||||
- TTSServices push both audio (`TTSRawAudioFrame`) and text (`TTSTextFrame`) frames
|
|
||||||
|
|
||||||
### Telephony Serializers
|
|
||||||
|
|
||||||
Pipecat supports telephony provider integration using websocket connections to exchange MediaStreams. These services use a FrameSerializer to serialize and deserialize inputs from the FastAPIWebsocketTransport.
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
|
|
||||||
- [Twilio](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/serializers/twilio.py)
|
|
||||||
- [Telnyx](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/serializers/telnyx.py)
|
|
||||||
|
|
||||||
#### Key requirements:
|
|
||||||
|
|
||||||
- Include hang-up functionality using the provider's native API, ideally using `aiohttp`
|
|
||||||
- Support DTMF (dual-tone multi-frequency) events if the provider supports them:
|
|
||||||
- Deserialize DTMF events from the provider's protocol to `InputDTMFFrame`
|
|
||||||
- Use `KeypadEntry` enum for valid keypad entries (0-9, \*, #, A-D)
|
|
||||||
- Handle invalid DTMF digits gracefully by returning `None`
|
|
||||||
|
|
||||||
### Image Generation Services
|
|
||||||
|
|
||||||
**Base class:** `ImageGenService`
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
|
|
||||||
- [FalImageGenService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/fal/image.py)
|
|
||||||
- [GoogleImageGenService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/image.py)
|
|
||||||
|
|
||||||
#### Key requirements:
|
|
||||||
|
|
||||||
- Must implement `run_image_gen` method returning an `AsyncGenerator`
|
|
||||||
|
|
||||||
### Vision Services
|
|
||||||
|
|
||||||
Vision services process images and provide analysis such as descriptions, object detection, or visual question answering.
|
|
||||||
|
|
||||||
**Base class:** `VisionService`
|
|
||||||
|
|
||||||
**Example:**
|
|
||||||
|
|
||||||
- [MoondreamVisionService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/moondream/vision.py)
|
|
||||||
|
|
||||||
#### Key requirements:
|
|
||||||
|
|
||||||
- Must implement `run_vision` method that takes an `LLMContext` and returns an `AsyncGenerator[Frame, None]`
|
|
||||||
- The method processes the latest image in the context and yields frames with analysis results
|
|
||||||
- Typically yields `TextFrame` objects containing descriptions or answers
|
|
||||||
|
|
||||||
## Implementation Guidelines
|
|
||||||
|
|
||||||
### Naming Conventions
|
|
||||||
|
|
||||||
- **STT:** `VendorSTTService`
|
|
||||||
- **LLM:** `VendorLLMService`
|
|
||||||
- **TTS:**
|
|
||||||
- Websocket: `VendorTTSService`
|
|
||||||
- HTTP: `VendorHttpTTSService`
|
|
||||||
- **Image:** `VendorImageGenService`
|
|
||||||
- **Vision:** `VendorVisionService`
|
|
||||||
- **Telephony:** `VendorFrameSerializer`
|
|
||||||
|
|
||||||
### Metrics Support
|
|
||||||
|
|
||||||
Enable metrics in your service:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def can_generate_metrics(self) -> bool:
|
|
||||||
"""Check if this service can generate processing metrics.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True, as this service supports metrics.
|
|
||||||
"""
|
|
||||||
return True
|
|
||||||
```
|
|
||||||
|
|
||||||
### Dynamic Settings Updates
|
|
||||||
|
|
||||||
STT, LLM, and TTS services support `ServiceUpdateSettingsFrame` for dynamic configuration changes. The base `STTService` has an `_update_settings()` method that handles settings updates, and the private `_settings` `Dict` stores the settings and makes them accessible to the subclass.
|
|
||||||
|
|
||||||
```python
|
|
||||||
async def set_language(self, language: Language):
|
|
||||||
"""Set the recognition language and reconnect.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
language: The language to use for speech recognition.
|
|
||||||
"""
|
|
||||||
logger.info(f"Switching STT language to: [{language}]")
|
|
||||||
self._settings["language"] = language
|
|
||||||
await self._disconnect()
|
|
||||||
await self._connect()
|
|
||||||
```
|
|
||||||
|
|
||||||
Note that, in this example, Deepgram requires the websocket connection be disconnected and reconnected to reinitialize the service with the new value. Consider if your service requires reconnection.
|
|
||||||
|
|
||||||
### Sample Rate Handling
|
|
||||||
|
|
||||||
Sample rates are set via PipelineParams and passed to each frame processor at initialization. The pattern is to _not_ set the sample rate value in the constructor of a given service. Instead, use the `start()` method to initialize sample rates from the frame:
|
|
||||||
|
|
||||||
```python
|
|
||||||
async def start(self, frame: StartFrame):
|
|
||||||
"""Start the service."""
|
|
||||||
await super().start(frame)
|
|
||||||
self._settings["output_format"]["sample_rate"] = self.sample_rate
|
|
||||||
await self._connect()
|
|
||||||
```
|
|
||||||
|
|
||||||
Note that `self.sample_rate` is a `@property` set in the TTSService base class, which provides access to the private sample rate value obtained from the StartFrame.
|
|
||||||
|
|
||||||
### Tracing Decorators
|
|
||||||
|
|
||||||
Use Pipecat's tracing decorators:
|
|
||||||
|
|
||||||
- **STT:** `@traced_stt` - decorate a function that handles `transcript`, `is_final`, `language` as args
|
|
||||||
- **LLM:** `@traced_llm` - decorate the `_process_context()` method
|
|
||||||
- **TTS:** `@traced_tts` - decorate the `run_tts()` method
|
|
||||||
|
|
||||||
## Best Practices
|
|
||||||
|
|
||||||
### Packaging and Distribution
|
|
||||||
|
|
||||||
- Use [uv](https://docs.astral.sh/uv/) for packaging (encouraged)
|
|
||||||
- Consider releasing to PyPI for easier installation
|
|
||||||
- Follow semantic versioning principles
|
|
||||||
- Maintain a changelog
|
|
||||||
|
|
||||||
### HTTP Communication
|
|
||||||
|
|
||||||
For REST-based communication, use aiohttp. Pipecat includes this as a required dependency, so using it prevents adding an additional dependency to your integration.
|
|
||||||
|
|
||||||
### Error Handling
|
|
||||||
|
|
||||||
- Wrap API calls in appropriate `try`/`except` blocks
|
|
||||||
- Handle rate limits and network failures gracefully
|
|
||||||
- Provide meaningful error messages
|
|
||||||
- When errors occur, raise exceptions AND push `ErrorFrame`s to notify the pipeline:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from pipecat.frames.frames import ErrorFrame
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Your API call
|
|
||||||
result = await self._make_api_call()
|
|
||||||
except Exception as e:
|
|
||||||
# Push error frame to pipeline
|
|
||||||
await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
|
|
||||||
# Raise or handle as appropriate
|
|
||||||
raise
|
|
||||||
```
|
|
||||||
|
|
||||||
### Testing
|
|
||||||
|
|
||||||
- Your foundational example serves as a valuable integration-level test
|
|
||||||
- Unit tests are nice to have. As the Pipecat team provides better guidance, we will encourage unit testing more.
|
|
||||||
|
|
||||||
## Disclaimer
|
|
||||||
|
|
||||||
Community integrations are community-maintained and not officially supported by the Pipecat team. Users should evaluate these integrations independently. The Pipecat team reserves the right to remove listings that become unmaintained or problematic.
|
|
||||||
|
|
||||||
## Staying Up to Date
|
|
||||||
|
|
||||||
Pipecat evolves rapidly to support the latest AI technologies and patterns. While we strive to minimize breaking changes, they do occur as the framework matures.
|
|
||||||
|
|
||||||
**We strongly recommend:**
|
|
||||||
|
|
||||||
- Join our Discord at https://discord.gg/pipecat and monitor the `#announcements` channel for release notifications
|
|
||||||
- Follow our changelog: https://github.com/pipecat-ai/pipecat/blob/main/CHANGELOG.md
|
|
||||||
- Test your integration against new Pipecat releases promptly
|
|
||||||
- Update your README with the last tested Pipecat version
|
|
||||||
|
|
||||||
This helps ensure your integration remains compatible and your users have clear expectations about version support.
|
|
||||||
|
|
||||||
## Questions?
|
|
||||||
|
|
||||||
Join our Discord community at https://discord.gg/pipecat and post in the `#community-integrations` channel for guidance and support.
|
|
||||||
|
|
||||||
For additional questions, you can also reach out to us at pipecat-ai@daily.co.
|
|
||||||
@@ -1,9 +1,5 @@
|
|||||||
## Contributing to Pipecat
|
## Contributing to Pipecat
|
||||||
|
|
||||||
**Want to add a new service integration?**
|
|
||||||
We encourage community-maintained integrations! Please see our [Community Integration Guide](COMMUNITY_INTEGRATIONS.md) for the process and requirements.
|
|
||||||
|
|
||||||
**Want to contribute to Pipecat core?**
|
|
||||||
We welcome contributions of all kinds! Your help is appreciated. Follow these steps to get involved:
|
We welcome contributions of all kinds! Your help is appreciated. Follow these steps to get involved:
|
||||||
|
|
||||||
1. **Fork this repository**: Start by forking the Pipecat Documentation repository to your GitHub account.
|
1. **Fork this repository**: Start by forking the Pipecat Documentation repository to your GitHub account.
|
||||||
|
|||||||
122
README.md
122
README.md
@@ -2,8 +2,7 @@
|
|||||||
<img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
|
<img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
|
||||||
</div></h1>
|
</div></h1>
|
||||||
|
|
||||||
[](https://pypi.org/project/pipecat-ai)  [](https://codecov.io/gh/pipecat-ai/pipecat) [](https://docs.pipecat.ai) [](https://discord.gg/pipecat) [](https://deepwiki.com/pipecat-ai/pipecat)
|
[](https://pypi.org/project/pipecat-ai)  [](https://codecov.io/gh/pipecat-ai/pipecat) [](https://docs.pipecat.ai) [](https://discord.gg/pipecat)
|
||||||
[](https://getmanta.ai/pipecat)
|
|
||||||
|
|
||||||
# 🎙️ Pipecat: Real-Time Voice & Multimodal AI Agents
|
# 🎙️ Pipecat: Real-Time Voice & Multimodal AI Agents
|
||||||
|
|
||||||
@@ -20,6 +19,8 @@
|
|||||||
- **Business Agents** – customer intake, support bots, guided flows
|
- **Business Agents** – customer intake, support bots, guided flows
|
||||||
- **Complex Dialog Systems** – design logic with structured conversations
|
- **Complex Dialog Systems** – design logic with structured conversations
|
||||||
|
|
||||||
|
🧭 Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
|
||||||
|
|
||||||
## 🧠 Why Pipecat?
|
## 🧠 Why Pipecat?
|
||||||
|
|
||||||
- **Voice-first**: Integrates speech recognition, text-to-speech, and conversation handling
|
- **Voice-first**: Integrates speech recognition, text-to-speech, and conversation handling
|
||||||
@@ -27,35 +28,6 @@
|
|||||||
- **Composable Pipelines**: Build complex behavior from modular components
|
- **Composable Pipelines**: Build complex behavior from modular components
|
||||||
- **Real-Time**: Ultra-low latency interaction with different transports (e.g. WebSockets or WebRTC)
|
- **Real-Time**: Ultra-low latency interaction with different transports (e.g. WebSockets or WebRTC)
|
||||||
|
|
||||||
## 🌐 Pipecat Ecosystem
|
|
||||||
|
|
||||||
### 📱 Client SDKs
|
|
||||||
|
|
||||||
Building client applications? You can connect to Pipecat from any platform using our official SDKs:
|
|
||||||
|
|
||||||
<a href="https://docs.pipecat.ai/client/js/introduction">JavaScript</a> | <a href="https://docs.pipecat.ai/client/react/introduction">React</a> | <a href="https://docs.pipecat.ai/client/react-native/introduction">React Native</a> |
|
|
||||||
<a href="https://docs.pipecat.ai/client/ios/introduction">Swift</a> | <a href="https://docs.pipecat.ai/client/android/introduction">Kotlin</a> | <a href="https://docs.pipecat.ai/client/c++/introduction">C++</a> | <a href="https://github.com/pipecat-ai/pipecat-esp32">ESP32</a>
|
|
||||||
|
|
||||||
### 🧭 Structured conversations
|
|
||||||
|
|
||||||
Looking to build structured conversations? Check out [Pipecat Flows](https://github.com/pipecat-ai/pipecat-flows) for managing complex conversational states and transitions.
|
|
||||||
|
|
||||||
### 🪄 Beautiful UIs
|
|
||||||
|
|
||||||
Want to build beautiful and engaging experiences? Check out the [Voice UI Kit](https://github.com/pipecat-ai/voice-ui-kit), a collection of components, hooks and templates for building voice AI applications quickly.
|
|
||||||
|
|
||||||
### 🔍 Debugging
|
|
||||||
|
|
||||||
Looking for help debugging your pipeline and processors? Check out [Whisker](https://github.com/pipecat-ai/whisker), a real-time Pipecat debugger.
|
|
||||||
|
|
||||||
### 🖥️ Terminal
|
|
||||||
|
|
||||||
Love terminal applications? Check out [Tail](https://github.com/pipecat-ai/tail), a terminal dashboard for Pipecat.
|
|
||||||
|
|
||||||
### 📺️ Pipecat TV Channel
|
|
||||||
|
|
||||||
Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.youtube.com/playlist?list=PLzU2zoMTQIHjqC3v4q2XVSR3hGSzwKFwH) channel.
|
|
||||||
|
|
||||||
## 🎬 See it in action
|
## 🎬 See it in action
|
||||||
|
|
||||||
<p float="left">
|
<p float="left">
|
||||||
@@ -66,20 +38,31 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
|||||||
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/moondream-chatbot/image.png" width="400" /></a>
|
<a href="https://github.com/pipecat-ai/pipecat-examples/tree/main/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat-examples/main/moondream-chatbot/image.png" width="400" /></a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
## 📱 Client SDKs
|
||||||
|
|
||||||
|
You can connect to Pipecat from any platform using our official SDKs:
|
||||||
|
|
||||||
|
| Platform | SDK Repo | Description |
|
||||||
|
| -------- | ------------------------------------------------------------------------------ | -------------------------------- |
|
||||||
|
| Web | [pipecat-client-web](https://github.com/pipecat-ai/pipecat-client-web) | JavaScript and React client SDKs |
|
||||||
|
| iOS | [pipecat-client-ios](https://github.com/pipecat-ai/pipecat-client-ios) | Swift SDK for iOS |
|
||||||
|
| Android | [pipecat-client-android](https://github.com/pipecat-ai/pipecat-client-android) | Kotlin SDK for Android |
|
||||||
|
| C++ | [pipecat-client-cxx](https://github.com/pipecat-ai/pipecat-client-cxx) | C++ client SDK |
|
||||||
|
|
||||||
## 🧩 Available services
|
## 🧩 Available services
|
||||||
|
|
||||||
| Category | Services |
|
| Category | Services |
|
||||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
|
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||||
|
|
||||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||||
@@ -146,11 +129,7 @@ You can get started with Pipecat running on your local machine, then move your a
|
|||||||
2. Install development and testing dependencies:
|
2. Install development and testing dependencies:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv sync --group dev --all-extras \
|
uv sync --group dev --all-extras --no-extra gstreamer --no-extra krisp --no-extra local
|
||||||
--no-extra gstreamer \
|
|
||||||
--no-extra krisp \
|
|
||||||
--no-extra local \
|
|
||||||
--no-extra ultravox # (ultravox not fully supported on macOS)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Install the git pre-commit hooks:
|
3. Install the git pre-commit hooks:
|
||||||
@@ -159,6 +138,23 @@ You can get started with Pipecat running on your local machine, then move your a
|
|||||||
uv run pre-commit install
|
uv run pre-commit install
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Python 3.13+ Compatibility
|
||||||
|
|
||||||
|
Some features require PyTorch, which doesn't yet support Python 3.13+. Install using:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv sync --group dev --all-extras \
|
||||||
|
--no-extra gstreamer \
|
||||||
|
--no-extra krisp \
|
||||||
|
--no-extra local \
|
||||||
|
--no-extra local-smart-turn \
|
||||||
|
--no-extra mlx-whisper \
|
||||||
|
--no-extra moondream \
|
||||||
|
--no-extra ultravox
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Tip:** For full compatibility, use Python 3.12: `uv python pin 3.12`
|
||||||
|
|
||||||
> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
|
> **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors.
|
||||||
|
|
||||||
### Running tests
|
### Running tests
|
||||||
@@ -175,6 +171,54 @@ Run a specific test suite:
|
|||||||
uv run pytest tests/test_name.py
|
uv run pytest tests/test_name.py
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Setting up your editor
|
||||||
|
|
||||||
|
This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting via [Ruff](https://github.com/astral-sh/ruff).
|
||||||
|
|
||||||
|
#### Emacs
|
||||||
|
|
||||||
|
You can use [use-package](https://github.com/jwiegley/use-package) to install [emacs-lazy-ruff](https://github.com/christophermadsen/emacs-lazy-ruff) package and configure `ruff` arguments:
|
||||||
|
|
||||||
|
```elisp
|
||||||
|
(use-package lazy-ruff
|
||||||
|
:ensure t
|
||||||
|
:hook ((python-mode . lazy-ruff-mode))
|
||||||
|
:config
|
||||||
|
(setq lazy-ruff-format-command "ruff format")
|
||||||
|
(setq lazy-ruff-check-command "ruff check --select I"))
|
||||||
|
```
|
||||||
|
|
||||||
|
`ruff` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
|
||||||
|
|
||||||
|
```elisp
|
||||||
|
(use-package pyvenv-auto
|
||||||
|
:ensure t
|
||||||
|
:defer t
|
||||||
|
:hook ((python-mode . pyvenv-auto-run)))
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Visual Studio Code
|
||||||
|
|
||||||
|
Install the
|
||||||
|
[Ruff](https://marketplace.visualstudio.com/items?itemName=charliermarsh.ruff) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, and enable formatting on save:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"[python]": {
|
||||||
|
"editor.defaultFormatter": "charliermarsh.ruff",
|
||||||
|
"editor.formatOnSave": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### PyCharm
|
||||||
|
|
||||||
|
`ruff` was installed in the `venv` environment described before, now to enable autoformatting on save, go to `File` -> `Settings` -> `Tools` -> `File Watchers` and add a new watcher with the following settings:
|
||||||
|
|
||||||
|
1. **Name**: `Ruff formatter`
|
||||||
|
2. **File type**: `Python`
|
||||||
|
3. **Working directory**: `$ContentRoot$`
|
||||||
|
4. **Arguments**: `format $FilePath$`
|
||||||
|
5. **Program**: `$PyInterpreterDirectory$/ruff`
|
||||||
|
|
||||||
## 🤝 Contributing
|
## 🤝 Contributing
|
||||||
|
|
||||||
We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help:
|
We welcome contributions from the community! Whether you're fixing bugs, improving documentation, or adding new features, here's how you can help:
|
||||||
|
|||||||
@@ -1,5 +0,0 @@
|
|||||||
# Security Policy
|
|
||||||
|
|
||||||
## Reporting a Vulnerability
|
|
||||||
|
|
||||||
Please email `disclosures@daily.co`.
|
|
||||||
@@ -50,7 +50,6 @@ autodoc_mock_imports = [
|
|||||||
# Krisp - has build issues on some platforms
|
# Krisp - has build issues on some platforms
|
||||||
"pipecat_ai_krisp",
|
"pipecat_ai_krisp",
|
||||||
"krisp",
|
"krisp",
|
||||||
"krisp_audio",
|
|
||||||
# System-specific GUI libraries
|
# System-specific GUI libraries
|
||||||
"_tkinter",
|
"_tkinter",
|
||||||
"tkinter",
|
"tkinter",
|
||||||
|
|||||||
28
env.example
28
env.example
@@ -1,6 +1,3 @@
|
|||||||
# AI-COUSTICS
|
|
||||||
AICOUSTICS_LICENSE_KEY=...
|
|
||||||
|
|
||||||
# Anthropic
|
# Anthropic
|
||||||
ANTHROPIC_API_KEY=...
|
ANTHROPIC_API_KEY=...
|
||||||
|
|
||||||
@@ -58,9 +55,6 @@ GOOGLE_CLOUD_PROJECT_ID=...
|
|||||||
GOOGLE_TEST_CREDENTIALS=...
|
GOOGLE_TEST_CREDENTIALS=...
|
||||||
GOOGLE_VERTEX_TEST_CREDENTIALS=...
|
GOOGLE_VERTEX_TEST_CREDENTIALS=...
|
||||||
|
|
||||||
# Hume
|
|
||||||
HUME_API_KEY=...
|
|
||||||
|
|
||||||
# LMNT
|
# LMNT
|
||||||
LMNT_API_KEY=...
|
LMNT_API_KEY=...
|
||||||
LMNT_VOICE_ID=...
|
LMNT_VOICE_ID=...
|
||||||
@@ -69,8 +63,8 @@ LMNT_VOICE_ID=...
|
|||||||
PERPLEXITY_API_KEY=...
|
PERPLEXITY_API_KEY=...
|
||||||
|
|
||||||
# PlayHT
|
# PlayHT
|
||||||
PLAYHT_USER_ID=...
|
PLAY_HT_USER_ID=...
|
||||||
PLAYHT_API_KEY=...
|
PLAY_HT_API_KEY=...
|
||||||
|
|
||||||
# OpenAI
|
# OpenAI
|
||||||
OPENAI_API_KEY=...
|
OPENAI_API_KEY=...
|
||||||
@@ -90,9 +84,6 @@ SIMLI_FACE_ID=...
|
|||||||
# Krisp
|
# Krisp
|
||||||
KRISP_MODEL_PATH=...
|
KRISP_MODEL_PATH=...
|
||||||
|
|
||||||
# Krisp Viva
|
|
||||||
KRISP_VIVA_MODEL_PATH=...
|
|
||||||
|
|
||||||
# DeepSeek
|
# DeepSeek
|
||||||
DEEPSEEK_API_KEY=...
|
DEEPSEEK_API_KEY=...
|
||||||
|
|
||||||
@@ -152,18 +143,3 @@ SENTRY_DSN=...
|
|||||||
|
|
||||||
# Heygen
|
# Heygen
|
||||||
HEYGEN_API_KEY=...
|
HEYGEN_API_KEY=...
|
||||||
|
|
||||||
# Mistral
|
|
||||||
MISTRAL_API_KEY=...
|
|
||||||
|
|
||||||
# NVIDIA
|
|
||||||
NVIDIA_API_KEY=...
|
|
||||||
|
|
||||||
# Qwen
|
|
||||||
QWEN_API_KEY=...
|
|
||||||
|
|
||||||
# WhatsApp
|
|
||||||
WHATSAPP_TOKEN=
|
|
||||||
WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=
|
|
||||||
WHATSAPP_PHONE_NUMBER_ID=
|
|
||||||
WHATSAPP_APP_SECRET=
|
|
||||||
@@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.piper.tts import PiperTTSService
|
from pipecat.services.piper.tts import PiperTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.rime.tts import RimeHttpTTSService
|
from pipecat.services.rime.tts import RimeHttpTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|||||||
@@ -17,8 +17,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|||||||
@@ -11,13 +11,13 @@ import sys
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.frames.frames import TTSSpeakFrame
|
from pipecat.frames.frames import TextFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.runner.livekit import configure
|
from pipecat.runner.livekit import configure
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport
|
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -50,7 +50,7 @@ async def main():
|
|||||||
async def on_first_participant_joined(transport, participant_id):
|
async def on_first_participant_joined(transport, participant_id):
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
await task.queue_frame(
|
await task.queue_frame(
|
||||||
TTSSpeakFrame(
|
TextFrame(
|
||||||
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -17,8 +17,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.riva.tts import FastPitchTTSService
|
from pipecat.services.riva.tts import FastPitchTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|||||||
@@ -9,18 +9,21 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.frames.frames import EndFrame, LLMContextFrame
|
from pipecat.frames.frames import EndFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import (
|
||||||
|
OpenAILLMContext,
|
||||||
|
OpenAILLMContextFrame,
|
||||||
|
)
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -60,7 +63,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
# Register an event handler so we can play the audio when the client joins
|
# Register an event handler so we can play the audio when the client joins
|
||||||
@transport.event_handler("on_client_connected")
|
@transport.event_handler("on_client_connected")
|
||||||
async def on_client_connected(transport, client):
|
async def on_client_connected(transport, client):
|
||||||
await task.queue_frames([LLMContextFrame(LLMContext(messages)), EndFrame()])
|
await task.queue_frames([OpenAILLMContextFrame(OpenAILLMContext(messages)), EndFrame()])
|
||||||
|
|
||||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.fal.image import FalImageGenService
|
from pipecat.services.fal.image import FalImageGenService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.google.image import GoogleImageGenService
|
from pipecat.services.google.image import GoogleImageGenService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|||||||
@@ -17,22 +17,18 @@ from fastapi.responses import RedirectResponse
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
from pipecat_ai_small_webrtc_prebuilt.frontend import SmallWebRTCPrebuiltUI
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import TransportParams
|
from pipecat.transports.base_transport import TransportParams
|
||||||
from pipecat.transports.smallwebrtc.connection import IceServer, SmallWebRTCConnection
|
from pipecat.transports.network.small_webrtc import SmallWebRTCTransport
|
||||||
from pipecat.transports.smallwebrtc.transport import SmallWebRTCTransport
|
from pipecat.transports.network.webrtc_connection import IceServer, SmallWebRTCConnection
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -60,8 +56,7 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
|||||||
params=TransportParams(
|
params=TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -81,8 +76,8 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -12,20 +12,16 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.daily import configure
|
from pipecat.runner.daily import configure
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.daily.transport import DailyParams, DailyTransport
|
from pipecat.transports.services.daily import DailyLogLevel, DailyParams, DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -45,10 +41,10 @@ async def main():
|
|||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
transcription_enabled=True,
|
transcription_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
transport.set_log_level(DailyLogLevel.Info)
|
||||||
|
|
||||||
tts = CartesiaTTSService(
|
tts = CartesiaTTSService(
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||||
@@ -64,8 +60,8 @@ async def main():
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -12,27 +12,23 @@ import sys
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import (
|
from pipecat.frames.frames import (
|
||||||
InterruptionFrame,
|
BotInterruptionFrame,
|
||||||
|
TextFrame,
|
||||||
TranscriptionFrame,
|
TranscriptionFrame,
|
||||||
TTSSpeakFrame,
|
|
||||||
UserStartedSpeakingFrame,
|
UserStartedSpeakingFrame,
|
||||||
UserStoppedSpeakingFrame,
|
UserStoppedSpeakingFrame,
|
||||||
)
|
)
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.livekit import configure
|
from pipecat.runner.livekit import configure
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport
|
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -50,8 +46,7 @@ async def main():
|
|||||||
params=LiveKitParams(
|
params=LiveKitParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -74,8 +69,8 @@ async def main():
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
@@ -103,7 +98,7 @@ async def main():
|
|||||||
async def on_first_participant_joined(transport, participant_id):
|
async def on_first_participant_joined(transport, participant_id):
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
await task.queue_frame(
|
await task.queue_frame(
|
||||||
TTSSpeakFrame(
|
TextFrame(
|
||||||
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
"Hello there! How are you doing today? Would you like to talk about the weather?"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -120,7 +115,7 @@ async def main():
|
|||||||
|
|
||||||
await task.queue_frames(
|
await task.queue_frames(
|
||||||
[
|
[
|
||||||
InterruptionFrame(),
|
BotInterruptionFrame(),
|
||||||
UserStartedSpeakingFrame(),
|
UserStartedSpeakingFrame(),
|
||||||
TranscriptionFrame(
|
TranscriptionFrame(
|
||||||
user_id=participant_id,
|
user_id=participant_id,
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ from loguru import logger
|
|||||||
from pipecat.frames.frames import (
|
from pipecat.frames.frames import (
|
||||||
DataFrame,
|
DataFrame,
|
||||||
Frame,
|
Frame,
|
||||||
LLMContextFrame,
|
|
||||||
LLMFullResponseStartFrame,
|
LLMFullResponseStartFrame,
|
||||||
TextFrame,
|
TextFrame,
|
||||||
)
|
)
|
||||||
@@ -22,8 +21,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import (
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
OpenAILLMContext,
|
||||||
|
OpenAILLMContextFrame,
|
||||||
|
)
|
||||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
@@ -32,7 +33,7 @@ from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
|||||||
from pipecat.services.fal.image import FalImageGenService
|
from pipecat.services.fal.image import FalImageGenService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -155,7 +156,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
frames.append(MonthFrame(month=month))
|
frames.append(MonthFrame(month=month))
|
||||||
frames.append(LLMContextFrame(LLMContext(messages)))
|
frames.append(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||||
|
|
||||||
task = PipelineTask(
|
task = PipelineTask(
|
||||||
pipeline,
|
pipeline,
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ from loguru import logger
|
|||||||
|
|
||||||
from pipecat.frames.frames import (
|
from pipecat.frames.frames import (
|
||||||
Frame,
|
Frame,
|
||||||
LLMContextFrame,
|
|
||||||
OutputAudioRawFrame,
|
OutputAudioRawFrame,
|
||||||
TextFrame,
|
TextFrame,
|
||||||
TTSAudioRawFrame,
|
TTSAudioRawFrame,
|
||||||
@@ -25,8 +24,10 @@ from pipecat.pipeline.pipeline import Pipeline
|
|||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import (
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
OpenAILLMContext,
|
||||||
|
OpenAILLMContextFrame,
|
||||||
|
)
|
||||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||||
@@ -139,7 +140,7 @@ async def main():
|
|||||||
)
|
)
|
||||||
|
|
||||||
task = PipelineTask(pipeline)
|
task = PipelineTask(pipeline)
|
||||||
await task.queue_frame(LLMContextFrame(LLMContext(messages)))
|
await task.queue_frame(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||||
await task.stop_when_done()
|
await task.stop_when_done()
|
||||||
|
|
||||||
await runner.run(task)
|
await runner.run(task)
|
||||||
|
|||||||
@@ -9,10 +9,7 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import Frame, LLMRunFrame, MetricsFrame
|
from pipecat.frames.frames import Frame, LLMRunFrame, MetricsFrame
|
||||||
from pipecat.metrics.metrics import (
|
from pipecat.metrics.metrics import (
|
||||||
LLMUsageMetricsData,
|
LLMUsageMetricsData,
|
||||||
@@ -23,8 +20,7 @@ from pipecat.metrics.metrics import (
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
@@ -32,8 +28,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
|||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -65,20 +61,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -104,8 +97,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,10 +10,7 @@ from dotenv import load_dotenv
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import (
|
from pipecat.frames.frames import (
|
||||||
BotStartedSpeakingFrame,
|
BotStartedSpeakingFrame,
|
||||||
BotStoppedSpeakingFrame,
|
BotStoppedSpeakingFrame,
|
||||||
@@ -24,8 +21,7 @@ from pipecat.frames.frames import (
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
@@ -33,7 +29,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
|||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -70,7 +66,7 @@ class ImageSyncAggregator(FrameProcessor):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
await self.push_frame(frame, direction)
|
await self.push_frame(frame)
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
@@ -83,8 +79,7 @@ transport_params = {
|
|||||||
video_out_enabled=True,
|
video_out_enabled=True,
|
||||||
video_out_width=1024,
|
video_out_width=1024,
|
||||||
video_out_height=1024,
|
video_out_height=1024,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
@@ -92,8 +87,7 @@ transport_params = {
|
|||||||
video_out_enabled=True,
|
video_out_enabled=True,
|
||||||
video_out_width=1024,
|
video_out_width=1024,
|
||||||
video_out_height=1024,
|
video_out_height=1024,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -117,8 +111,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
image_sync_aggregator = ImageSyncAggregator(
|
image_sync_aggregator = ImageSyncAggregator(
|
||||||
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
|
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
|
||||||
|
|||||||
@@ -9,24 +9,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -38,20 +34,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,8 +68,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -9,24 +9,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -37,20 +33,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,8 +67,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -13,11 +13,10 @@ from pipecat.frames.frames import LLMRunFrame
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.llm_response import (
|
from pipecat.processors.aggregators.llm_response import (
|
||||||
LLMUserAggregatorParams,
|
LLMUserAggregatorParams,
|
||||||
)
|
)
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||||
@@ -26,8 +25,8 @@ from pipecat.services.openai.llm import OpenAILLMService
|
|||||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -124,8 +123,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(
|
context_aggregator = llm.create_context_aggregator(
|
||||||
context,
|
context,
|
||||||
user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
|
user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -9,19 +9,15 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.llm_response import (
|
from pipecat.processors.aggregators.llm_response import (
|
||||||
LLMUserAggregatorParams,
|
LLMUserAggregatorParams,
|
||||||
)
|
)
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||||
@@ -30,8 +26,8 @@ from pipecat.services.openai.llm import OpenAILLMService
|
|||||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -42,20 +38,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -113,8 +106,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(
|
context_aggregator = llm.create_context_aggregator(
|
||||||
context,
|
context,
|
||||||
user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
|
user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.soniox.stt import SonioxSTTService
|
from pipecat.services.soniox.stt import SonioxSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -35,20 +31,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,8 +67,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -11,24 +11,20 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.inworld.tts import InworldTTSService
|
from pipecat.services.inworld.tts import InworldTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,6 +67,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
voice_id="Ashley",
|
voice_id="Ashley",
|
||||||
model="inworld-tts-1",
|
model="inworld-tts-1",
|
||||||
streaming=streaming, # True: real-time chunks, False: complete audio then playback
|
streaming=streaming, # True: real-time chunks, False: complete audio then playback
|
||||||
|
params=InworldTTSService.InputParams(
|
||||||
|
temperature=0.8,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||||
@@ -85,8 +81,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -11,24 +11,20 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.asyncai.tts import AsyncAIHttpTTSService
|
from pipecat.services.asyncai.tts import AsyncAIHttpTTSService
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -40,20 +36,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -80,8 +73,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.asyncai.tts import AsyncAITTSService
|
from pipecat.services.asyncai.tts import AsyncAITTSService
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -76,8 +69,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -1,169 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2024–2025, Daily
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: BSD 2-Clause License
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
import datetime
|
|
||||||
import os
|
|
||||||
import wave
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from pipecat.audio.filters.aic_filter import AICFilter
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
|
||||||
from pipecat.runner.utils import create_transport
|
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
|
||||||
|
|
||||||
load_dotenv(override=True)
|
|
||||||
|
|
||||||
|
|
||||||
# Create audio buffer processor so we can hear the audio filter results.
|
|
||||||
audiobuffer = AudioBufferProcessor(
|
|
||||||
num_channels=2, # 1 for mono, 2 for stereo (user left, bot right)
|
|
||||||
enable_turn_audio=False, # Enable per-turn audio recording
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _create_aic_filter() -> AICFilter:
|
|
||||||
license_key = os.getenv("AICOUSTICS_LICENSE_KEY", "")
|
|
||||||
|
|
||||||
return AICFilter(
|
|
||||||
license_key=license_key,
|
|
||||||
enhancement_level=1.0,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
|
||||||
# instantiated. The function will be called when the desired transport gets
|
|
||||||
# selected.
|
|
||||||
transport_params = {
|
|
||||||
"daily": lambda: DailyParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
audio_in_filter=_create_aic_filter(),
|
|
||||||
),
|
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
audio_in_filter=_create_aic_filter(),
|
|
||||||
),
|
|
||||||
"webrtc": lambda: TransportParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
audio_in_filter=_create_aic_filter(),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|
||||||
logger.info(f"Starting bot")
|
|
||||||
|
|
||||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
|
||||||
|
|
||||||
tts = CartesiaTTSService(
|
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
|
||||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
|
||||||
)
|
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
context = LLMContext(messages)
|
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
|
||||||
|
|
||||||
pipeline = Pipeline(
|
|
||||||
[
|
|
||||||
transport.input(), # Transport user input
|
|
||||||
stt, # STT
|
|
||||||
context_aggregator.user(), # User responses
|
|
||||||
llm, # LLM
|
|
||||||
tts, # TTS
|
|
||||||
transport.output(), # Transport bot output
|
|
||||||
audiobuffer, # write audio data to a file
|
|
||||||
context_aggregator.assistant(), # Assistant spoken responses
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
task = PipelineTask(
|
|
||||||
pipeline,
|
|
||||||
params=PipelineParams(
|
|
||||||
enable_metrics=True,
|
|
||||||
enable_usage_metrics=True,
|
|
||||||
),
|
|
||||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
|
||||||
)
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_connected")
|
|
||||||
async def on_client_connected(transport, client):
|
|
||||||
logger.info(f"Client connected")
|
|
||||||
await audiobuffer.start_recording()
|
|
||||||
# Kick off the conversation.
|
|
||||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
|
||||||
await task.queue_frames([LLMRunFrame()])
|
|
||||||
|
|
||||||
@audiobuffer.event_handler("on_audio_data")
|
|
||||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
|
||||||
# Save or process the composite audio
|
|
||||||
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
||||||
filename = f"./conversation_{timestamp}.wav"
|
|
||||||
|
|
||||||
# Create the WAV file
|
|
||||||
with wave.open(filename, "wb") as wf:
|
|
||||||
wf.setnchannels(num_channels)
|
|
||||||
wf.setsampwidth(2) # 16-bit audio
|
|
||||||
wf.setframerate(sample_rate)
|
|
||||||
wf.writeframes(audio)
|
|
||||||
|
|
||||||
logger.info(f"Saved recording to {filename}")
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
|
||||||
async def on_client_disconnected(transport, client):
|
|
||||||
logger.info(f"Client disconnected")
|
|
||||||
await task.cancel()
|
|
||||||
|
|
||||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
|
||||||
|
|
||||||
await runner.run(task)
|
|
||||||
|
|
||||||
|
|
||||||
async def bot(runner_args: RunnerArguments):
|
|
||||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
|
||||||
transport = await create_transport(runner_args, transport_params)
|
|
||||||
await run_bot(transport, runner_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from pipecat.runner.run import main
|
|
||||||
|
|
||||||
main()
|
|
||||||
@@ -1,138 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2024–2025, Daily
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: BSD 2-Clause License
|
|
||||||
#
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIObserver, RTVIProcessor
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
|
||||||
from pipecat.runner.utils import create_transport
|
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
|
||||||
from pipecat.services.hume.tts import HUME_SAMPLE_RATE, HumeTTSService
|
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
|
||||||
|
|
||||||
load_dotenv(override=True)
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
|
||||||
# instantiated. The function will be called when the desired transport gets
|
|
||||||
# selected.
|
|
||||||
transport_params = {
|
|
||||||
"daily": lambda: DailyParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
|
||||||
"webrtc": lambda: TransportParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|
||||||
logger.info(f"Starting bot")
|
|
||||||
|
|
||||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
|
||||||
|
|
||||||
tts = HumeTTSService(
|
|
||||||
api_key=os.getenv("HUME_API_KEY"),
|
|
||||||
# Replace with your Hume voice ID
|
|
||||||
voice_id="f898a92e-685f-43fa-985b-a46920f0650b",
|
|
||||||
)
|
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
context = LLMContext(messages)
|
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
|
||||||
|
|
||||||
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
|
|
||||||
|
|
||||||
pipeline = Pipeline(
|
|
||||||
[
|
|
||||||
transport.input(), # Transport user input
|
|
||||||
rtvi,
|
|
||||||
stt,
|
|
||||||
context_aggregator.user(), # User responses
|
|
||||||
llm, # LLM
|
|
||||||
tts, # TTS
|
|
||||||
transport.output(), # Transport bot output
|
|
||||||
context_aggregator.assistant(), # Assistant spoken responses
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
task = PipelineTask(
|
|
||||||
pipeline,
|
|
||||||
params=PipelineParams(
|
|
||||||
enable_metrics=True,
|
|
||||||
enable_usage_metrics=True,
|
|
||||||
audio_out_sample_rate=HUME_SAMPLE_RATE,
|
|
||||||
),
|
|
||||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
|
||||||
observers=[RTVIObserver(rtvi)],
|
|
||||||
)
|
|
||||||
|
|
||||||
@rtvi.event_handler("on_client_ready")
|
|
||||||
async def on_client_ready(rtvi):
|
|
||||||
await rtvi.set_bot_ready()
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_connected")
|
|
||||||
async def on_client_connected(transport, client):
|
|
||||||
logger.info(f"Client connected")
|
|
||||||
# Kick off the conversation.
|
|
||||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
|
||||||
await task.queue_frames([LLMRunFrame()])
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
|
||||||
async def on_client_disconnected(transport, client):
|
|
||||||
logger.info(f"Client disconnected")
|
|
||||||
await task.cancel()
|
|
||||||
|
|
||||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
|
||||||
|
|
||||||
await runner.run(task)
|
|
||||||
|
|
||||||
|
|
||||||
async def bot(runner_args: RunnerArguments):
|
|
||||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
|
||||||
transport = await create_transport(runner_args, transport_params)
|
|
||||||
await run_bot(transport, runner_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from pipecat.runner.run import main
|
|
||||||
|
|
||||||
main()
|
|
||||||
@@ -15,24 +15,26 @@ from langchain_core.runnables.history import RunnableWithMessageHistory
|
|||||||
from langchain_openai import ChatOpenAI
|
from langchain_openai import ChatOpenAI
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMMessagesUpdateFrame
|
from pipecat.frames.frames import LLMMessagesUpdateFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.llm_response import (
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
LLMAssistantContextAggregator,
|
||||||
|
LLMUserContextAggregator,
|
||||||
|
)
|
||||||
|
from pipecat.processors.aggregators.openai_llm_context import (
|
||||||
|
OpenAILLMContext,
|
||||||
|
)
|
||||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -53,20 +55,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -101,18 +100,19 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
)
|
)
|
||||||
lc = LangchainProcessor(history_chain)
|
lc = LangchainProcessor(history_chain)
|
||||||
|
|
||||||
context = LLMContext()
|
context = OpenAILLMContext()
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
tma_in = LLMUserContextAggregator(context=context)
|
||||||
|
tma_out = LLMAssistantContextAggregator(context=context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
transport.input(), # Transport user input
|
transport.input(), # Transport user input
|
||||||
stt,
|
stt,
|
||||||
context_aggregator.user(), # User responses
|
tma_in, # User responses
|
||||||
lc, # Langchain
|
lc, # Langchain
|
||||||
tts, # TTS
|
tts, # TTS
|
||||||
transport.output(), # Transport bot output
|
transport.output(), # Transport bot output
|
||||||
context_aggregator.assistant(), # Assistant spoken responses
|
tma_out, # Assistant spoken responses
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -129,7 +129,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
async def on_client_connected(transport, client):
|
async def on_client_connected(transport, client):
|
||||||
logger.info(f"Client connected")
|
logger.info(f"Client connected")
|
||||||
# Kick off the conversation.
|
# Kick off the conversation.
|
||||||
# An `LLMContextFrame` will be picked up by the LangchainProcessor using
|
# An `OpenAILLMContextFrame` will be picked up by the LangchainProcessor using
|
||||||
# only the content of the last message to inject it in the prompt defined
|
# only the content of the last message to inject it in the prompt defined
|
||||||
# above. So no role is required here.
|
# above. So no role is required here.
|
||||||
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
messages = [({"content": "Please briefly introduce yourself to the user."})]
|
||||||
|
|||||||
@@ -1,118 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2024–2025, Daily
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: BSD 2-Clause License
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
||||||
from pipecat.processors.aggregators.llm_response_universal import (
|
|
||||||
LLMContext,
|
|
||||||
LLMContextAggregatorPair,
|
|
||||||
)
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
|
||||||
from pipecat.runner.utils import create_transport
|
|
||||||
from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService
|
|
||||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
|
||||||
|
|
||||||
load_dotenv(override=True)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
|
||||||
# instantiated. The function will be called when the desired transport gets
|
|
||||||
# selected.
|
|
||||||
transport_params = {
|
|
||||||
"daily": lambda: DailyParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
),
|
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
),
|
|
||||||
"webrtc": lambda: TransportParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|
||||||
logger.info(f"Starting bot")
|
|
||||||
|
|
||||||
stt = DeepgramFluxSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
|
||||||
|
|
||||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-2-andromeda-en")
|
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
context = LLMContext(messages)
|
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
|
||||||
|
|
||||||
pipeline = Pipeline(
|
|
||||||
[
|
|
||||||
transport.input(), # Transport user input
|
|
||||||
stt, # STT
|
|
||||||
context_aggregator.user(), # User responses
|
|
||||||
llm, # LLM
|
|
||||||
tts, # TTS
|
|
||||||
transport.output(), # Transport bot output
|
|
||||||
context_aggregator.assistant(), # Assistant spoken responses
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
task = PipelineTask(
|
|
||||||
pipeline,
|
|
||||||
params=PipelineParams(
|
|
||||||
enable_metrics=True,
|
|
||||||
enable_usage_metrics=True,
|
|
||||||
),
|
|
||||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
|
||||||
)
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_connected")
|
|
||||||
async def on_client_connected(transport, client):
|
|
||||||
logger.info(f"Client connected")
|
|
||||||
# Kick off the conversation.
|
|
||||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
|
||||||
await task.queue_frames([LLMRunFrame()])
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
|
||||||
async def on_client_disconnected(transport, client):
|
|
||||||
logger.info(f"Client disconnected")
|
|
||||||
await task.cancel()
|
|
||||||
|
|
||||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
|
||||||
|
|
||||||
await runner.run(task)
|
|
||||||
|
|
||||||
|
|
||||||
async def bot(runner_args: RunnerArguments):
|
|
||||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
|
||||||
transport = await create_transport(runner_args, transport_params)
|
|
||||||
await run_bot(transport, runner_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from pipecat.runner.run import main
|
|
||||||
|
|
||||||
main()
|
|
||||||
@@ -12,24 +12,23 @@ from dotenv import load_dotenv
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.frames.frames import (
|
from pipecat.frames.frames import (
|
||||||
InterruptionFrame,
|
|
||||||
LLMRunFrame,
|
LLMRunFrame,
|
||||||
|
StartInterruptionFrame,
|
||||||
UserStartedSpeakingFrame,
|
UserStartedSpeakingFrame,
|
||||||
UserStoppedSpeakingFrame,
|
UserStoppedSpeakingFrame,
|
||||||
)
|
)
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -72,8 +71,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
@@ -98,7 +97,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
|
|
||||||
@stt.event_handler("on_speech_started")
|
@stt.event_handler("on_speech_started")
|
||||||
async def on_speech_started(stt, *args, **kwargs):
|
async def on_speech_started(stt, *args, **kwargs):
|
||||||
await task.queue_frames([InterruptionFrame(), UserStartedSpeakingFrame()])
|
await task.queue_frames([StartInterruptionFrame(), UserStartedSpeakingFrame()])
|
||||||
|
|
||||||
@stt.event_handler("on_utterance_end")
|
@stt.event_handler("on_utterance_end")
|
||||||
async def on_utterance_end(stt, *args, **kwargs):
|
async def on_utterance_end(stt, *args, **kwargs):
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -73,8 +66,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -11,24 +11,20 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.elevenlabs.stt import ElevenLabsSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService
|
from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -40,20 +36,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -63,10 +56,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
|
|
||||||
# Create an HTTP session
|
# Create an HTTP session
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
stt = ElevenLabsSTTService(
|
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
|
||||||
aiohttp_session=session,
|
|
||||||
)
|
|
||||||
|
|
||||||
tts = ElevenLabsHttpTTSService(
|
tts = ElevenLabsHttpTTSService(
|
||||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||||
@@ -83,8 +73,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -76,8 +69,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.playht.tts import PlayHTHttpTTSService
|
from pipecat.services.playht.tts import PlayHTHttpTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -38,20 +34,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -76,8 +69,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,16 +10,12 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
@@ -27,8 +23,8 @@ from pipecat.services.openai.llm import OpenAILLMService
|
|||||||
from pipecat.services.playht.tts import PlayHTTTSService
|
from pipecat.services.playht.tts import PlayHTTTSService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,8 +71,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.azure.llm import AzureLLMService
|
from pipecat.services.azure.llm import AzureLLMService
|
||||||
from pipecat.services.azure.stt import AzureSTTService
|
from pipecat.services.azure.stt import AzureSTTService
|
||||||
from pipecat.services.azure.tts import AzureTTSService
|
from pipecat.services.azure.tts import AzureTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -38,20 +34,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,8 +75,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.openai.stt import OpenAISTTService
|
from pipecat.services.openai.stt import OpenAISTTService
|
||||||
from pipecat.services.openai.tts import OpenAITTSService
|
from pipecat.services.openai.tts import OpenAITTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -38,20 +34,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -76,8 +69,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -11,24 +11,20 @@ import time
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openpipe.llm import OpenPipeLLMService
|
from pipecat.services.openpipe.llm import OpenPipeLLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -81,8 +74,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -11,24 +11,20 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.xtts.tts import XTTSService
|
from pipecat.services.xtts.tts import XTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,8 +72,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,16 +10,12 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
@@ -28,8 +24,8 @@ from pipecat.services.gladia.stt import GladiaSTTService
|
|||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -40,20 +36,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -85,8 +78,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.lmnt.tts import LmntTTSService
|
from pipecat.services.lmnt.tts import LmntTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -38,20 +34,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,8 +65,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,25 +10,21 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.llm_response import LLMUserAggregatorParams
|
from pipecat.processors.aggregators.llm_response import LLMUserAggregatorParams
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.groq.llm import GroqLLMService
|
from pipecat.services.groq.llm import GroqLLMService
|
||||||
from pipecat.services.groq.stt import GroqSTTService
|
from pipecat.services.groq.stt import GroqSTTService
|
||||||
from pipecat.services.groq.tts import GroqTTSService
|
from pipecat.services.groq.tts import GroqTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,8 +68,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(
|
context_aggregator = llm.create_context_aggregator(
|
||||||
context, user_params=LLMUserAggregatorParams(aggregation_timeout=0.05)
|
context, user_params=LLMUserAggregatorParams(aggregation_timeout=0.05)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,177 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2024–2025, Daily
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: BSD 2-Clause License
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
||||||
from pipecat.frames.frames import LLMMessagesAppendFrame, LLMRunFrame
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.processors.frameworks.strands_agents import StrandsAgentsProcessor
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
|
||||||
from pipecat.runner.utils import create_transport
|
|
||||||
from pipecat.services.aws.stt import AWSTranscribeSTTService
|
|
||||||
from pipecat.services.aws.tts import AWSPollyTTSService
|
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
|
||||||
|
|
||||||
# Strands agent setup
|
|
||||||
try:
|
|
||||||
from strands import Agent, tool
|
|
||||||
from strands.models import BedrockModel
|
|
||||||
except ImportError:
|
|
||||||
logger.warning("Strands not installed. Please install with: pip install strands-agents")
|
|
||||||
Agent = None
|
|
||||||
BedrockModel = None
|
|
||||||
|
|
||||||
load_dotenv(override=True)
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
|
||||||
# instantiated. The function will be called when the desired transport gets
|
|
||||||
# selected.
|
|
||||||
transport_params = {
|
|
||||||
"daily": lambda: DailyParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(),
|
|
||||||
),
|
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(),
|
|
||||||
),
|
|
||||||
"webrtc": lambda: TransportParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def build_agent(model_id: str, max_tokens: int):
|
|
||||||
"""Create and configure a Strands agent for NAB customer service coaching.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_id: The AWS Bedrock model ID to use
|
|
||||||
max_tokens: Maximum tokens for the model
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Configured Strands Agent
|
|
||||||
"""
|
|
||||||
|
|
||||||
@tool
|
|
||||||
def check_weather(location: str) -> str:
|
|
||||||
if location.lower() == "san francisco":
|
|
||||||
return "The weather in San Francisco is sunny and 30 degrees."
|
|
||||||
elif location.lower() == "sydney":
|
|
||||||
return "The weather in Sydney is cloudy and 20 degrees."
|
|
||||||
else:
|
|
||||||
return "I'm not sure about the weather in that location."
|
|
||||||
|
|
||||||
agent = Agent(
|
|
||||||
model=BedrockModel(
|
|
||||||
model_id=model_id,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
),
|
|
||||||
tools=[check_weather],
|
|
||||||
system_prompt="You are a helpful assistant that can check the weather in a given location.",
|
|
||||||
)
|
|
||||||
|
|
||||||
return agent
|
|
||||||
|
|
||||||
|
|
||||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|
||||||
logger.info(f"Starting bot")
|
|
||||||
|
|
||||||
stt = AWSTranscribeSTTService()
|
|
||||||
|
|
||||||
tts = AWSPollyTTSService(
|
|
||||||
region="us-west-2", # only specific regions support generative TTS
|
|
||||||
voice_id="Joanna",
|
|
||||||
params=AWSPollyTTSService.InputParams(engine="generative", rate="1.1"),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create Strands agent processor
|
|
||||||
try:
|
|
||||||
agent = build_agent(model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0", max_tokens=8000)
|
|
||||||
llm = StrandsAgentsProcessor(agent=agent)
|
|
||||||
logger.info("Successfully created Strands agent for NAB customer service coaching")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to create Strands agent: {e}")
|
|
||||||
raise ValueError(
|
|
||||||
"Unable to create Strands processor. Please ensure you have properly "
|
|
||||||
"installed strands-agents and configured your AWS credentials."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Setup context aggregators for message handling
|
|
||||||
context = LLMContext()
|
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
|
||||||
|
|
||||||
pipeline = Pipeline(
|
|
||||||
[
|
|
||||||
transport.input(), # Transport user input
|
|
||||||
stt, # Speech-to-text
|
|
||||||
context_aggregator.user(), # User responses
|
|
||||||
llm, # Strands Agents processor
|
|
||||||
tts, # Text-to-speech
|
|
||||||
transport.output(), # Transport bot output
|
|
||||||
context_aggregator.assistant(), # Assistant spoken responses
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
task = PipelineTask(
|
|
||||||
pipeline,
|
|
||||||
params=PipelineParams(
|
|
||||||
enable_metrics=True,
|
|
||||||
enable_usage_metrics=True,
|
|
||||||
),
|
|
||||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
|
||||||
)
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_connected")
|
|
||||||
async def on_client_connected(transport, client):
|
|
||||||
logger.info(f"Client connected")
|
|
||||||
# Kick off the conversation.
|
|
||||||
await task.queue_frames(
|
|
||||||
[
|
|
||||||
LLMMessagesAppendFrame(
|
|
||||||
messages=[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": f"Greet the user and introduce yourself.",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
run_llm=True,
|
|
||||||
)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
|
||||||
async def on_client_disconnected(transport, client):
|
|
||||||
logger.info(f"Client disconnected")
|
|
||||||
await task.cancel()
|
|
||||||
|
|
||||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
|
||||||
|
|
||||||
await runner.run(task)
|
|
||||||
|
|
||||||
|
|
||||||
async def bot(runner_args: RunnerArguments):
|
|
||||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
|
||||||
transport = await create_transport(runner_args, transport_params)
|
|
||||||
await run_bot(transport, runner_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from pipecat.runner.run import main
|
|
||||||
|
|
||||||
main()
|
|
||||||
@@ -8,24 +8,20 @@
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.aws.llm import AWSBedrockLLMService
|
from pipecat.services.aws.llm import AWSBedrockLLMService
|
||||||
from pipecat.services.aws.stt import AWSTranscribeSTTService
|
from pipecat.services.aws.stt import AWSTranscribeSTTService
|
||||||
from pipecat.services.aws.tts import AWSPollyTTSService
|
from pipecat.services.aws.tts import AWSPollyTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -36,20 +32,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,8 +71,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -1,151 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2024–2025, Daily
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: BSD 2-Clause License
|
|
||||||
#
|
|
||||||
|
|
||||||
"""
|
|
||||||
A conversational AI bot using Gemini for both LLM, STT and TTS.
|
|
||||||
|
|
||||||
This example demonstrates how to use Gemini's image generation capabilities.
|
|
||||||
|
|
||||||
Features showcased:
|
|
||||||
- Gemini LLM for conversation and image generation
|
|
||||||
- Google TTS and STT
|
|
||||||
|
|
||||||
Run with:
|
|
||||||
python examples/foundational/07n-interruptible-gemini-image.py
|
|
||||||
|
|
||||||
Make sure to set your environment variables:
|
|
||||||
export GOOGLE_API_KEY=your_api_key_here
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
|
||||||
from pipecat.runner.utils import create_transport
|
|
||||||
from pipecat.services.google.llm import GoogleLLMService
|
|
||||||
from pipecat.services.google.stt import GoogleSTTService
|
|
||||||
from pipecat.services.google.tts import GoogleTTSService
|
|
||||||
from pipecat.transcriptions.language import Language
|
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
|
||||||
|
|
||||||
load_dotenv(override=True)
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
|
||||||
# instantiated. The function will be called when the desired transport gets
|
|
||||||
# selected.
|
|
||||||
transport_params = {
|
|
||||||
"daily": lambda: DailyParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
video_out_enabled=True,
|
|
||||||
video_out_width=1024,
|
|
||||||
video_out_height=1024,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
|
||||||
"webrtc": lambda: TransportParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
video_out_enabled=True,
|
|
||||||
video_out_width=1024,
|
|
||||||
video_out_height=1024,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|
||||||
logger.info(f"Starting bot")
|
|
||||||
|
|
||||||
stt = GoogleSTTService(
|
|
||||||
params=GoogleSTTService.InputParams(languages=Language.EN_US),
|
|
||||||
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
|
|
||||||
)
|
|
||||||
|
|
||||||
tts = GoogleTTSService(
|
|
||||||
voice_id="en-US-Chirp3-HD-Charon",
|
|
||||||
params=GoogleTTSService.InputParams(language=Language.EN_US),
|
|
||||||
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
|
|
||||||
)
|
|
||||||
|
|
||||||
llm = GoogleLLMService(
|
|
||||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
|
||||||
model="gemini-2.5-flash-image",
|
|
||||||
)
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
context = LLMContext(messages)
|
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
|
||||||
|
|
||||||
pipeline = Pipeline(
|
|
||||||
[
|
|
||||||
transport.input(), # Transport user input
|
|
||||||
stt, # STT
|
|
||||||
context_aggregator.user(), # User responses
|
|
||||||
llm, # LLM
|
|
||||||
tts, # Gemini TTS
|
|
||||||
transport.output(), # Transport bot output
|
|
||||||
context_aggregator.assistant(), # Assistant spoken responses
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
task = PipelineTask(
|
|
||||||
pipeline,
|
|
||||||
params=PipelineParams(
|
|
||||||
enable_metrics=True,
|
|
||||||
enable_usage_metrics=True,
|
|
||||||
),
|
|
||||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
|
||||||
)
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_connected")
|
|
||||||
async def on_client_connected(transport, client):
|
|
||||||
logger.info(f"Client connected")
|
|
||||||
# Kick off the conversation with a styled introduction
|
|
||||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
|
||||||
await task.queue_frames([LLMRunFrame()])
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
|
||||||
async def on_client_disconnected(transport, client):
|
|
||||||
logger.info(f"Client disconnected")
|
|
||||||
await task.cancel()
|
|
||||||
|
|
||||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
|
||||||
|
|
||||||
await runner.run(task)
|
|
||||||
|
|
||||||
|
|
||||||
async def bot(runner_args: RunnerArguments):
|
|
||||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
|
||||||
transport = await create_transport(runner_args, transport_params)
|
|
||||||
await run_bot(transport, runner_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from pipecat.runner.run import main
|
|
||||||
|
|
||||||
main()
|
|
||||||
@@ -28,16 +28,12 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.google.llm import GoogleLLMService
|
from pipecat.services.google.llm import GoogleLLMService
|
||||||
@@ -45,8 +41,8 @@ from pipecat.services.google.stt import GoogleSTTService
|
|||||||
from pipecat.services.google.tts import GeminiTTSService
|
from pipecat.services.google.tts import GeminiTTSService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -57,20 +53,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -113,8 +106,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,16 +10,12 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.google.llm import GoogleLLMService
|
from pipecat.services.google.llm import GoogleLLMService
|
||||||
@@ -27,8 +23,8 @@ from pipecat.services.google.stt import GoogleSTTService
|
|||||||
from pipecat.services.google.tts import GoogleTTSService
|
from pipecat.services.google.tts import GoogleTTSService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -85,8 +78,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.assemblyai.stt import AssemblyAISTTService
|
from pipecat.services.assemblyai.stt import AssemblyAISTTService
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,8 +71,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -1,129 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2024–2025, Daily
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: BSD 2-Clause License
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
|
||||||
from pipecat.runner.utils import create_transport
|
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
|
||||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
|
||||||
|
|
||||||
load_dotenv(override=True)
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
|
||||||
# instantiated. The function will be called when the desired transport gets
|
|
||||||
# selected.
|
|
||||||
transport_params = {
|
|
||||||
"daily": lambda: DailyParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
audio_in_filter=KrispVivaFilter(),
|
|
||||||
),
|
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
audio_in_filter=KrispVivaFilter(),
|
|
||||||
),
|
|
||||||
"webrtc": lambda: TransportParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
audio_in_filter=KrispVivaFilter(),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|
||||||
logger.info(f"Starting bot")
|
|
||||||
|
|
||||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
|
||||||
|
|
||||||
tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en")
|
|
||||||
|
|
||||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
context = LLMContext(messages)
|
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
|
||||||
|
|
||||||
pipeline = Pipeline(
|
|
||||||
[
|
|
||||||
transport.input(), # Transport user input
|
|
||||||
stt, # STT
|
|
||||||
context_aggregator.user(), # User responses
|
|
||||||
llm, # LLM
|
|
||||||
tts, # TTS
|
|
||||||
transport.output(), # Transport bot output
|
|
||||||
context_aggregator.assistant(), # Assistant spoken responses
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
task = PipelineTask(
|
|
||||||
pipeline,
|
|
||||||
params=PipelineParams(
|
|
||||||
enable_metrics=True,
|
|
||||||
enable_usage_metrics=True,
|
|
||||||
),
|
|
||||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
|
||||||
)
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_connected")
|
|
||||||
async def on_client_connected(transport, client):
|
|
||||||
logger.info(f"Client connected")
|
|
||||||
# Kick off the conversation.
|
|
||||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
|
||||||
await task.queue_frames([LLMRunFrame()])
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
|
||||||
async def on_client_disconnected(transport, client):
|
|
||||||
logger.info(f"Client disconnected")
|
|
||||||
await task.cancel()
|
|
||||||
|
|
||||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
|
||||||
|
|
||||||
await runner.run(task)
|
|
||||||
|
|
||||||
|
|
||||||
async def bot(runner_args: RunnerArguments):
|
|
||||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
|
||||||
transport = await create_transport(runner_args, transport_params)
|
|
||||||
await run_bot(transport, runner_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from pipecat.runner.run import main
|
|
||||||
|
|
||||||
main()
|
|
||||||
@@ -11,24 +11,20 @@ from dotenv import load_dotenv
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.filters.krisp_filter import KrispFilter
|
from pipecat.audio.filters.krisp_filter import KrispFilter
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,22 +35,19 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
audio_in_filter=KrispFilter(),
|
audio_in_filter=KrispFilter(),
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
audio_in_filter=KrispFilter(),
|
audio_in_filter=KrispFilter(),
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
audio_in_filter=KrispFilter(),
|
audio_in_filter=KrispFilter(),
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
@@ -76,8 +69,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -11,24 +11,20 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.rime.tts import RimeHttpTTSService
|
from pipecat.services.rime.tts import RimeHttpTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -40,20 +36,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -81,8 +74,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.rime.tts import RimeTTSService
|
from pipecat.services.rime.tts import RimeTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -38,20 +34,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,8 +68,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.nim.llm import NimLLMService
|
from pipecat.services.nim.llm import NimLLMService
|
||||||
from pipecat.services.riva.stt import RivaSTTService
|
from pipecat.services.riva.stt import RivaSTTService
|
||||||
from pipecat.services.riva.tts import RivaTTSService
|
from pipecat.services.riva.tts import RivaTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -38,20 +34,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,8 +65,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -12,17 +12,14 @@ from dotenv import load_dotenv
|
|||||||
from google.genai.types import Content, Part
|
from google.genai.types import Content, Part
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import (
|
from pipecat.frames.frames import (
|
||||||
Frame,
|
Frame,
|
||||||
InputAudioRawFrame,
|
InputAudioRawFrame,
|
||||||
InterruptionFrame,
|
|
||||||
LLMFullResponseEndFrame,
|
LLMFullResponseEndFrame,
|
||||||
LLMFullResponseStartFrame,
|
LLMFullResponseStartFrame,
|
||||||
LLMRunFrame,
|
LLMRunFrame,
|
||||||
|
StartInterruptionFrame,
|
||||||
TextFrame,
|
TextFrame,
|
||||||
TranscriptionFrame,
|
TranscriptionFrame,
|
||||||
UserStartedSpeakingFrame,
|
UserStartedSpeakingFrame,
|
||||||
@@ -31,8 +28,7 @@ from pipecat.frames.frames import (
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.processors.frame_processor import FrameProcessor
|
from pipecat.processors.frame_processor import FrameProcessor
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
@@ -40,8 +36,8 @@ from pipecat.services.google.llm import GoogleLLMService
|
|||||||
from pipecat.services.google.tts import GoogleTTSService
|
from pipecat.services.google.tts import GoogleTTSService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -97,8 +93,9 @@ class UserAudioCollector(FrameProcessor):
|
|||||||
elif isinstance(frame, UserStoppedSpeakingFrame):
|
elif isinstance(frame, UserStoppedSpeakingFrame):
|
||||||
self._user_speaking = False
|
self._user_speaking = False
|
||||||
self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||||
await self._user_context_aggregator.push_frame(LLMRunFrame())
|
await self._user_context_aggregator.push_frame(
|
||||||
|
self._user_context_aggregator.get_context_frame()
|
||||||
|
)
|
||||||
elif isinstance(frame, InputAudioRawFrame):
|
elif isinstance(frame, InputAudioRawFrame):
|
||||||
if self._user_speaking:
|
if self._user_speaking:
|
||||||
self._audio_frames.append(frame)
|
self._audio_frames.append(frame)
|
||||||
@@ -154,7 +151,7 @@ class TranscriptExtractor(FrameProcessor):
|
|||||||
await self.push_frame(frame, direction)
|
await self.push_frame(frame, direction)
|
||||||
|
|
||||||
|
|
||||||
class TranscriptionContextFixup(FrameProcessor):
|
class TanscriptionContextFixup(FrameProcessor):
|
||||||
def __init__(self, context):
|
def __init__(self, context):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self._context = context
|
self._context = context
|
||||||
@@ -185,7 +182,9 @@ class TranscriptionContextFixup(FrameProcessor):
|
|||||||
|
|
||||||
if isinstance(frame, MagicDemoTranscriptionFrame):
|
if isinstance(frame, MagicDemoTranscriptionFrame):
|
||||||
self._transcript = frame.text
|
self._transcript = frame.text
|
||||||
elif isinstance(frame, LLMFullResponseEndFrame) or isinstance(frame, InterruptionFrame):
|
elif isinstance(frame, LLMFullResponseEndFrame) or isinstance(
|
||||||
|
frame, StartInterruptionFrame
|
||||||
|
):
|
||||||
self.swap_user_audio()
|
self.swap_user_audio()
|
||||||
self.add_transcript_back_to_inference_output()
|
self.add_transcript_back_to_inference_output()
|
||||||
self._transcript = ""
|
self._transcript = ""
|
||||||
@@ -200,20 +199,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -245,11 +241,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
audio_collector = UserAudioCollector(context, context_aggregator.user())
|
audio_collector = UserAudioCollector(context, context_aggregator.user())
|
||||||
pull_transcript_out_of_llm_output = TranscriptExtractor(context)
|
pull_transcript_out_of_llm_output = TranscriptExtractor(context)
|
||||||
fixup_context_messages = TranscriptionContextFixup(context)
|
fixup_context_messages = TanscriptionContextFixup(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.fish.tts import FishAudioTTSService
|
from pipecat.services.fish.tts import FishAudioTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -76,8 +69,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,10 +10,7 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
@@ -22,8 +19,8 @@ from pipecat.runner.utils import create_transport
|
|||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.ultravox.stt import UltravoxSTTService
|
from pipecat.services.ultravox.stt import UltravoxSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -47,20 +44,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -11,24 +11,20 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService
|
from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -40,20 +36,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -80,8 +73,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.neuphonic.tts import NeuphonicTTSService
|
from pipecat.services.neuphonic.tts import NeuphonicTTSService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -38,20 +34,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,8 +68,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,24 +10,20 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.fal.stt import FalSTTService
|
from pipecat.services.fal.stt import FalSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -39,20 +35,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,8 +71,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -11,16 +11,12 @@ import sys
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
@@ -39,8 +35,7 @@ async def main():
|
|||||||
LocalAudioTransportParams(
|
LocalAudioTransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -60,8 +55,8 @@ async def main():
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -11,16 +11,12 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
@@ -28,8 +24,8 @@ from pipecat.services.minimax.tts import MiniMaxHttpTTSService
|
|||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -41,20 +37,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,8 +75,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -11,15 +11,11 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
@@ -27,8 +23,8 @@ from pipecat.services.openai.llm import OpenAILLMService
|
|||||||
from pipecat.services.sarvam.tts import SarvamHttpTTSService
|
from pipecat.services.sarvam.tts import SarvamHttpTTSService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -40,20 +36,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -80,8 +73,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -12,24 +12,20 @@ import aiohttp
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.services.sarvam.tts import SarvamTTSService
|
from pipecat.services.sarvam.tts import SarvamTTSService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -41,20 +37,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,8 +71,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -6,16 +6,18 @@ from typing import Tuple
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, LLMContextFrame, TextFrame
|
from pipecat.frames.frames import AudioFrame, EndFrame, ImageFrame, TextFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.processors.aggregators import SentenceAggregator
|
from pipecat.processors.aggregators import SentenceAggregator
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import (
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
OpenAILLMContext,
|
||||||
|
OpenAILLMContextFrame,
|
||||||
|
)
|
||||||
from pipecat.runner.daily import configure
|
from pipecat.runner.daily import configure
|
||||||
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
from pipecat.services.azure import AzureLLMService, AzureTTSService
|
||||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||||
from pipecat.services.fal import FalImageGenService
|
from pipecat.services.fal import FalImageGenService
|
||||||
from pipecat.transports.daily.transport import DailyTransport
|
from pipecat.transports.services.daily import DailyTransport
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -81,7 +83,7 @@ async def main():
|
|||||||
sentence_aggregator = SentenceAggregator()
|
sentence_aggregator = SentenceAggregator()
|
||||||
pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
|
pipeline = Pipeline([llm, sentence_aggregator, tts1], source_queue, sink_queue)
|
||||||
|
|
||||||
await source_queue.put(LLMContextFrame(LLMContext(messages)))
|
await source_queue.put(OpenAILLMContextFrame(OpenAILLMContext(messages)))
|
||||||
await source_queue.put(EndFrame())
|
await source_queue.put(EndFrame())
|
||||||
await pipeline.run_pipeline()
|
await pipeline.run_pipeline()
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport, maybe_capture_participant_camera
|
from pipecat.runner.utils import create_transport, maybe_capture_participant_camera
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
|
||||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||||
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|||||||
@@ -9,16 +9,12 @@ import os
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import TTSSpeakFrame
|
from pipecat.frames.frames import TTSSpeakFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.processors.filters.wake_check_filter import WakeCheckFilter
|
from pipecat.processors.filters.wake_check_filter import WakeCheckFilter
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
@@ -26,8 +22,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
|||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -38,20 +34,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -77,8 +70,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
|
|
||||||
hey_robot_filter = WakeCheckFilter(["hey robot", "hey, robot"])
|
hey_robot_filter = WakeCheckFilter(["hey robot", "hey, robot"])
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -10,13 +10,9 @@ import wave
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import (
|
from pipecat.frames.frames import (
|
||||||
Frame,
|
Frame,
|
||||||
LLMContextFrame,
|
|
||||||
LLMFullResponseEndFrame,
|
LLMFullResponseEndFrame,
|
||||||
OutputAudioRawFrame,
|
OutputAudioRawFrame,
|
||||||
TTSSpeakFrame,
|
TTSSpeakFrame,
|
||||||
@@ -24,8 +20,10 @@ from pipecat.frames.frames import (
|
|||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import (
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
OpenAILLMContext,
|
||||||
|
OpenAILLMContextFrame,
|
||||||
|
)
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.processors.logger import FrameLogger
|
from pipecat.processors.logger import FrameLogger
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
@@ -34,8 +32,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
|||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -73,7 +71,7 @@ class InboundSoundEffectWrapper(FrameProcessor):
|
|||||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||||
await super().process_frame(frame, direction)
|
await super().process_frame(frame, direction)
|
||||||
|
|
||||||
if isinstance(frame, LLMContextFrame):
|
if isinstance(frame, OpenAILLMContextFrame):
|
||||||
await self.push_frame(sounds["ding2.wav"])
|
await self.push_frame(sounds["ding2.wav"])
|
||||||
# In case anything else downstream needs it
|
# In case anything else downstream needs it
|
||||||
await self.push_frame(frame, direction)
|
await self.push_frame(frame, direction)
|
||||||
@@ -88,20 +86,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -125,8 +120,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages)
|
context = OpenAILLMContext(messages)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
out_sound = OutboundSoundEffectWrapper()
|
out_sound = OutboundSoundEffectWrapper()
|
||||||
in_sound = InboundSoundEffectWrapper()
|
in_sound = InboundSoundEffectWrapper()
|
||||||
fl = FrameLogger("LLM Out")
|
fl = FrameLogger("LLM Out")
|
||||||
|
|||||||
@@ -10,23 +10,13 @@ from typing import Optional
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||||
from pipecat.frames.frames import (
|
|
||||||
Frame,
|
|
||||||
LLMContextFrame,
|
|
||||||
TextFrame,
|
|
||||||
TTSSpeakFrame,
|
|
||||||
UserImageRawFrame,
|
|
||||||
UserImageRequestFrame,
|
|
||||||
)
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineTask
|
from pipecat.pipeline.task import PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||||
|
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import (
|
from pipecat.runner.utils import (
|
||||||
@@ -38,14 +28,12 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
|||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.moondream.vision import MoondreamService
|
from pipecat.services.moondream.vision import MoondreamService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|
||||||
class UserImageRequester(FrameProcessor):
|
class UserImageRequester(FrameProcessor):
|
||||||
"""Converts incoming text into requests for user images."""
|
|
||||||
|
|
||||||
def __init__(self, participant_id: Optional[str] = None):
|
def __init__(self, participant_id: Optional[str] = None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self._participant_id = participant_id
|
self._participant_id = participant_id
|
||||||
@@ -58,32 +46,9 @@ class UserImageRequester(FrameProcessor):
|
|||||||
|
|
||||||
if self._participant_id and isinstance(frame, TextFrame):
|
if self._participant_id and isinstance(frame, TextFrame):
|
||||||
await self.push_frame(
|
await self.push_frame(
|
||||||
UserImageRequestFrame(self._participant_id, context=frame.text),
|
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||||
FrameDirection.UPSTREAM,
|
|
||||||
)
|
)
|
||||||
else:
|
await self.push_frame(frame, direction)
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
class UserImageProcessor(FrameProcessor):
|
|
||||||
"""Converts incoming user images into context frames."""
|
|
||||||
|
|
||||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
||||||
await super().process_frame(frame, direction)
|
|
||||||
|
|
||||||
if isinstance(frame, UserImageRawFrame):
|
|
||||||
if frame.request and frame.request.context:
|
|
||||||
context = LLMContext()
|
|
||||||
context.add_image_frame_message(
|
|
||||||
image=frame.image,
|
|
||||||
text=frame.request.context,
|
|
||||||
size=frame.size,
|
|
||||||
format=frame.format,
|
|
||||||
)
|
|
||||||
frame = LLMContextFrame(context)
|
|
||||||
await self.push_frame(frame)
|
|
||||||
else:
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
@@ -94,15 +59,13 @@ transport_params = {
|
|||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,7 +78,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
# Initialize the image requester without setting the participant ID yet
|
# Initialize the image requester without setting the participant ID yet
|
||||||
image_requester = UserImageRequester()
|
image_requester = UserImageRequester()
|
||||||
|
|
||||||
image_processor = UserImageProcessor()
|
vision_aggregator = VisionImageFrameAggregator()
|
||||||
|
|
||||||
# If you run into weird description, try with use_cpu=True
|
# If you run into weird description, try with use_cpu=True
|
||||||
moondream = MoondreamService()
|
moondream = MoondreamService()
|
||||||
@@ -133,7 +96,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
stt,
|
stt,
|
||||||
user_response,
|
user_response,
|
||||||
image_requester,
|
image_requester,
|
||||||
image_processor,
|
vision_aggregator,
|
||||||
moondream,
|
moondream,
|
||||||
tts,
|
tts,
|
||||||
transport.output(),
|
transport.output(),
|
||||||
@@ -156,7 +119,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
image_requester.set_participant_id(client_id)
|
image_requester.set_participant_id(client_id)
|
||||||
|
|
||||||
# Welcome message
|
# Welcome message
|
||||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
|
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
@transport.event_handler("on_client_disconnected")
|
||||||
async def on_client_disconnected(transport, client):
|
async def on_client_disconnected(transport, client):
|
||||||
|
|||||||
@@ -10,23 +10,13 @@ from typing import Optional
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||||
from pipecat.frames.frames import (
|
|
||||||
Frame,
|
|
||||||
LLMContextFrame,
|
|
||||||
TextFrame,
|
|
||||||
TTSSpeakFrame,
|
|
||||||
UserImageRawFrame,
|
|
||||||
UserImageRequestFrame,
|
|
||||||
)
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||||
|
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import (
|
from pipecat.runner.utils import (
|
||||||
@@ -38,14 +28,12 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
|||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.google.llm import GoogleLLMService
|
from pipecat.services.google.llm import GoogleLLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|
||||||
class UserImageRequester(FrameProcessor):
|
class UserImageRequester(FrameProcessor):
|
||||||
"""Converts incoming text into requests for user images."""
|
|
||||||
|
|
||||||
def __init__(self, participant_id: Optional[str] = None):
|
def __init__(self, participant_id: Optional[str] = None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self._participant_id = participant_id
|
self._participant_id = participant_id
|
||||||
@@ -58,32 +46,9 @@ class UserImageRequester(FrameProcessor):
|
|||||||
|
|
||||||
if self._participant_id and isinstance(frame, TextFrame):
|
if self._participant_id and isinstance(frame, TextFrame):
|
||||||
await self.push_frame(
|
await self.push_frame(
|
||||||
UserImageRequestFrame(self._participant_id, context=frame.text),
|
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||||
FrameDirection.UPSTREAM,
|
|
||||||
)
|
)
|
||||||
else:
|
await self.push_frame(frame, direction)
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
class UserImageProcessor(FrameProcessor):
|
|
||||||
"""Converts incoming user images into context frames."""
|
|
||||||
|
|
||||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
||||||
await super().process_frame(frame, direction)
|
|
||||||
|
|
||||||
if isinstance(frame, UserImageRawFrame):
|
|
||||||
if frame.request and frame.request.context:
|
|
||||||
context = LLMContext()
|
|
||||||
context.add_image_frame_message(
|
|
||||||
image=frame.image,
|
|
||||||
text=frame.request.context,
|
|
||||||
size=frame.size,
|
|
||||||
format=frame.format,
|
|
||||||
)
|
|
||||||
frame = LLMContextFrame(context)
|
|
||||||
await self.push_frame(frame)
|
|
||||||
else:
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
@@ -94,15 +59,13 @@ transport_params = {
|
|||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,7 +78,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
# Initialize the image requester without setting the participant ID yet
|
# Initialize the image requester without setting the participant ID yet
|
||||||
image_requester = UserImageRequester()
|
image_requester = UserImageRequester()
|
||||||
|
|
||||||
image_processor = UserImageProcessor()
|
vision_aggregator = VisionImageFrameAggregator()
|
||||||
|
|
||||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||||
|
|
||||||
@@ -133,7 +96,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
stt,
|
stt,
|
||||||
user_response,
|
user_response,
|
||||||
image_requester,
|
image_requester,
|
||||||
image_processor,
|
vision_aggregator,
|
||||||
google,
|
google,
|
||||||
tts,
|
tts,
|
||||||
transport.output(),
|
transport.output(),
|
||||||
@@ -160,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
image_requester.set_participant_id(client_id)
|
image_requester.set_participant_id(client_id)
|
||||||
|
|
||||||
# Welcome message
|
# Welcome message
|
||||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
|
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
@transport.event_handler("on_client_disconnected")
|
||||||
async def on_client_disconnected(transport, client):
|
async def on_client_disconnected(transport, client):
|
||||||
|
|||||||
@@ -10,23 +10,13 @@ from typing import Optional
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||||
from pipecat.frames.frames import (
|
|
||||||
Frame,
|
|
||||||
LLMContextFrame,
|
|
||||||
TextFrame,
|
|
||||||
TTSSpeakFrame,
|
|
||||||
UserImageRawFrame,
|
|
||||||
UserImageRequestFrame,
|
|
||||||
)
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||||
|
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import (
|
from pipecat.runner.utils import (
|
||||||
@@ -38,14 +28,12 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
|||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|
||||||
class UserImageRequester(FrameProcessor):
|
class UserImageRequester(FrameProcessor):
|
||||||
"""Converts incoming text into requests for user images."""
|
|
||||||
|
|
||||||
def __init__(self, participant_id: Optional[str] = None):
|
def __init__(self, participant_id: Optional[str] = None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self._participant_id = participant_id
|
self._participant_id = participant_id
|
||||||
@@ -58,32 +46,9 @@ class UserImageRequester(FrameProcessor):
|
|||||||
|
|
||||||
if self._participant_id and isinstance(frame, TextFrame):
|
if self._participant_id and isinstance(frame, TextFrame):
|
||||||
await self.push_frame(
|
await self.push_frame(
|
||||||
UserImageRequestFrame(self._participant_id, context=frame.text),
|
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||||
FrameDirection.UPSTREAM,
|
|
||||||
)
|
)
|
||||||
else:
|
await self.push_frame(frame, direction)
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
class UserImageProcessor(FrameProcessor):
|
|
||||||
"""Converts incoming user images into context frames."""
|
|
||||||
|
|
||||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
||||||
await super().process_frame(frame, direction)
|
|
||||||
|
|
||||||
if isinstance(frame, UserImageRawFrame):
|
|
||||||
if frame.request and frame.request.context:
|
|
||||||
context = LLMContext()
|
|
||||||
context.add_image_frame_message(
|
|
||||||
image=frame.image,
|
|
||||||
text=frame.request.context,
|
|
||||||
size=frame.size,
|
|
||||||
format=frame.format,
|
|
||||||
)
|
|
||||||
frame = LLMContextFrame(context)
|
|
||||||
await self.push_frame(frame)
|
|
||||||
else:
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
@@ -94,15 +59,13 @@ transport_params = {
|
|||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,7 +78,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
# Initialize the image requester without setting the participant ID yet
|
# Initialize the image requester without setting the participant ID yet
|
||||||
image_requester = UserImageRequester()
|
image_requester = UserImageRequester()
|
||||||
|
|
||||||
image_processor = UserImageProcessor()
|
vision_aggregator = VisionImageFrameAggregator()
|
||||||
|
|
||||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||||
|
|
||||||
@@ -133,7 +96,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
stt,
|
stt,
|
||||||
user_response,
|
user_response,
|
||||||
image_requester,
|
image_requester,
|
||||||
image_processor,
|
vision_aggregator,
|
||||||
openai,
|
openai,
|
||||||
tts,
|
tts,
|
||||||
transport.output(),
|
transport.output(),
|
||||||
@@ -160,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
image_requester.set_participant_id(client_id)
|
image_requester.set_participant_id(client_id)
|
||||||
|
|
||||||
# Welcome message
|
# Welcome message
|
||||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
|
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
@transport.event_handler("on_client_disconnected")
|
||||||
async def on_client_disconnected(transport, client):
|
async def on_client_disconnected(transport, client):
|
||||||
|
|||||||
@@ -10,23 +10,13 @@ from typing import Optional
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
from pipecat.frames.frames import Frame, TextFrame, TTSSpeakFrame, UserImageRequestFrame
|
||||||
from pipecat.frames.frames import (
|
|
||||||
Frame,
|
|
||||||
LLMContextFrame,
|
|
||||||
TextFrame,
|
|
||||||
TTSSpeakFrame,
|
|
||||||
UserImageRawFrame,
|
|
||||||
UserImageRequestFrame,
|
|
||||||
)
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||||
|
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import (
|
from pipecat.runner.utils import (
|
||||||
@@ -38,14 +28,12 @@ from pipecat.services.anthropic.llm import AnthropicLLMService
|
|||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
|
|
||||||
class UserImageRequester(FrameProcessor):
|
class UserImageRequester(FrameProcessor):
|
||||||
"""Converts incoming text into requests for user images."""
|
|
||||||
|
|
||||||
def __init__(self, participant_id: Optional[str] = None):
|
def __init__(self, participant_id: Optional[str] = None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self._participant_id = participant_id
|
self._participant_id = participant_id
|
||||||
@@ -58,32 +46,9 @@ class UserImageRequester(FrameProcessor):
|
|||||||
|
|
||||||
if self._participant_id and isinstance(frame, TextFrame):
|
if self._participant_id and isinstance(frame, TextFrame):
|
||||||
await self.push_frame(
|
await self.push_frame(
|
||||||
UserImageRequestFrame(self._participant_id, context=frame.text),
|
UserImageRequestFrame(self._participant_id), FrameDirection.UPSTREAM
|
||||||
FrameDirection.UPSTREAM,
|
|
||||||
)
|
)
|
||||||
else:
|
await self.push_frame(frame, direction)
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
class UserImageProcessor(FrameProcessor):
|
|
||||||
"""Converts incoming user images into context frames."""
|
|
||||||
|
|
||||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
||||||
await super().process_frame(frame, direction)
|
|
||||||
|
|
||||||
if isinstance(frame, UserImageRawFrame):
|
|
||||||
if frame.request and frame.request.context:
|
|
||||||
context = LLMContext()
|
|
||||||
context.add_image_frame_message(
|
|
||||||
image=frame.image,
|
|
||||||
text=frame.request.context,
|
|
||||||
size=frame.size,
|
|
||||||
format=frame.format,
|
|
||||||
)
|
|
||||||
frame = LLMContextFrame(context)
|
|
||||||
await self.push_frame(frame)
|
|
||||||
else:
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
@@ -94,15 +59,13 @@ transport_params = {
|
|||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,7 +78,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
# Initialize the image requester without setting the participant ID yet
|
# Initialize the image requester without setting the participant ID yet
|
||||||
image_requester = UserImageRequester()
|
image_requester = UserImageRequester()
|
||||||
|
|
||||||
image_processor = UserImageProcessor()
|
vision_aggregator = VisionImageFrameAggregator()
|
||||||
|
|
||||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||||
|
|
||||||
@@ -133,7 +96,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
stt,
|
stt,
|
||||||
user_response,
|
user_response,
|
||||||
image_requester,
|
image_requester,
|
||||||
image_processor,
|
vision_aggregator,
|
||||||
anthropic,
|
anthropic,
|
||||||
tts,
|
tts,
|
||||||
transport.output(),
|
transport.output(),
|
||||||
@@ -160,7 +123,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
image_requester.set_participant_id(client_id)
|
image_requester.set_participant_id(client_id)
|
||||||
|
|
||||||
# Welcome message
|
# Welcome message
|
||||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
|
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me what I see."))
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
@transport.event_handler("on_client_disconnected")
|
||||||
async def on_client_disconnected(transport, client):
|
async def on_client_disconnected(transport, client):
|
||||||
|
|||||||
@@ -1,192 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2024–2025, Daily
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: BSD 2-Clause License
|
|
||||||
#
|
|
||||||
|
|
||||||
import os
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import (
|
|
||||||
Frame,
|
|
||||||
LLMContextFrame,
|
|
||||||
TextFrame,
|
|
||||||
TTSSpeakFrame,
|
|
||||||
UserImageRawFrame,
|
|
||||||
UserImageRequestFrame,
|
|
||||||
)
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
|
||||||
from pipecat.runner.utils import (
|
|
||||||
create_transport,
|
|
||||||
get_transport_client_id,
|
|
||||||
maybe_capture_participant_camera,
|
|
||||||
)
|
|
||||||
from pipecat.services.aws.llm import AWSBedrockLLMService
|
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
|
||||||
|
|
||||||
load_dotenv(override=True)
|
|
||||||
|
|
||||||
|
|
||||||
class UserImageRequester(FrameProcessor):
|
|
||||||
"""Converts incoming text into requests for user images."""
|
|
||||||
|
|
||||||
def __init__(self, participant_id: Optional[str] = None):
|
|
||||||
super().__init__()
|
|
||||||
self._participant_id = participant_id
|
|
||||||
|
|
||||||
def set_participant_id(self, participant_id: str):
|
|
||||||
self._participant_id = participant_id
|
|
||||||
|
|
||||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
||||||
await super().process_frame(frame, direction)
|
|
||||||
|
|
||||||
if self._participant_id and isinstance(frame, TextFrame):
|
|
||||||
await self.push_frame(
|
|
||||||
UserImageRequestFrame(self._participant_id, context=frame.text),
|
|
||||||
FrameDirection.UPSTREAM,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
class UserImageProcessor(FrameProcessor):
|
|
||||||
"""Converts incoming user images into context frames."""
|
|
||||||
|
|
||||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
||||||
await super().process_frame(frame, direction)
|
|
||||||
|
|
||||||
if isinstance(frame, UserImageRawFrame):
|
|
||||||
if frame.request and frame.request.context:
|
|
||||||
# Note: AWS Bedrock does not yet support the universal LLMContext
|
|
||||||
context = LLMContext()
|
|
||||||
context.add_image_frame_message(
|
|
||||||
image=frame.image,
|
|
||||||
text=frame.request.context,
|
|
||||||
size=frame.size,
|
|
||||||
format=frame.format,
|
|
||||||
)
|
|
||||||
frame = LLMContextFrame(context)
|
|
||||||
await self.push_frame(frame)
|
|
||||||
else:
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
|
||||||
# instantiated. The function will be called when the desired transport gets
|
|
||||||
# selected.
|
|
||||||
transport_params = {
|
|
||||||
"daily": lambda: DailyParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
video_in_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
|
||||||
"webrtc": lambda: TransportParams(
|
|
||||||
audio_in_enabled=True,
|
|
||||||
audio_out_enabled=True,
|
|
||||||
video_in_enabled=True,
|
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|
||||||
logger.info(f"Starting bot")
|
|
||||||
|
|
||||||
user_response = UserResponseAggregator()
|
|
||||||
|
|
||||||
# Initialize the image requester without setting the participant ID yet
|
|
||||||
image_requester = UserImageRequester()
|
|
||||||
|
|
||||||
image_processor = UserImageProcessor()
|
|
||||||
|
|
||||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
|
||||||
|
|
||||||
# AWS for vision analysis
|
|
||||||
aws = AWSBedrockLLMService(
|
|
||||||
aws_region="us-west-2",
|
|
||||||
model="us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
|
||||||
# Note: usually, prefer providing latency="optimized" param.
|
|
||||||
# Here we can't because AWS Bedrock doesn't support it for Claude 3.7,
|
|
||||||
# which we need for image input.
|
|
||||||
params=AWSBedrockLLMService.InputParams(temperature=0.8),
|
|
||||||
)
|
|
||||||
|
|
||||||
tts = CartesiaTTSService(
|
|
||||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
|
||||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
|
||||||
)
|
|
||||||
|
|
||||||
pipeline = Pipeline(
|
|
||||||
[
|
|
||||||
transport.input(),
|
|
||||||
stt,
|
|
||||||
user_response,
|
|
||||||
image_requester,
|
|
||||||
image_processor,
|
|
||||||
aws,
|
|
||||||
tts,
|
|
||||||
transport.output(),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
task = PipelineTask(
|
|
||||||
pipeline,
|
|
||||||
params=PipelineParams(
|
|
||||||
enable_metrics=True,
|
|
||||||
enable_usage_metrics=True,
|
|
||||||
),
|
|
||||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
|
||||||
)
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_connected")
|
|
||||||
async def on_client_connected(transport, client):
|
|
||||||
logger.info(f"Client connected: {client}")
|
|
||||||
|
|
||||||
await maybe_capture_participant_camera(transport, client)
|
|
||||||
|
|
||||||
# Set the participant ID in the image requester
|
|
||||||
client_id = get_transport_client_id(transport, client)
|
|
||||||
image_requester.set_participant_id(client_id)
|
|
||||||
|
|
||||||
# Welcome message
|
|
||||||
await task.queue_frame(TTSSpeakFrame("Hi there! Feel free to ask me about what I see."))
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
|
||||||
async def on_client_disconnected(transport, client):
|
|
||||||
logger.info(f"Client disconnected")
|
|
||||||
await task.cancel()
|
|
||||||
|
|
||||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
|
||||||
|
|
||||||
await runner.run(task)
|
|
||||||
|
|
||||||
|
|
||||||
async def bot(runner_args: RunnerArguments):
|
|
||||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
|
||||||
transport = await create_transport(runner_args, transport_params)
|
|
||||||
await run_bot(transport, runner_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from pipecat.runner.run import main
|
|
||||||
|
|
||||||
main()
|
|
||||||
@@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.whisper.stt import WhisperSTTService
|
from pipecat.services.whisper.stt import WhisperSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -31,9 +31,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
print(f"Transcription: {frame.text}")
|
print(f"Transcription: {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
# instantiated. The function will be called when the desired transport gets
|
# instantiated. The function will be called when the desired transport gets
|
||||||
|
|||||||
@@ -32,9 +32,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
print(f"Transcription: {frame.text}")
|
print(f"Transcription: {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
transport = LocalAudioTransport(
|
transport = LocalAudioTransport(
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.deepgram.stt import DeepgramSTTService, Language, LiveOptions
|
from pipecat.services.deepgram.stt import DeepgramSTTService, Language, LiveOptions
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -31,9 +31,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
print(f"Transcription: {frame.text}")
|
print(f"Transcription: {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
# instantiated. The function will be called when the desired transport gets
|
# instantiated. The function will be called when the desired transport gets
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.gladia import GladiaSTTService
|
from pipecat.services.gladia import GladiaSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -31,9 +31,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
print(f"Transcription: {frame.text}")
|
print(f"Transcription: {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
# instantiated. The function will be called when the desired transport gets
|
# instantiated. The function will be called when the desired transport gets
|
||||||
|
|||||||
@@ -25,8 +25,8 @@ from pipecat.services.gladia.config import (
|
|||||||
from pipecat.services.gladia.stt import GladiaSTTService
|
from pipecat.services.gladia.stt import GladiaSTTService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -40,9 +40,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
elif isinstance(frame, TranslationFrame):
|
elif isinstance(frame, TranslationFrame):
|
||||||
print(f"Translation ({frame.language}): {frame.text}")
|
print(f"Translation ({frame.language}): {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
# instantiated. The function will be called when the desired transport gets
|
# instantiated. The function will be called when the desired transport gets
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.assemblyai.stt import AssemblyAISTTService
|
from pipecat.services.assemblyai.stt import AssemblyAISTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -31,9 +31,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
print(f"Transcription: {frame.text}")
|
print(f"Transcription: {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
# instantiated. The function will be called when the desired transport gets
|
# instantiated. The function will be called when the desired transport gets
|
||||||
|
|||||||
@@ -20,8 +20,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.whisper.stt import MLXModel, WhisperSTTServiceMLX
|
from pipecat.services.whisper.stt import MLXModel, WhisperSTTServiceMLX
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -52,9 +52,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
self._last_transcription_time = time.time()
|
self._last_transcription_time = time.time()
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
# instantiated. The function will be called when the desired transport gets
|
# instantiated. The function will be called when the desired transport gets
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.stt import CartesiaSTTService
|
from pipecat.services.cartesia.stt import CartesiaSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -31,9 +31,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
print(f"Transcription: {frame.text}")
|
print(f"Transcription: {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
# instantiated. The function will be called when the desired transport gets
|
# instantiated. The function will be called when the desired transport gets
|
||||||
|
|||||||
@@ -21,8 +21,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.sambanova.stt import SambaNovaSTTService
|
from pipecat.services.sambanova.stt import SambaNovaSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -53,9 +53,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
self._last_transcription_time = time.time()
|
self._last_transcription_time = time.time()
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
# instantiated. The function will be called when the desired transport gets
|
# instantiated. The function will be called when the desired transport gets
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ from pipecat.runner.utils import create_transport
|
|||||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||||
from pipecat.transcriptions.language import Language
|
from pipecat.transcriptions.language import Language
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -32,9 +32,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
print(f"Transcription: {frame.text}")
|
print(f"Transcription: {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||||
# instantiated. The function will be called when the desired transport gets
|
# instantiated. The function will be called when the desired transport gets
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.soniox.stt import SonioxSTTService
|
from pipecat.services.soniox.stt import SonioxSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -32,9 +32,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
print(f"Transcription: {frame.text}")
|
print(f"Transcription: {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
transport_params = {
|
transport_params = {
|
||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ from pipecat.runner.types import RunnerArguments
|
|||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.azure.stt import AzureSTTService
|
from pipecat.services.azure.stt import AzureSTTService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -32,9 +32,6 @@ class TranscriptionLogger(FrameProcessor):
|
|||||||
if isinstance(frame, TranscriptionFrame):
|
if isinstance(frame, TranscriptionFrame):
|
||||||
print(f"Transcription: {frame.text}")
|
print(f"Transcription: {frame.text}")
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
transport_params = {
|
transport_params = {
|
||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
|
|||||||
@@ -1,89 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2024–2025, Daily
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: BSD 2-Clause License
|
|
||||||
#
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
import aiohttp
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
|
||||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
|
||||||
from pipecat.pipeline.task import PipelineTask
|
|
||||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
|
||||||
from pipecat.runner.utils import create_transport
|
|
||||||
from pipecat.services.elevenlabs.stt import ElevenLabsSTTService
|
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
|
||||||
|
|
||||||
load_dotenv(override=True)
|
|
||||||
|
|
||||||
|
|
||||||
class TranscriptionLogger(FrameProcessor):
|
|
||||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
||||||
await super().process_frame(frame, direction)
|
|
||||||
|
|
||||||
if isinstance(frame, TranscriptionFrame):
|
|
||||||
print(f"Transcription: {frame.text}")
|
|
||||||
|
|
||||||
# Push all frames through
|
|
||||||
await self.push_frame(frame, direction)
|
|
||||||
|
|
||||||
|
|
||||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
|
||||||
# instantiated. The function will be called when the desired transport gets
|
|
||||||
# selected.
|
|
||||||
transport_params = {
|
|
||||||
"daily": lambda: DailyParams(audio_in_enabled=True, vad_analyzer=SileroVADAnalyzer()),
|
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
|
||||||
audio_in_enabled=True, vad_analyzer=SileroVADAnalyzer()
|
|
||||||
),
|
|
||||||
"webrtc": lambda: TransportParams(audio_in_enabled=True, vad_analyzer=SileroVADAnalyzer()),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|
||||||
logger.info(f"Starting bot")
|
|
||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
|
||||||
stt = ElevenLabsSTTService(
|
|
||||||
api_key=os.getenv("ELEVENLABS_API_KEY"),
|
|
||||||
aiohttp_session=session,
|
|
||||||
)
|
|
||||||
|
|
||||||
tl = TranscriptionLogger()
|
|
||||||
|
|
||||||
pipeline = Pipeline([transport.input(), stt, tl])
|
|
||||||
|
|
||||||
task = PipelineTask(
|
|
||||||
pipeline,
|
|
||||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
|
||||||
)
|
|
||||||
|
|
||||||
@transport.event_handler("on_client_disconnected")
|
|
||||||
async def on_client_disconnected(transport, client):
|
|
||||||
logger.info(f"Client disconnected")
|
|
||||||
await task.cancel()
|
|
||||||
|
|
||||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
|
||||||
|
|
||||||
await runner.run(task)
|
|
||||||
|
|
||||||
|
|
||||||
async def bot(runner_args: RunnerArguments):
|
|
||||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
|
||||||
transport = await create_transport(runner_args, transport_params)
|
|
||||||
await run_bot(transport, runner_args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
from pipecat.runner.run import main
|
|
||||||
|
|
||||||
main()
|
|
||||||
@@ -11,16 +11,12 @@ from loguru import logger
|
|||||||
|
|
||||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
@@ -28,8 +24,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
|||||||
from pipecat.services.llm_service import FunctionCallParams
|
from pipecat.services.llm_service import FunctionCallParams
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -49,20 +45,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,8 +117,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages, tools)
|
context = OpenAILLMContext(messages, tools)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -12,16 +12,12 @@ from loguru import logger
|
|||||||
|
|
||||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.anthropic.llm import AnthropicLLMService
|
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||||
@@ -29,8 +25,8 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
|||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.llm_service import FunctionCallParams
|
from pipecat.services.llm_service import FunctionCallParams
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -51,20 +47,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -119,8 +112,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
|
|
||||||
messages = [{"role": "user", "content": "Say 'hello' to start the conversation."}]
|
messages = [{"role": "user", "content": "Say 'hello' to start the conversation."}]
|
||||||
|
|
||||||
context = LLMContext(messages, tools)
|
context = OpenAILLMContext(messages, tools)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -13,16 +13,12 @@ from loguru import logger
|
|||||||
|
|
||||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import (
|
from pipecat.runner.utils import (
|
||||||
create_transport,
|
create_transport,
|
||||||
@@ -34,7 +30,7 @@ from pipecat.services.cartesia.tts import CartesiaTTSService
|
|||||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||||
from pipecat.services.llm_service import FunctionCallParams
|
from pipecat.services.llm_service import FunctionCallParams
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -77,15 +73,13 @@ transport_params = {
|
|||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,7 +97,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
llm = AnthropicLLMService(
|
llm = AnthropicLLMService(
|
||||||
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
api_key=os.getenv("ANTHROPIC_API_KEY"),
|
||||||
model="claude-3-7-sonnet-latest",
|
model="claude-3-7-sonnet-latest",
|
||||||
params=AnthropicLLMService.InputParams(enable_prompt_caching=True),
|
enable_prompt_caching_beta=True,
|
||||||
)
|
)
|
||||||
llm.register_function("get_weather", get_weather)
|
llm.register_function("get_weather", get_weather)
|
||||||
llm.register_function("get_image", get_image)
|
llm.register_function("get_image", get_image)
|
||||||
@@ -166,8 +160,8 @@ If you need to use a tool, simply use the tool. Do not tell the user the tool yo
|
|||||||
{"role": "user", "content": "Start the conversation by introducing yourself."},
|
{"role": "user", "content": "Start the conversation by introducing yourself."},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages, tools)
|
context = OpenAILLMContext(messages, tools)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -12,16 +12,12 @@ from loguru import logger
|
|||||||
|
|
||||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
@@ -29,8 +25,8 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
|||||||
from pipecat.services.llm_service import FunctionCallParams
|
from pipecat.services.llm_service import FunctionCallParams
|
||||||
from pipecat.services.together.llm import TogetherLLMService
|
from pipecat.services.together.llm import TogetherLLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -46,20 +42,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -110,8 +103,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages, tools)
|
context = OpenAILLMContext(messages, tools)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -13,16 +13,12 @@ from loguru import logger
|
|||||||
|
|
||||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame
|
from pipecat.frames.frames import LLMRunFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import (
|
from pipecat.runner.utils import (
|
||||||
create_transport,
|
create_transport,
|
||||||
@@ -34,7 +30,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
|||||||
from pipecat.services.llm_service import FunctionCallParams
|
from pipecat.services.llm_service import FunctionCallParams
|
||||||
from pipecat.services.openai.llm import OpenAILLMService
|
from pipecat.services.openai.llm import OpenAILLMService
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -77,15 +73,13 @@ transport_params = {
|
|||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -155,8 +149,8 @@ indicate you should use the get_image tool are:
|
|||||||
{"role": "system", "content": system_prompt},
|
{"role": "system", "content": system_prompt},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages, tools)
|
context = OpenAILLMContext(messages, tools)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -13,16 +13,12 @@ from loguru import logger
|
|||||||
|
|
||||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import (
|
from pipecat.runner.utils import (
|
||||||
create_transport,
|
create_transport,
|
||||||
@@ -34,7 +30,7 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
|||||||
from pipecat.services.google.llm import GoogleLLMService
|
from pipecat.services.google.llm import GoogleLLMService
|
||||||
from pipecat.services.llm_service import FunctionCallParams
|
from pipecat.services.llm_service import FunctionCallParams
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -81,15 +77,13 @@ transport_params = {
|
|||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
video_in_enabled=True,
|
video_in_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -176,8 +170,8 @@ indicate you should use the get_image tool are:
|
|||||||
{"role": "user", "content": "Say hello."},
|
{"role": "user", "content": "Say hello."},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages, tools)
|
context = OpenAILLMContext(messages, tools)
|
||||||
context_aggregator = LLMContextAggregatorPair(context)
|
context_aggregator = llm.create_context_aggregator(context)
|
||||||
|
|
||||||
pipeline = Pipeline(
|
pipeline = Pipeline(
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -12,17 +12,13 @@ from loguru import logger
|
|||||||
|
|
||||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||||
from pipecat.audio.turn.smart_turn.base_smart_turn import SmartTurnParams
|
|
||||||
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
|
|
||||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
|
||||||
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
from pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame
|
||||||
from pipecat.pipeline.pipeline import Pipeline
|
from pipecat.pipeline.pipeline import Pipeline
|
||||||
from pipecat.pipeline.runner import PipelineRunner
|
from pipecat.pipeline.runner import PipelineRunner
|
||||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
||||||
from pipecat.processors.aggregators.llm_response import LLMUserAggregatorParams
|
from pipecat.processors.aggregators.llm_response import LLMUserAggregatorParams
|
||||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||||
from pipecat.runner.types import RunnerArguments
|
from pipecat.runner.types import RunnerArguments
|
||||||
from pipecat.runner.utils import create_transport
|
from pipecat.runner.utils import create_transport
|
||||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||||
@@ -30,8 +26,8 @@ from pipecat.services.groq.llm import GroqLLMService
|
|||||||
from pipecat.services.groq.stt import GroqSTTService
|
from pipecat.services.groq.stt import GroqSTTService
|
||||||
from pipecat.services.llm_service import FunctionCallParams
|
from pipecat.services.llm_service import FunctionCallParams
|
||||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||||
from pipecat.transports.daily.transport import DailyParams
|
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
from pipecat.transports.services.daily import DailyParams
|
||||||
|
|
||||||
load_dotenv(override=True)
|
load_dotenv(override=True)
|
||||||
|
|
||||||
@@ -47,20 +43,17 @@ transport_params = {
|
|||||||
"daily": lambda: DailyParams(
|
"daily": lambda: DailyParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"twilio": lambda: FastAPIWebsocketParams(
|
"twilio": lambda: FastAPIWebsocketParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
"webrtc": lambda: TransportParams(
|
"webrtc": lambda: TransportParams(
|
||||||
audio_in_enabled=True,
|
audio_in_enabled=True,
|
||||||
audio_out_enabled=True,
|
audio_out_enabled=True,
|
||||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
vad_analyzer=SileroVADAnalyzer(),
|
||||||
turn_analyzer=LocalSmartTurnAnalyzerV3(params=SmartTurnParams()),
|
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -108,8 +101,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
context = LLMContext(messages, tools)
|
context = OpenAILLMContext(messages, tools)
|
||||||
context_aggregator = LLMContextAggregatorPair(
|
context_aggregator = llm.create_context_aggregator(
|
||||||
context, user_params=LLMUserAggregatorParams(aggregation_timeout=0.05)
|
context, user_params=LLMUserAggregatorParams(aggregation_timeout=0.05)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user