Compare commits
1 Commits
v0.0.75
...
aleix/audi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5a682f8c1f |
6
.github/workflows/format.yaml
vendored
6
.github/workflows/format.yaml
vendored
@@ -17,7 +17,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
ruff-format:
|
||||
name: "Code quality checks"
|
||||
name: "Formatting checker"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
@@ -39,8 +39,8 @@ jobs:
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff format --diff
|
||||
- name: Ruff linter (all rules)
|
||||
- name: Ruff import linter
|
||||
id: ruff-check
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
ruff check
|
||||
ruff check --select I
|
||||
|
||||
2
.github/workflows/publish.yaml
vendored
2
.github/workflows/publish.yaml
vendored
@@ -5,7 +5,7 @@ on:
|
||||
inputs:
|
||||
gitref:
|
||||
type: string
|
||||
description: "what git tag to build (e.g. v0.0.74)"
|
||||
description: "what git ref to build"
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
|
||||
248
CHANGELOG.md
248
CHANGELOG.md
@@ -5,202 +5,12 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.0.75] - 2025-07-08
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Added an `aggregate_sentences` arg in `CartesiaTTSService`,
|
||||
`ElevenLabsTTSService`, `NeuphonicTTSService` and `RimeTTSService`, where the
|
||||
default value is True. When `aggregate_sentences` is True, the `TTSService`
|
||||
aggregates the LLM streamed tokens into sentences by default. Note: setting
|
||||
the value to False requires a custom processor before the `TTSService` to
|
||||
aggregate LLM tokens.
|
||||
|
||||
- Added `kwargs` to the `OLLamaLLMService` to allow for configuration args to
|
||||
be passed to Ollama.
|
||||
|
||||
- Added call hang-up error handling in `TwilioFrameSerializer`, which handles
|
||||
the case where the user has hung up before the `TwilioFrameSerializer` hangs
|
||||
up the call.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `RTVIObserver` and `RTVIProcessor` to match the new RTVI 1.0.0 protocol.
|
||||
This includes:
|
||||
|
||||
- Deprecating support for all messages related to service configuaration and
|
||||
actions.
|
||||
- Adding support for obtaining and logging data about client, including its
|
||||
RTVI version and optionally included system information (OS/browser/etc.)
|
||||
- Adding support for handling the new `client-message` RTVI message through
|
||||
either a `on_client_message` event handler or listening for a new
|
||||
`RTVIClientMessageFrame`
|
||||
- Adding support for responding to a `client-message` with a `server-response`
|
||||
via either a direct call on the `RTVIProcessor` or via pushing a new
|
||||
`RTVIServerResponseFrame`
|
||||
- Adding built-in support for handling the new `append-to-context` RTVI message
|
||||
which allows a client to add to the user or assistant llm context. No extra
|
||||
code is required for supporting this behavior.
|
||||
- Updating all JavaScript and React client RTVI examples to use versions 1.0.0
|
||||
of the clients.
|
||||
|
||||
Get started migrating to RTVI protocol 1.0.0 by following the migration guide:
|
||||
https://docs.pipecat.ai/client/migration-guide
|
||||
|
||||
- Refactored `AWSBedrockLLMService` and `AWSPollyTTSService` to work
|
||||
asynchronously using `aioboto3` instead of the `boto3` library.
|
||||
|
||||
- The `UserIdleProcessor` now handles the scenario where function calls take
|
||||
longer than the idle timeout duration. This allows you to use the
|
||||
`UserIdleProcessor` in conjunction with function calls that take a while to
|
||||
return a result.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Updated the `NeuphonicTTSService` to work with the updated websocket API.
|
||||
|
||||
- Fixed an issue with `RivaSTTService` where the watchdog feature was causing
|
||||
an error on initialization.
|
||||
|
||||
### Performance
|
||||
|
||||
- Remove unncessary push task in each `FrameProcessor`.
|
||||
|
||||
## [0.0.74] - 2025-07-03
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new STT service, `SpeechmaticsSTTService`. This service provides
|
||||
real-time speech-to-text transcription using the Speechmatics API. It supports
|
||||
partial and final transcriptions, multiple languages, various audio formats,
|
||||
and speaker diarization.
|
||||
|
||||
- Added `normalize` and `model_id` to `FishAudioTTSService`.
|
||||
|
||||
- Added `http_options` argument to `GoogleLLMService`.
|
||||
|
||||
- Added `run_llm` field to `LLMMessagesAppendFrame` and `LLMMessagesUpdateFrame`
|
||||
frames. If true, a context frame will be pushed triggering the LLM to respond.
|
||||
|
||||
- Added a new `SOXRStreamAudioResampler` for processing audio in chunks or
|
||||
streams. If you write your own processor and need to use an audio resampler,
|
||||
use the new `create_stream_resampler()`.
|
||||
|
||||
- Added new `DailyParams.audio_in_user_tracks` to allow receiving one track per
|
||||
user (default) or a single track from the room (all participants mixed).
|
||||
|
||||
- Added support for providing "direct" functions, which don't need an
|
||||
accompanying `FunctionSchema` or function definition dict. Instead, metadata
|
||||
(i.e. `name`, `description`, `properties`, and `required`) are automatically
|
||||
extracted from a combination of the function signature and docstring.
|
||||
|
||||
Usage:
|
||||
|
||||
```python
|
||||
# "Direct" function
|
||||
# `params` must be the first parameter
|
||||
async def do_something(params: FunctionCallParams, foo: int, bar: str = ""):
|
||||
"""
|
||||
Do something interesting.
|
||||
|
||||
Args:
|
||||
foo (int): The foo to do something interesting with.
|
||||
bar (string): The bar to do something interesting with.
|
||||
"""
|
||||
|
||||
result = await process(foo, bar)
|
||||
await params.result_callback({"result": result})
|
||||
|
||||
# ...
|
||||
|
||||
llm.register_direct_function(do_something)
|
||||
|
||||
# ...
|
||||
|
||||
tools = ToolsSchema(standard_tools=[do_something])
|
||||
```
|
||||
|
||||
- `user_id` is now populated in the `TranscriptionFrame` and
|
||||
`InterimTranscriptionFrame` when using a transport that provides a `user_id`,
|
||||
like `DailyTransport` or `LiveKitTransport`.
|
||||
|
||||
- Added `watchdog_coroutine()`. This is a watchdog helper for couroutines. So,
|
||||
if you have a coroutine that is waiting for a result and that takes a long
|
||||
time, you will need to wrap it with `watchdog_coroutine()` so the watchdog
|
||||
timers are reset regularly.
|
||||
|
||||
- Added `session_token` parameter to `AWSNovaSonicLLMService`.
|
||||
|
||||
- Added Gemini Multimodal Live File API for uploading, fetching, listing, and
|
||||
deleting files. See `26f-gemini-multimodal-live-files-api.py` for example usage.
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated all the services to use the new `SOXRStreamAudioResampler`, ensuring smooth
|
||||
transitions and eliminating clicks.
|
||||
|
||||
- Upgraded `daily-python` to 0.19.4.
|
||||
|
||||
- Updated `google` optional dependency to use `google-genai` version `1.24.0`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue where audio would get stuck in the queue when an interrupt occurs
|
||||
during Azure TTS synthesis.
|
||||
|
||||
- Fixed a race condition that occurs in Python 3.10+ where the task could miss
|
||||
the `CancelledError` and continue running indefinitely, freezing the pipeline.
|
||||
|
||||
- Fixed a `AWSNovaSonicLLMService` issue introduced in 0.0.72.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- In `FishAudioTTSService`, deprecated `model` and replaced with
|
||||
`reference_id`. This change is to better align with Fish Audio's variable
|
||||
naming and to reduce confusion about what functionality the variable
|
||||
controls.
|
||||
|
||||
## [0.0.73] - 2025-06-26
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue introduced in 0.0.72 that would cause `ElevenLabsTTSService`,
|
||||
`GladiaSTTService`, `NeuphonicTTSService` and `OpenAIRealtimeBetaLLMService`
|
||||
to throw an error.
|
||||
|
||||
## [0.0.72] - 2025-06-26
|
||||
|
||||
### Added
|
||||
|
||||
- Added logging and improved error handling to help diagnose and prevent potential
|
||||
Pipeline freezes.
|
||||
|
||||
- Added `WatchdogQueue`, `WatchdogPriorityQueue`, `WatchdogEvent` and
|
||||
`WatchdogAsyncIterator`. These helper utilities reset watchdog timers
|
||||
appropriately before they expire. When watchdog timers are disabled, the
|
||||
utilities behave as standard counterparts without side effects.
|
||||
|
||||
- Introduce task watchdog timers. Watchdog timers are used to detect if a
|
||||
Pipecat task is taking longer than expected (by default 5 seconds). Watchdog
|
||||
timers are disabled by default and can be enabled globally by passing
|
||||
`enable_watchdog_timers` argument to `PipelineTask` constructor. It is
|
||||
possible to change the default watchdog timer timeout by using the
|
||||
`watchdog_timeout` argument. You can also log how long it takes to reset the
|
||||
watchdog timers which is done with the `enable_watchdog_logging`. You can
|
||||
control all these settings per each frame processor or even per task. That is,
|
||||
you can set `enable_watchdog_timers`, `enable_watchdog_logging` and
|
||||
`watchdog_timeout` when creating any frame processor through their constructor
|
||||
arguments or when you create a task with `FrameProcessor.create_task()`. Note
|
||||
that watchdog timers only work with Pipecat tasks and will not work if you use
|
||||
`asycio.create_task()` or similar.
|
||||
|
||||
- Added `lexicon_names` parameter to `AWSPollyTTSService.InputParams`.
|
||||
|
||||
- Added reconnection logic and audio buffer management to `GladiaSTTService`.
|
||||
|
||||
- The `TurnTrackingObserver` now ends a turn upon observing an `EndFrame` or
|
||||
`CancelFrame`.
|
||||
|
||||
- Added Polish support to `AWSTranscribeSTTService`.
|
||||
|
||||
- Added new frames `FrameProcessorPauseFrame` and `FrameProcessorResumeFrame`
|
||||
@@ -217,28 +27,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
`LLMAssistantContextAggregator` that exposes whether a function call is in
|
||||
progress.
|
||||
|
||||
- Added `SambaNovaLLMService` which provides llm api integration with an
|
||||
OpenAI-compatible interface.
|
||||
|
||||
- Added `SambaNovaTTSService` which provides speech-to-text functionality using
|
||||
SambaNovas's (whisper) API.
|
||||
|
||||
- Add fundational examples for function calling and transcription
|
||||
`14s-function-calling-sambanova.py`, `13g-sambanova-transcription.py`
|
||||
|
||||
### Changed
|
||||
|
||||
- `HeartbeatFrame`s are now control frames. This will make it easier to detect
|
||||
pipeline freezes. Previously, heartbeat frames were system frames which meant
|
||||
they were not get queued with other frames, making it difficult to detect
|
||||
pipeline stalls.
|
||||
|
||||
- Updated `OpenAIRealtimeBetaLLMService` to accept `language` in the
|
||||
`InputAudioTranscription` class for all models.
|
||||
|
||||
- Updated the default model for `OpenAIRealtimeBetaLLMService` to
|
||||
`gpt-4o-realtime-preview-2025-06-03`.
|
||||
|
||||
- The `PipelineParams` arg `allow_interruptions` now defaults to `True`.
|
||||
|
||||
- `TavusTransport` and `TavusVideoService` now send audio to Tavus using WebRTC
|
||||
@@ -247,35 +37,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
- Upgraded `daily-python` to 0.19.3.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `AudioBufferProcessor` parameter `user_continuos_stream` is deprecated.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue that would cause heartbeat frames to be sent before processors
|
||||
were started.
|
||||
|
||||
- Fixed an event loop blocking issue when using `SentryMetrics`.
|
||||
|
||||
- Fixed an issue in `FastAPIWebsocketClient` to ensure proper disconnection
|
||||
when the websocket is already closed.
|
||||
|
||||
- Fixed an issue where the `UserStoppedSpeakingFrame` was not received if the
|
||||
transport was not receiving new audio frames.
|
||||
|
||||
- Fixed an edge case where if the user interrupted the bot but no new aggregation
|
||||
was received, the bot would not resume speaking.
|
||||
|
||||
- Fixed an issue with `TelnyxFrameSerializer` where it would throw an exception
|
||||
when the user hung up the call.
|
||||
|
||||
- Fixed an issue with `ElevenLabsTTSService` where the context was not being
|
||||
closed.
|
||||
|
||||
- Fixed function calling in `AWSNovaSonicLLMService`.
|
||||
- Fixed an `AudioBufferProcessor` issue that was causing crackling on the audio
|
||||
stream with lower sample rate (due to upsampling the other stream). We now
|
||||
record with the lowest sample rate to avoid upsampling.
|
||||
|
||||
- Fixed an issue that would cause multiple `PipelineTask.on_idle_timeout`
|
||||
events to be triggered repeatedly.
|
||||
|
||||
- Fixed an issue that was causing user and bot speech to not be synchronized
|
||||
during recordings.
|
||||
- Fixed an `AudioBufferProcessor` issue that was causing user and bot speech to
|
||||
not be synchronized during recordings.
|
||||
|
||||
- Fixed an issue where voice settings weren't applied to ElevenLabsTTSService.
|
||||
|
||||
@@ -287,10 +63,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
- Fixed an issue where `GoogleLLMService`'s TTFB value was incorrect.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `AudioBufferProcessor` parameter `user_continuos_stream` is deprecated.
|
||||
|
||||
### Other
|
||||
|
||||
- Rename `14e-function-calling-gemini.py` to `14e-function-calling-google.py`.
|
||||
|
||||
150
CONTRIBUTING.md
150
CONTRIBUTING.md
@@ -41,150 +41,36 @@ We use Ruff for code linting and formatting. Please ensure your code passes all
|
||||
|
||||
We follow Google-style docstrings with these specific conventions:
|
||||
|
||||
**Regular Classes:**
|
||||
- Class docstrings should fully document all parameters used in `__init__`
|
||||
- We don't require separate docstrings for `__init__` methods when parameters are documented in the class docstring
|
||||
- Property methods should have docstrings explaining their purpose and return value
|
||||
|
||||
- Class docstring describes the class purpose and key functionality
|
||||
- `__init__` method has its own docstring with complete `Args:` section documenting all parameters
|
||||
- All public methods must have docstrings with `Args:` and `Returns:` sections as appropriate
|
||||
|
||||
**Dataclasses:**
|
||||
|
||||
- Class docstring describes the purpose and documents all fields in a `Parameters:` section
|
||||
- No `__init__` docstring (auto-generated)
|
||||
|
||||
**Properties:**
|
||||
|
||||
- Must have docstrings with `Returns:` section
|
||||
|
||||
**Abstract Methods:**
|
||||
|
||||
- Must have docstrings explaining what subclasses should implement
|
||||
|
||||
**`__init__.py` Files:**
|
||||
|
||||
- **Skip docstrings** for pure import/re-export modules
|
||||
- **Add brief docstrings** for top-level packages or those with initialization logic
|
||||
|
||||
**Enums:**
|
||||
|
||||
- Class docstring describes the enumeration purpose
|
||||
- Use `Parameters:` section to document each enum value and its meaning
|
||||
- No `__init__` docstring (Enums don't have custom constructors)
|
||||
|
||||
**Code Examples in Docstrings:**
|
||||
|
||||
- Use `Examples:` as a section header for multiple examples
|
||||
- Use descriptive text followed by double colons (`::`) for each example
|
||||
- **Always include a blank line after the `::"`**
|
||||
- Indent all code consistently within each block
|
||||
- Separate multiple examples with blank lines for readability
|
||||
|
||||
**Lists and Bullets in Docstrings:**
|
||||
|
||||
- Use dashes (`-`) for bullet points, not asterisks (`*`)
|
||||
- **Add a blank line before bullet lists** when they follow a colon
|
||||
- Use section headers like "Supported features:" or "Behavior:" before lists
|
||||
- For complex nested information, consider using paragraph format instead
|
||||
|
||||
**Deprecations:**
|
||||
|
||||
- Use `warnings.warn()` in code for runtime deprecation warnings
|
||||
- Add `.. deprecated::` directive in docstrings for documentation visibility
|
||||
- Include version information and describe current status
|
||||
- Describe parameters in present tense, use directive to indicate deprecation status
|
||||
|
||||
#### Examples:
|
||||
Example of correctly documented class:
|
||||
|
||||
```python
|
||||
# Regular class
|
||||
class MyService(BaseService):
|
||||
"""Description of what the service does.
|
||||
class MyClass:
|
||||
"""Class description.
|
||||
|
||||
Provides detailed explanation of the service's functionality,
|
||||
key features, and usage patterns.
|
||||
Additional details about the class.
|
||||
|
||||
Supported features:
|
||||
|
||||
- Feature one with detailed explanation
|
||||
- Feature two with additional context
|
||||
- Feature three for advanced use cases
|
||||
Args:
|
||||
param1: Description of first parameter.
|
||||
param2: Description of second parameter.
|
||||
"""
|
||||
|
||||
def __init__(self, param1: str, old_param: str = None, **kwargs):
|
||||
"""Initialize the service.
|
||||
|
||||
Args:
|
||||
param1: Description of param1.
|
||||
old_param: Controls legacy behavior.
|
||||
|
||||
.. deprecated:: 1.2.0
|
||||
This parameter no longer has any effect and will be removed in version 2.0.
|
||||
|
||||
**kwargs: Additional arguments passed to parent.
|
||||
"""
|
||||
if old_param is not None:
|
||||
import warnings
|
||||
warnings.warn(
|
||||
"Parameter 'old_param' is deprecated and will be removed in version 2.0.",
|
||||
DeprecationWarning,
|
||||
)
|
||||
super().__init__(**kwargs)
|
||||
def __init__(self, param1, param2):
|
||||
# No docstring required here as parameters are documented above
|
||||
self.param1 = param1
|
||||
self.param2 = param2
|
||||
|
||||
@property
|
||||
def sample_rate(self) -> int:
|
||||
"""Get the current sample rate.
|
||||
def some_property(self) -> str:
|
||||
"""Get the formatted property value.
|
||||
|
||||
Returns:
|
||||
The sample rate in Hz.
|
||||
A string representation of the property.
|
||||
"""
|
||||
return self._sample_rate
|
||||
|
||||
async def process_data(self, data: str) -> bool:
|
||||
"""Process the provided data.
|
||||
|
||||
Args:
|
||||
data: The data to process.
|
||||
|
||||
Returns:
|
||||
True if processing succeeded.
|
||||
"""
|
||||
pass
|
||||
|
||||
# Dataclass with code examples
|
||||
@dataclass
|
||||
class MessageFrame:
|
||||
"""Frame containing messages in OpenAI format.
|
||||
|
||||
Supports both simple and content list message formats.
|
||||
|
||||
Example::
|
||||
|
||||
[
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"}
|
||||
]
|
||||
|
||||
Parameters:
|
||||
messages: List of messages in OpenAI format.
|
||||
"""
|
||||
|
||||
messages: List[dict]
|
||||
|
||||
# Enum class
|
||||
class Status(Enum):
|
||||
"""Status codes for processing operations.
|
||||
|
||||
Parameters:
|
||||
PENDING: Operation is queued but not started.
|
||||
RUNNING: Operation is currently in progress.
|
||||
COMPLETED: Operation finished successfully.
|
||||
FAILED: Operation encountered an error.
|
||||
"""
|
||||
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
return f"Property: {self.param1}"
|
||||
```
|
||||
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
26
README.md
26
README.md
@@ -51,19 +51,19 @@ You can connect to Pipecat from any platform using our official SDKs:
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Category | Services |
|
||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx) |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
build~=1.2.2
|
||||
coverage~=7.9.1
|
||||
coverage~=7.6.12
|
||||
grpcio-tools~=1.67.1
|
||||
pip-tools~=7.4.1
|
||||
pre-commit~=4.2.0
|
||||
pyright~=1.1.402
|
||||
pytest~=8.4.1
|
||||
pytest-asyncio~=1.0.0
|
||||
pre-commit~=4.0.1
|
||||
pyright~=1.1.400
|
||||
pytest~=8.3.4
|
||||
pytest-asyncio~=0.25.3
|
||||
pytest-aiohttp==1.1.0
|
||||
ruff~=0.12.1
|
||||
setuptools~=78.1.1
|
||||
setuptools_scm~=8.3.1
|
||||
python-dotenv~=1.1.1
|
||||
ruff~=0.11.13
|
||||
setuptools~=70.0.0
|
||||
setuptools_scm~=8.1.0
|
||||
python-dotenv~=1.0.1
|
||||
|
||||
190
docs/api/conf.py
190
docs/api/conf.py
@@ -1,6 +1,5 @@
|
||||
import logging
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# Configure logging
|
||||
@@ -14,8 +13,7 @@ sys.path.insert(0, str(project_root / "src"))
|
||||
|
||||
# Project information
|
||||
project = "pipecat-ai"
|
||||
current_year = datetime.now().year
|
||||
copyright = f"2024-{current_year}, Daily" if current_year > 2024 else "2024, Daily"
|
||||
copyright = "2024, Daily"
|
||||
author = "Daily"
|
||||
|
||||
# General configuration
|
||||
@@ -26,20 +24,19 @@ extensions = [
|
||||
"sphinx.ext.intersphinx",
|
||||
]
|
||||
|
||||
suppress_warnings = [
|
||||
"autodoc.mocked_object",
|
||||
]
|
||||
|
||||
# Napoleon settings
|
||||
napoleon_google_docstring = True
|
||||
napoleon_numpy_docstring = False
|
||||
napoleon_include_init_with_doc = True
|
||||
|
||||
# AutoDoc settings
|
||||
autodoc_default_options = {
|
||||
"members": True,
|
||||
"member-order": "bysource",
|
||||
"undoc-members": False,
|
||||
"exclude-members": "__weakref__,model_config",
|
||||
"special-members": "__init__",
|
||||
"undoc-members": True,
|
||||
"exclude-members": "__weakref__",
|
||||
"no-index": True,
|
||||
"show-inheritance": True,
|
||||
}
|
||||
|
||||
@@ -74,6 +71,7 @@ autodoc_mock_imports = [
|
||||
"langchain",
|
||||
"lmnt",
|
||||
"noisereduce",
|
||||
"openai",
|
||||
"openpipe",
|
||||
"simli",
|
||||
"soundfile",
|
||||
@@ -83,6 +81,10 @@ autodoc_mock_imports = [
|
||||
"tkinter",
|
||||
"daily",
|
||||
"daily_python",
|
||||
"pydantic.BaseModel",
|
||||
"pydantic.Field",
|
||||
"pydantic._internal._model_construction",
|
||||
"pydantic._internal._fields",
|
||||
# Moondream dependencies
|
||||
"torch",
|
||||
"transformers",
|
||||
@@ -143,76 +145,85 @@ autodoc_mock_imports = [
|
||||
"transformers.AutoFeatureExtractor",
|
||||
# Also add specific classes that are imported
|
||||
"AutoFeatureExtractor",
|
||||
# Sentry dependencies
|
||||
"sentry_sdk",
|
||||
# AWS Nova Sonic dependencies
|
||||
"aws_sdk_bedrock_runtime",
|
||||
"aws_sdk_bedrock_runtime.client",
|
||||
"aws_sdk_bedrock_runtime.config",
|
||||
"aws_sdk_bedrock_runtime.models",
|
||||
"smithy_aws_core",
|
||||
"smithy_aws_core.credentials_resolvers",
|
||||
"smithy_aws_core.credentials_resolvers.static",
|
||||
"smithy_aws_core.identity",
|
||||
"smithy_core",
|
||||
"smithy_core.aio",
|
||||
"smithy_core.aio.eventstream",
|
||||
# MCP dependencies (you may already have these)
|
||||
"mcp",
|
||||
"mcp.client",
|
||||
"mcp.client.session_group",
|
||||
"mcp.client.sse",
|
||||
"mcp.client.stdio",
|
||||
"mcp.ClientSession",
|
||||
"mcp.StdioServerParameters",
|
||||
# gstreamer
|
||||
"gi",
|
||||
"gi.require_version",
|
||||
"gi.repository",
|
||||
# Protobuf mocks
|
||||
"pipecat.frames.protobufs.frames_pb2",
|
||||
"pipecat.serializers.protobuf",
|
||||
"google.protobuf",
|
||||
"google.protobuf.descriptor",
|
||||
"google.protobuf.descriptor_pool",
|
||||
"google.protobuf.runtime_version",
|
||||
"google.protobuf.symbol_database",
|
||||
"google.protobuf.internal.builder",
|
||||
]
|
||||
|
||||
# HTML output settings
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
autodoc_typehints = "signature" # Show type hints in the signature only, not in the docstring
|
||||
autodoc_typehints = "description"
|
||||
html_show_sphinx = False
|
||||
|
||||
|
||||
def import_core_modules():
|
||||
"""Import core pipecat modules for autodoc to discover."""
|
||||
core_modules = [
|
||||
"pipecat",
|
||||
"pipecat.frames",
|
||||
"pipecat.pipeline",
|
||||
"pipecat.processors",
|
||||
"pipecat.services",
|
||||
"pipecat.transports",
|
||||
"pipecat.audio",
|
||||
"pipecat.adapters",
|
||||
"pipecat.clocks",
|
||||
"pipecat.metrics",
|
||||
"pipecat.observers",
|
||||
"pipecat.serializers",
|
||||
"pipecat.sync",
|
||||
"pipecat.transcriptions",
|
||||
"pipecat.utils",
|
||||
]
|
||||
def verify_modules():
|
||||
"""Verify that required modules are available."""
|
||||
required_modules = {
|
||||
"services": [
|
||||
"assemblyai",
|
||||
"aws",
|
||||
"cartesia",
|
||||
"deepgram",
|
||||
"google",
|
||||
"lmnt",
|
||||
"riva",
|
||||
"simli",
|
||||
],
|
||||
"serializers": ["livekit"],
|
||||
"vad": ["silero", "vad_analyzer"],
|
||||
"transports": {
|
||||
"services": ["daily", "livekit"],
|
||||
"local": ["audio", "tk"],
|
||||
"network": ["fastapi_websocket", "websocket_server"],
|
||||
},
|
||||
}
|
||||
|
||||
for module_name in core_modules:
|
||||
try:
|
||||
__import__(module_name)
|
||||
logger.info(f"Successfully imported {module_name}")
|
||||
except ImportError as e:
|
||||
logger.warning(f"Failed to import {module_name}: {e}")
|
||||
# Skip importing modules that are in autodoc_mock_imports
|
||||
skipped_modules = set(autodoc_mock_imports)
|
||||
|
||||
missing = []
|
||||
for category, modules in required_modules.items():
|
||||
if isinstance(modules, dict):
|
||||
# Handle nested structure
|
||||
for subcategory, submodules in modules.items():
|
||||
for module in submodules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if (
|
||||
f"pipecat.{category}.{subcategory}.{module}" in skipped_modules
|
||||
or module in skipped_modules
|
||||
):
|
||||
logger.info(
|
||||
f"Skipping import of mocked module: pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.info(
|
||||
f"Successfully imported pipecat.{category}.{subcategory}.{module}"
|
||||
)
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{subcategory}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{subcategory}.{module} - {str(e)}"
|
||||
)
|
||||
else:
|
||||
# Handle flat structure
|
||||
for module in modules:
|
||||
# Check if module is in autodoc_mock_imports
|
||||
if f"pipecat.{category}.{module}" in skipped_modules or module in skipped_modules:
|
||||
logger.info(f"Skipping import of mocked module: pipecat.{category}.{module}")
|
||||
continue
|
||||
|
||||
try:
|
||||
__import__(f"pipecat.{category}.{module}")
|
||||
logger.info(f"Successfully imported pipecat.{category}.{module}")
|
||||
except (ImportError, TypeError, NameError) as e:
|
||||
missing.append(f"pipecat.{category}.{module}")
|
||||
logger.warning(
|
||||
f"Optional module not available: pipecat.{category}.{module} - {str(e)}"
|
||||
)
|
||||
|
||||
if missing:
|
||||
logger.warning(f"Some optional modules are not available: {missing}")
|
||||
|
||||
|
||||
def clean_title(title: str) -> str:
|
||||
@@ -224,7 +235,36 @@ def clean_title(title: str) -> str:
|
||||
parts = title.split(".")
|
||||
title = parts[-1]
|
||||
|
||||
return title
|
||||
# Special cases for service names and common acronyms
|
||||
special_cases = {
|
||||
"ai": "AI",
|
||||
"aws": "AWS",
|
||||
"api": "API",
|
||||
"vad": "VAD",
|
||||
"assemblyai": "AssemblyAI",
|
||||
"deepgram": "Deepgram",
|
||||
"elevenlabs": "ElevenLabs",
|
||||
"openai": "OpenAI",
|
||||
"openpipe": "OpenPipe",
|
||||
"playht": "PlayHT",
|
||||
"xtts": "XTTS",
|
||||
"lmnt": "LMNT",
|
||||
}
|
||||
|
||||
# Check if the entire title is a special case
|
||||
if title.lower() in special_cases:
|
||||
return special_cases[title.lower()]
|
||||
|
||||
# Otherwise, capitalize each word
|
||||
words = title.split("_")
|
||||
cleaned_words = []
|
||||
for word in words:
|
||||
if word.lower() in special_cases:
|
||||
cleaned_words.append(special_cases[word.lower()])
|
||||
else:
|
||||
cleaned_words.append(word.capitalize())
|
||||
|
||||
return " ".join(cleaned_words)
|
||||
|
||||
|
||||
def setup(app):
|
||||
@@ -249,8 +289,9 @@ def setup(app):
|
||||
|
||||
excludes = [
|
||||
str(project_root / "src/pipecat/pipeline/to_be_updated"),
|
||||
str(project_root / "src/pipecat/examples"),
|
||||
str(project_root / "src/pipecat/tests"),
|
||||
str(project_root / "src/pipecat/processors/gstreamer"),
|
||||
str(project_root / "src/pipecat/services/to_be_updated"),
|
||||
str(project_root / "src/pipecat/vad"), # deprecated
|
||||
"**/test_*.py",
|
||||
"**/tests/*.py",
|
||||
]
|
||||
@@ -291,4 +332,5 @@ def setup(app):
|
||||
logger.error(f"Error generating API documentation: {e}", exc_info=True)
|
||||
|
||||
|
||||
import_core_modules()
|
||||
# Run module verification
|
||||
verify_modules()
|
||||
|
||||
@@ -1,17 +1,57 @@
|
||||
Pipecat API Reference
|
||||
=====================
|
||||
Pipecat API Reference Docs
|
||||
==========================
|
||||
|
||||
Welcome to the Pipecat API reference.
|
||||
Welcome to Pipecat's API reference documentation!
|
||||
|
||||
Use the navigation on the left to browse modules, or search using the search box.
|
||||
|
||||
**New to Pipecat?** Check out the `main documentation <https://docs.pipecat.ai>`_ for tutorials, guides, and client SDK information.
|
||||
Pipecat is an open source framework for building voice and multimodal assistants.
|
||||
It provides a flexible pipeline architecture for connecting various AI services,
|
||||
audio processing, and transport layers.
|
||||
|
||||
Quick Links
|
||||
-----------
|
||||
|
||||
* `GitHub Repository <https://github.com/pipecat-ai/pipecat>`_
|
||||
* `Join our Community <https://discord.gg/pipecat>`_
|
||||
* `Website <https://pipecat.ai>`_
|
||||
|
||||
API Reference
|
||||
-------------
|
||||
|
||||
Core Components
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Frames <pipecat.frames>`
|
||||
* :mod:`Processors <pipecat.processors>`
|
||||
* :mod:`Pipeline <pipecat.pipeline>`
|
||||
|
||||
Audio Processing
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Audio <pipecat.audio>`
|
||||
|
||||
Services
|
||||
~~~~~~~~
|
||||
|
||||
* :mod:`Services <pipecat.services>`
|
||||
|
||||
Transport & Serialization
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
* :mod:`Transports <pipecat.transports>`
|
||||
* :mod:`Local <pipecat.transports.local>`
|
||||
* :mod:`Network <pipecat.transports.network>`
|
||||
* :mod:`Services <pipecat.transports.services>`
|
||||
* :mod:`Serializers <pipecat.serializers>`
|
||||
|
||||
Utilities
|
||||
~~~~~~~~~
|
||||
|
||||
* :mod:`Adapters <pipecat.adapters>`
|
||||
* :mod:`Clocks <pipecat.clocks>`
|
||||
* :mod:`Metrics <pipecat.metrics>`
|
||||
* :mod:`Observers <pipecat.observers>`
|
||||
* :mod:`Sync <pipecat.sync>`
|
||||
* :mod:`Transcriptions <pipecat.transcriptions>`
|
||||
* :mod:`Utils <pipecat.utils>`
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
@@ -31,4 +71,11 @@ Quick Links
|
||||
Sync <api/pipecat.sync>
|
||||
Transcriptions <api/pipecat.transcriptions>
|
||||
Transports <api/pipecat.transports>
|
||||
Utils <api/pipecat.utils>
|
||||
Utils <api/pipecat.utils>
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
||||
* :ref:`genindex`
|
||||
* :ref:`modindex`
|
||||
* :ref:`search`
|
||||
@@ -42,11 +42,9 @@ pipecat-ai[openai]
|
||||
pipecat-ai[qwen]
|
||||
pipecat-ai[remote-smart-turn]
|
||||
# pipecat-ai[riva] # Mocked
|
||||
pipecat-ai[sambanova]
|
||||
pipecat-ai[silero]
|
||||
pipecat-ai[simli]
|
||||
pipecat-ai[soundfile]
|
||||
pipecat-ai[speechmatics]
|
||||
pipecat-ai[tavus]
|
||||
pipecat-ai[together]
|
||||
# pipecat-ai[ultravox] # Mocked
|
||||
|
||||
@@ -107,14 +107,4 @@ MINIMAX_API_KEY=...
|
||||
MINIMAX_GROUP_ID=...
|
||||
|
||||
# Sarvam AI
|
||||
SARVAM_API_KEY=...
|
||||
|
||||
# Speechmatics
|
||||
SPEECHMATICS_API_KEY=...
|
||||
|
||||
|
||||
# SambaNova
|
||||
SAMBANOVA_API_KEY=...
|
||||
|
||||
# Sentry
|
||||
SENTRY_DSN=...
|
||||
SARVAM_API_KEY=...
|
||||
@@ -4364,9 +4364,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -6081,9 +6081,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
|
||||
@@ -2,4 +2,4 @@ aiofiles
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia,soundfile]
|
||||
pipecat-ai[daily,deepgram,openai,silero,cartesia]
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Pipecat Client Implementation
|
||||
* RTVI Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
@@ -16,7 +16,7 @@
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import { PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { RTVIClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
/**
|
||||
@@ -26,7 +26,7 @@ import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
class ChatbotClient {
|
||||
constructor() {
|
||||
// Initialize client state
|
||||
this.pcClient = null;
|
||||
this.rtviClient = null;
|
||||
this.setupDOMElements();
|
||||
this.initializeClientAndTransport();
|
||||
this.setupEventListeners();
|
||||
@@ -59,7 +59,7 @@ class ChatbotClient {
|
||||
this.disconnectBtn.addEventListener('click', () => this.disconnect());
|
||||
|
||||
// Populate device selector
|
||||
this.pcClient.getAllMics().then((mics) => {
|
||||
this.rtviClient.getAllMics().then((mics) => {
|
||||
console.log('Available mics:', mics);
|
||||
mics.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
@@ -71,16 +71,16 @@ class ChatbotClient {
|
||||
this.deviceSelector.addEventListener('change', (event) => {
|
||||
const selectedDeviceId = event.target.value;
|
||||
console.log('Selected device ID:', selectedDeviceId);
|
||||
this.pcClient.updateMic(selectedDeviceId);
|
||||
this.rtviClient.updateMic(selectedDeviceId);
|
||||
});
|
||||
|
||||
// Handle mic mute/unmute toggle
|
||||
const micToggleBtn = document.getElementById('mic-toggle-btn');
|
||||
|
||||
micToggleBtn.addEventListener('click', () => {
|
||||
let micEnabled = this.pcClient.isMicEnabled;
|
||||
let micEnabled = this.rtviClient.isMicEnabled;
|
||||
micToggleBtn.textContent = micEnabled ? 'Unmute Mic' : 'Mute Mic';
|
||||
this.pcClient.enableMic(!micEnabled);
|
||||
this.rtviClient.enableMic(!micEnabled);
|
||||
// Add logic to mute/unmute the mic
|
||||
if (micEnabled) {
|
||||
console.log('Mic muted');
|
||||
@@ -93,12 +93,23 @@ class ChatbotClient {
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up the Pipecat client and Daily transport
|
||||
* Set up the RTVI client and Daily transport
|
||||
*/
|
||||
async initializeClientAndTransport() {
|
||||
// Initialize the Pipecat client with a DailyTransport and our configuration
|
||||
this.pcClient = new PipecatClient({
|
||||
// Initialize the RTVI client with a DailyTransport and our configuration
|
||||
this.rtviClient = new RTVIClient({
|
||||
transport: new DailyTransport(),
|
||||
params: {
|
||||
// REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
baseUrl:
|
||||
'https://<Modal workspace>--pipecat-modal-bot-launcher.modal.run',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
requestData: {
|
||||
bot_name: 'openai',
|
||||
},
|
||||
},
|
||||
enableMic: true, // Enable microphone for user input
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
@@ -165,8 +176,8 @@ class ChatbotClient {
|
||||
// Set up listeners for media track events
|
||||
this.setupTrackListeners();
|
||||
|
||||
await this.pcClient.initDevices();
|
||||
window.client = this.pcClient;
|
||||
await this.rtviClient.initDevices();
|
||||
window.client = this.rtviClient;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -201,10 +212,10 @@ class ChatbotClient {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Get current tracks from the client
|
||||
const tracks = this.pcClient.tracks();
|
||||
const tracks = this.rtviClient.tracks();
|
||||
|
||||
// Set up any available bot tracks
|
||||
if (tracks.bot?.audio) {
|
||||
@@ -220,10 +231,10 @@ class ChatbotClient {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local) {
|
||||
if (track.kind === 'audio') {
|
||||
@@ -242,7 +253,7 @@ class ChatbotClient {
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
if (participant.local) {
|
||||
this.log('Local mic muted');
|
||||
return;
|
||||
@@ -300,27 +311,21 @@ class ChatbotClient {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
*/
|
||||
async connect() {
|
||||
try {
|
||||
const botSelector = document.getElementById('bot-selector');
|
||||
const selectedBot = botSelector.value;
|
||||
this.rtviClient.params.requestData.bot_name = selectedBot;
|
||||
|
||||
// Initialize audio/video devices
|
||||
this.log('Initializing devices...');
|
||||
await this.pcClient.initDevices();
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
// Connect to the bot
|
||||
this.log(`Connecting to bot: ${selectedBot}`);
|
||||
await this.pcClient.connect({
|
||||
// REPLACE WITH YOUR MODAL URL ENDPOINT
|
||||
endpoint:
|
||||
'https://<your-workspace>--pipecat-modal-fastapi-app.modal.run/connect',
|
||||
requestData: {
|
||||
bot_name: selectedBot,
|
||||
},
|
||||
});
|
||||
await this.rtviClient.connect();
|
||||
|
||||
this.log('Connection complete');
|
||||
} catch (error) {
|
||||
@@ -331,9 +336,9 @@ class ChatbotClient {
|
||||
this.updateStatus('Error');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.pcClient.disconnect();
|
||||
await this.rtviClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError.message}`);
|
||||
}
|
||||
@@ -345,10 +350,10 @@ class ChatbotClient {
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
// Disconnect the Pipecat client
|
||||
await this.pcClient.disconnect();
|
||||
// Disconnect the RTVI client
|
||||
await this.rtviClient.disconnect();
|
||||
|
||||
// Clean up audio
|
||||
if (this.botAudio.srcObject) {
|
||||
|
||||
@@ -1,3 +1,2 @@
|
||||
python-dotenv==1.0.1
|
||||
modal==1.0.5
|
||||
fastapi[all]
|
||||
modal==0.71.3
|
||||
|
||||
@@ -215,9 +215,10 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.30.tgz",
|
||||
"integrity": "sha512-KBiBKrDY6kxTQWGzKjQB7QirL3PiiOkV7KW98leHFjtVRKtft76Ra5qSA/SL75xT44dp6hOcqiiJ6iievLOYug=="
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.26.tgz",
|
||||
"integrity": "sha512-vO//GJ/YBco+H7xdQhzJxF7ub3SUwft76jwaeOyVVQFHCi5DCnkP16WHB+JBylo4vOKPoZBlR94Z8xBxNBdNJA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
"version": "14.2.25",
|
||||
@@ -230,12 +231,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.30.tgz",
|
||||
"integrity": "sha512-EAqfOTb3bTGh9+ewpO/jC59uACadRHM6TSA9DdxJB/6gxOpyV+zrbqeXiFTDy9uV6bmipFDkfpAskeaDcO+7/g==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.26.tgz",
|
||||
"integrity": "sha512-zDJY8gsKEseGAxG+C2hTMT0w9Nk9N1Sk1qV7vXYz9MEiyRoF5ogQX2+vplyUMIfygnjn9/A04I6yrUTRTuRiyQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -245,12 +247,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.30.tgz",
|
||||
"integrity": "sha512-TyO7Wz1IKE2kGv8dwQ0bmPL3s44EKVencOqwIY69myoS3rdpO1NPg5xPM5ymKu7nfX4oYJrpMxv8G9iqLsnL4A==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.26.tgz",
|
||||
"integrity": "sha512-U0adH5ryLfmTDkahLwG9sUQG2L0a9rYux8crQeC92rPhi3jGQEY47nByQHrVrt3prZigadwj/2HZ1LUUimuSbg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
@@ -260,12 +263,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-I5lg1fgPJ7I5dk6mr3qCH1hJYKJu1FsfKSiTKoYwcuUf53HWTrEkwmMI0t5ojFKeA6Vu+SfT2zVy5NS0QLXV4Q==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-SINMl1I7UhfHGM7SoRiw0AbwnLEMUnJ/3XXVmhyptzriHbWvPPbbm0OEVG24uUKhuS1t0nvN/DBvm5kz6ZIqpg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -275,12 +279,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-8GkNA+sLclQyxgzCDs2/2GSwBc92QLMrmYAmoP2xehe5MUKBLB2cgo34Yu242L1siSkwQkiV4YLdCnjwc/Micw==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-s6JaezoyJK2DxrwHWxLWtJKlqKqTdi/zaYigDXUJ/gmx/72CrzdVZfMvUc6VqnZ7YEvRijvYo+0o4Z9DencduA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -290,12 +295,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-8Ly7okjssLuBoe8qaRCcjGtcMsv79hwzn/63wNeIkzJVFVX06h5S737XNr7DZwlsbTBDOyI6qbL2BJB5n6TV/w==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.26.tgz",
|
||||
"integrity": "sha512-FEXeUQi8/pLr/XI0hKbe0tgbLmHFRhgXOUiPScz2hk0hSmbGiU8aUqVslj/6C6KA38RzXnWoJXo4FMo6aBxjzg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -305,12 +311,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-dBmV1lLNeX4mR7uI7KNVHsGQU+OgTG5RGFPi3tBJpsKPvOPtg9poyav/BYWrB3GPQL4dW5YGGgalwZ79WukbKQ==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.26.tgz",
|
||||
"integrity": "sha512-BUsomaO4d2DuXhXhgQCVt2jjX4B4/Thts8nDoIruEJkhE5ifeQFtvW5c9JkdOtYvE5p2G0hcwQ0UbRaQmQwaVg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
@@ -320,12 +327,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-6MMHi2Qc1Gkq+4YLXAgbYslE1f9zMGBikKMdmQRHXjkGPot1JY3n5/Qrbg40Uvbi8//wYnydPnyvNhI1DMUW1g==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-5auwsMVzT7wbB2CZXQxDctpWbdEnEW/e66DyXO1DcgHxIyhP06awu+rHKshZE+lPLIGiwtjo7bsyeuubewwxMw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -335,12 +343,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-ia32-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-pVZMnFok5qEX4RT59mK2hEVtJX+XFfak+/rjHpyFh7juiT52r177bfFKhnlafm0UOSldhXjj32b+LZIOdswGTg==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-GQWg/Vbz9zUGi9X80lOeGsz1rMH/MtFO/XqigDznhhhTfDlDoynCM6982mPCbSlxJ/aveZcKtTlwfAjwhyxDpg==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -350,12 +359,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-4KCo8hMZXMjpTzs3HOqOGYYwAXymXIy7PEPAXNEcEOyKqkjiDlECumrWziy+JEF0Oi4ILHGxzgQ3YiMGG2t/Lg==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.26.tgz",
|
||||
"integrity": "sha512-2rdB3T1/Gp7bv1eQTTm9d1Y1sv9UuJ2LAwOE0Pe2prHKe32UNscj7YS13fRB37d0GAiGNR+Y7ZcW8YjDI8Ns0w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
@@ -610,10 +620,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -1213,10 +1224,11 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
@@ -2602,10 +2614,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -3600,11 +3613,12 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.30.tgz",
|
||||
"integrity": "sha512-+COdu6HQrHHFQ1S/8BBsCag61jZacmvbuL2avHvQFbWa2Ox7bE+d8FyNgxRLjXQ5wtPyQwEmk85js/AuaG2Sbg==",
|
||||
"version": "14.2.26",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.26.tgz",
|
||||
"integrity": "sha512-b81XSLihMwCfwiUVRRja3LphLo4uBBMZEzBBWMaISbKTwOmq3wPknIETy/8000tr7Gq4WmbuFYPS7jOYIf+ZJw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "14.2.30",
|
||||
"@next/env": "14.2.26",
|
||||
"@swc/helpers": "0.5.5",
|
||||
"busboy": "1.6.0",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -3619,15 +3633,15 @@
|
||||
"node": ">=18.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "14.2.30",
|
||||
"@next/swc-darwin-x64": "14.2.30",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.30",
|
||||
"@next/swc-linux-arm64-musl": "14.2.30",
|
||||
"@next/swc-linux-x64-gnu": "14.2.30",
|
||||
"@next/swc-linux-x64-musl": "14.2.30",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.30",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.30",
|
||||
"@next/swc-win32-x64-msvc": "14.2.30"
|
||||
"@next/swc-darwin-arm64": "14.2.26",
|
||||
"@next/swc-darwin-x64": "14.2.26",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.26",
|
||||
"@next/swc-linux-arm64-musl": "14.2.26",
|
||||
"@next/swc-linux-x64-gnu": "14.2.26",
|
||||
"@next/swc-linux-x64-musl": "14.2.26",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.26",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.26",
|
||||
"@next/swc-win32-x64-msvc": "14.2.26"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
|
||||
@@ -44,7 +44,7 @@ Try the hosted version of the demo here: https://pcc-smart-turn.vercel.app/.
|
||||
4. Run the server:
|
||||
|
||||
```bash
|
||||
LOCAL_RUN=1 python server.py
|
||||
LOCAL=1 python server.py
|
||||
```
|
||||
|
||||
### Run the client
|
||||
|
||||
1281
examples/fal-smart-turn/client/package-lock.json
generated
1281
examples/fal-smart-turn/client/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -9,9 +9,9 @@
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/client-react": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0",
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/client-react": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.10",
|
||||
"next": "15.3.1",
|
||||
"react": "^19.0.0",
|
||||
"react-dom": "^19.0.0"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import './globals.css';
|
||||
import { PipecatProvider } from '@/providers/PipecatProvider';
|
||||
import { RTVIProvider } from '@/providers/RTVIProvider';
|
||||
|
||||
export const metadata = {
|
||||
title: 'Pipecat React Client',
|
||||
@@ -20,7 +20,7 @@ export default function RootLayout({
|
||||
<link rel="icon" href="/favicon.svg" type="image/svg+xml" />
|
||||
</head>
|
||||
<body>
|
||||
<PipecatProvider>{children}</PipecatProvider>
|
||||
<RTVIProvider>{children}</RTVIProvider>
|
||||
</body>
|
||||
</html>
|
||||
);
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
'use client';
|
||||
|
||||
import {
|
||||
PipecatClientAudio,
|
||||
PipecatClientVideo,
|
||||
usePipecatClientTransportState,
|
||||
RTVIClientAudio,
|
||||
RTVIClientVideo,
|
||||
useRTVIClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
import { ConnectButton } from '../components/ConnectButton';
|
||||
import { StatusDisplay } from '../components/StatusDisplay';
|
||||
import { DebugDisplay } from '../components/DebugDisplay';
|
||||
|
||||
function BotVideo() {
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const isConnected = transportState !== 'disconnected';
|
||||
|
||||
return (
|
||||
<div className="bot-container">
|
||||
<div className="video-container">
|
||||
{isConnected && <PipecatClientVideo participant="bot" fit="cover" />}
|
||||
{isConnected && <RTVIClientVideo participant="bot" fit="cover" />}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -35,7 +35,7 @@ export default function Home() {
|
||||
</div>
|
||||
|
||||
<DebugDisplay />
|
||||
<PipecatClientAudio />
|
||||
<RTVIClientAudio />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,17 +1,11 @@
|
||||
import {
|
||||
usePipecatClient,
|
||||
usePipecatClientTransportState,
|
||||
useRTVIClient,
|
||||
useRTVIClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
|
||||
// Get the API base URL from environment variables
|
||||
// Default to "/api" if not specified
|
||||
// "/api" is the default for Next.js API routes and used
|
||||
// for the Pipecat Cloud deployed agent
|
||||
const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || '/api';
|
||||
|
||||
export function ConnectButton() {
|
||||
const client = usePipecatClient();
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const client = useRTVIClient();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const isConnected = ['connected', 'ready'].includes(transportState);
|
||||
|
||||
const handleClick = async () => {
|
||||
@@ -24,10 +18,7 @@ export function ConnectButton() {
|
||||
if (isConnected) {
|
||||
await client.disconnect();
|
||||
} else {
|
||||
await client.connect({
|
||||
endpoint: `${API_BASE_URL}/connect`,
|
||||
requestData: { foo: 'bar' },
|
||||
});
|
||||
await client.connect();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Connection error:', error);
|
||||
|
||||
@@ -6,7 +6,7 @@ import {
|
||||
TranscriptData,
|
||||
BotLLMTextData,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { usePipecatClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import { useRTVIClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import './DebugDisplay.css';
|
||||
|
||||
interface SmartTurnResultData {
|
||||
@@ -20,7 +20,7 @@ interface SmartTurnResultData {
|
||||
|
||||
export function DebugDisplay() {
|
||||
const debugLogRef = useRef<HTMLDivElement>(null);
|
||||
const client = usePipecatClient();
|
||||
const client = useRTVIClient();
|
||||
|
||||
const log = useCallback((message: string) => {
|
||||
if (!debugLogRef.current) return;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { usePipecatClientTransportState } from '@pipecat-ai/client-react';
|
||||
import { useRTVIClientTransportState } from '@pipecat-ai/client-react';
|
||||
|
||||
export function StatusDisplay() {
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
|
||||
return (
|
||||
<div className="status">
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
'use client';
|
||||
|
||||
import { PipecatClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { PipecatClientProvider } from '@pipecat-ai/client-react';
|
||||
import { PropsWithChildren, useEffect, useState } from 'react';
|
||||
|
||||
export function PipecatProvider({ children }: PropsWithChildren) {
|
||||
const [client, setClient] = useState<PipecatClient | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const pcClient = new PipecatClient({
|
||||
transport: new DailyTransport(),
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
setClient(pcClient);
|
||||
}, []);
|
||||
|
||||
if (!client) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return (
|
||||
<PipecatClientProvider client={client}>{children}</PipecatClientProvider>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
'use client';
|
||||
|
||||
import { RTVIClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { RTVIClientProvider } from '@pipecat-ai/client-react';
|
||||
import { PropsWithChildren, useEffect, useState } from 'react';
|
||||
|
||||
// Get the API base URL from environment variables
|
||||
// Default to "/api" if not specified
|
||||
// "/api" is the default for Next.js API routes and used
|
||||
// for the Pipecat Cloud deployed agent
|
||||
const API_BASE_URL = process.env.NEXT_PUBLIC_API_BASE_URL || '/api';
|
||||
|
||||
console.log('Using API base URL:', API_BASE_URL);
|
||||
|
||||
export function RTVIProvider({ children }: PropsWithChildren) {
|
||||
const [client, setClient] = useState<RTVIClient | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const transport = new DailyTransport();
|
||||
|
||||
const rtviClient = new RTVIClient({
|
||||
transport,
|
||||
params: {
|
||||
baseUrl: API_BASE_URL,
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
requestData: { foo: 'bar' },
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
setClient(rtviClient);
|
||||
}, []);
|
||||
|
||||
if (!client) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return <RTVIClientProvider client={client}>{children}</RTVIClientProvider>;
|
||||
}
|
||||
@@ -45,7 +45,7 @@ from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Check if we're in local development mode
|
||||
LOCAL = os.getenv("LOCAL_RUN")
|
||||
LOCAL = os.getenv("LOCAL")
|
||||
|
||||
logger.remove()
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
@@ -1,153 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.base_llm import BaseOpenAILLMService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
"""Run example using Speechmatics STT.
|
||||
|
||||
This example will use diarization within our STT service and output the words spoken by
|
||||
each individual speaker and wrap them with XML tags for the LLM to process. Note the
|
||||
instructions in the system context for the LLM. This greatly improves the conversation
|
||||
experience by allowing the LLM to understand who is speaking in a multi-party call.
|
||||
|
||||
If you do not wish to use diarization, then set the `enable_speaker_diarization` parameter
|
||||
to `False` or omit it altogether. The `text_format` will only be used if diarization is enabled.
|
||||
|
||||
By default, this example will use our ENHANCED operating point, which is optimized for
|
||||
high accuracy. You can change this by setting the `operating_point` parameter to a different
|
||||
value.
|
||||
|
||||
For more information on operating points, see the Speechmatics documentation:
|
||||
https://docs.speechmatics.com/rt-api-ref
|
||||
"""
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SpeechmaticsSTTService(
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
language=Language.EN,
|
||||
enable_speaker_diarization=True,
|
||||
text_format="<{speaker_id}>{text}</{speaker_id}>",
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
model="eleven_turbo_v2_5",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
params=BaseOpenAILLMService.InputParams(temperature=0.75),
|
||||
)
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": (
|
||||
"You are a helpful British assistant called Alfred. "
|
||||
"Your goal is to demonstrate your capabilities in a succinct way. "
|
||||
"Your output will be converted to audio so don't include special characters in your answers. "
|
||||
"Always include punctuation in your responses. "
|
||||
"Give very short replies - do not give longer replies unless strictly necessary. "
|
||||
"Respond to what the user said in a concise, funny, creative and helpful way. "
|
||||
"Use `<Sn/>` tags to identify different speakers - do not use tags in your replies."
|
||||
),
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(aggregation_timeout=0.005),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Say a short hello to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -35,7 +35,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
"twilio": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
@@ -61,12 +61,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash",
|
||||
# turn on thinking if you want it
|
||||
# params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),)
|
||||
)
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
@@ -8,8 +8,8 @@ import argparse
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
import google.ai.generativelanguage as glm
|
||||
from dotenv import load_dotenv
|
||||
from google.genai.types import Content, Part
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
@@ -164,7 +164,9 @@ class TanscriptionContextFixup(FrameProcessor):
|
||||
and last_part.inline_data
|
||||
and last_part.inline_data.mime_type == "audio/wav"
|
||||
):
|
||||
self._context.messages[-2] = Content(role="user", parts=[Part(text=self._transcript)])
|
||||
self._context.messages[-2] = glm.Content(
|
||||
role="user", parts=[glm.Part(text=self._transcript)]
|
||||
)
|
||||
|
||||
def add_transcript_back_to_inference_output(self):
|
||||
if not self._transcript:
|
||||
@@ -214,12 +216,7 @@ transport_params = {
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
llm = GoogleLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
model="gemini-2.5-flash",
|
||||
# turn on thinking if you want it
|
||||
# params=GoogleLLMService.InputParams(extra={"thinking_config": {"thinking_budget": 4096}}),
|
||||
)
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash-001")
|
||||
|
||||
tts = GoogleTTSService(
|
||||
voice_id="en-US-Chirp3-HD-Charon",
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame, UserStoppedSpeakingFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.sambanova.stt import SambaNovaSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
STOP_SECS = 2.0
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
"""Measures transcription latency.
|
||||
|
||||
Uses the (intentionally) long STOP_SECS parameter to give the transcription time to finish,
|
||||
then outputs the timing between when the VAD first classified audio input as not-speech and
|
||||
the delivery of the last transcription frame.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._last_transcription_time = time.time()
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, UserStoppedSpeakingFrame):
|
||||
logger.debug(
|
||||
f"Transcription latency: {(STOP_SECS - (time.time() - self._last_transcription_time)):.2f}"
|
||||
)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
self._last_transcription_time = time.time()
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS)),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SambaNovaSTTService(
|
||||
model="Whisper-Large-v3",
|
||||
api_key=os.getenv("SAMBANOVA_API_KEY"),
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -1,89 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
print(f"Transcription: {frame.text}")
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(audio_in_enabled=True),
|
||||
"twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True),
|
||||
"webrtc": lambda: TransportParams(audio_in_enabled=True),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
"""Run example using Speechmatics STT.
|
||||
|
||||
This example will use diarization within our STT service and output the words spoken by
|
||||
each individual speaker and wrap them with XML tags.
|
||||
|
||||
If you do not wish to use diarization, then set the `enable_speaker_diarization` parameter
|
||||
to `False` or omit it altogether. The `text_format` will only be used if diarization is enabled.
|
||||
|
||||
By default, this example will use our ENHANCED operating point, which is optimized for
|
||||
high accuracy. You can change this by setting the `operating_point` parameter to a different
|
||||
value.
|
||||
|
||||
For more information on operating points, see the Speechmatics documentation:
|
||||
https://docs.speechmatics.com/rt-api-ref
|
||||
"""
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SpeechmaticsSTTService(
|
||||
api_key=os.getenv("SPEECHMATICS_API_KEY"),
|
||||
language=Language.EN,
|
||||
enable_speaker_diarization=True,
|
||||
text_format="<{speaker_id}>{text}</{speaker_id}>",
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(pipeline)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -42,7 +42,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
"twilio": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
@@ -1,152 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMUserAggregatorParams
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.sambanova.llm import SambaNovaLLMService
|
||||
from pipecat.services.sambanova.stt import SambaNovaSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def fetch_weather_from_api(params: FunctionCallParams):
|
||||
await params.result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SambaNovaSTTService(
|
||||
model="Whisper-Large-v3",
|
||||
api_key=os.getenv("SAMBANOVA_API_KEY"),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = SambaNovaLLMService(
|
||||
api_key=os.getenv("SAMBANOVA_API_KEY"),
|
||||
model="Llama-4-Maverick-17B-128E-Instruct",
|
||||
)
|
||||
# You can also register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||
|
||||
@llm.event_handler("on_function_calls_started")
|
||||
async def on_function_calls_started(service, function_calls):
|
||||
await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
|
||||
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(
|
||||
context, user_params=LLMUserAggregatorParams(aggregation_timeout=0.05)
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -1,146 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
async def get_current_weather(params: FunctionCallParams, location: str, format: str):
|
||||
"""
|
||||
Get the current weather.
|
||||
|
||||
Args:
|
||||
location (str): The city and state, e.g. "San Francisco, CA".
|
||||
format (str): The temperature unit to use. Must be either "celsius" or "fahrenheit". Infer this from the user's location.
|
||||
"""
|
||||
await params.result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def get_restaurant_recommendation(params: FunctionCallParams, location: str):
|
||||
"""
|
||||
Get a restaurant recommendation.
|
||||
|
||||
Args:
|
||||
location (str): The city and state, e.g. "San Francisco, CA".
|
||||
"""
|
||||
await params.result_callback({"name": "The Golden Dragon"})
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
# You can also register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_direct_function(get_current_weather)
|
||||
llm.register_direct_function(get_restaurant_recommendation)
|
||||
|
||||
@llm.event_handler("on_function_calls_started")
|
||||
async def on_function_calls_started(service, function_calls):
|
||||
await tts.queue_frame(TTSSpeakFrame("Let me check on that."))
|
||||
|
||||
tools = ToolsSchema(standard_tools=[get_current_weather, get_restaurant_recommendation])
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -33,7 +33,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
"twilio": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
@@ -9,8 +9,8 @@ import asyncio
|
||||
import os
|
||||
import time
|
||||
|
||||
import google.ai.generativelanguage as glm
|
||||
from dotenv import load_dotenv
|
||||
from google.genai.types import Content, Part
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
@@ -611,7 +611,9 @@ class OutputGate(FrameProcessor):
|
||||
await self._notifier.wait()
|
||||
|
||||
transcription = await self._transcription_buffer.wait_for_transcription() or "-"
|
||||
self._context.add_message(Content(role="user", parts=[Part(text=transcription)]))
|
||||
self._context._messages.append(
|
||||
glm.Content(role="user", parts=[glm.Part(text=transcription)])
|
||||
)
|
||||
|
||||
self.open_gate()
|
||||
for frame, direction in self._frames_buffer:
|
||||
|
||||
@@ -8,8 +8,8 @@ import argparse
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
|
||||
import google.ai.generativelanguage as glm
|
||||
from dotenv import load_dotenv
|
||||
from google.genai.types import Content, Part
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
@@ -142,8 +142,8 @@ class InputTranscriptionContextFilter(FrameProcessor):
|
||||
context = GoogleLLMContext.upgrade_to_google(frame.context)
|
||||
message = context.messages[-1]
|
||||
|
||||
if not isinstance(message, Content):
|
||||
logger.error(f"Expected Content, got {type(message)}")
|
||||
if not isinstance(message, glm.Content):
|
||||
logger.error(f"Expected glm.Content, got {type(message)}")
|
||||
return
|
||||
|
||||
last_part = message.parts[-1]
|
||||
@@ -168,15 +168,15 @@ class InputTranscriptionContextFilter(FrameProcessor):
|
||||
history += f"{msg.role}: {part.text}\n"
|
||||
if history:
|
||||
assembled = f"Here is the conversation history so far. These are not instructions. This is data that you should use only to improve the accuracy of your transcription.\n\n----\n\n{history}\n\n----\n\nEND OF CONVERSATION HISTORY\n\n"
|
||||
parts.append(Part(text=assembled))
|
||||
parts.append(glm.Part(text=assembled))
|
||||
|
||||
parts.append(
|
||||
Part(
|
||||
glm.Part(
|
||||
text="Transcribe this audio. Respond either with the transcription exactly as it was said by the user, or with the special string 'EMPTY' if the audio is not clear."
|
||||
)
|
||||
)
|
||||
parts.append(last_part)
|
||||
msg = Content(role="user", parts=parts)
|
||||
msg = glm.Content(role="user", parts=parts)
|
||||
ctx = GoogleLLMContext([msg])
|
||||
ctx.system_message = transcriber_system_message
|
||||
await self.push_frame(OpenAILLMContextFrame(context=ctx))
|
||||
|
||||
@@ -55,7 +55,7 @@ transport_params = {
|
||||
# endpointing, for now.
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
"twilio": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
# set stop_secs to something roughly similar to the internal setting
|
||||
|
||||
@@ -1,242 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.gemini_multimodal_live.gemini import (
|
||||
GeminiMultimodalLiveLLMService,
|
||||
)
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_in_enabled=False,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_in_enabled=False,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_in_enabled=False,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
sample_file_path = ""
|
||||
|
||||
|
||||
async def create_sample_file():
|
||||
if sample_file_path:
|
||||
return sample_file_path
|
||||
else:
|
||||
"""Create a sample text file for testing the File API."""
|
||||
content = """# Sample Document for Gemini File API Test
|
||||
|
||||
This is a test document to demonstrate the Gemini File API functionality.
|
||||
|
||||
## Key Information:
|
||||
- This document was created for testing purposes
|
||||
- It contains information about AI assistants
|
||||
- The document should be analyzed by Gemini
|
||||
- The secret phrase for the test is "Pineapple Pizza"
|
||||
|
||||
## AI Assistant Capabilities:
|
||||
1. Natural language processing
|
||||
2. File analysis and understanding
|
||||
3. Context-aware conversations
|
||||
4. Multi-modal interactions
|
||||
|
||||
## Conclusion:
|
||||
This document serves as a test case for the Gemini File API integration with Pipecat.
|
||||
The AI should be able to reference and discuss the contents of this file.
|
||||
"""
|
||||
|
||||
# Create a temporary file
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
|
||||
f.write(content)
|
||||
return f.name
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting File API bot")
|
||||
|
||||
# Create a sample file to upload
|
||||
sample_file_path = await create_sample_file()
|
||||
logger.info(f"Created sample file: {sample_file_path}")
|
||||
|
||||
system_instruction = """
|
||||
You are a helpful AI assistant with access to a document that has been uploaded for analysis.
|
||||
|
||||
The document contains test information.
|
||||
You should be able to:
|
||||
- Reference and discuss the contents of the uploaded document
|
||||
- Answer questions about what's in the document
|
||||
- Use the information from the document in our conversation
|
||||
|
||||
Your output will be converted to audio so don't include special characters in your answers.
|
||||
Be friendly and demonstrate your ability to work with the uploaded file.
|
||||
"""
|
||||
|
||||
# Initialize Gemini service with File API support
|
||||
llm = GeminiMultimodalLiveLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
system_instruction=system_instruction,
|
||||
voice_id="Charon", # Aoede, Charon, Fenrir, Kore, Puck
|
||||
transcribe_user_audio=True,
|
||||
)
|
||||
|
||||
# Upload the sample file to Gemini File API
|
||||
logger.info("Uploading file to Gemini File API...")
|
||||
file_info = None
|
||||
try:
|
||||
file_info = await llm.file_api.upload_file(
|
||||
sample_file_path, display_name="Sample Test Document"
|
||||
)
|
||||
logger.info(f"File uploaded successfully: {file_info['file']['name']}")
|
||||
|
||||
# Get file URI and mime type
|
||||
file_uri = file_info["file"]["uri"]
|
||||
mime_type = "text/plain"
|
||||
|
||||
# Create context with file reference
|
||||
context = OpenAILLMContext(
|
||||
[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Greet the user and let them know you have access to a document they can ask you about. Mention that you can discuss its contents.",
|
||||
},
|
||||
{
|
||||
"type": "file_data",
|
||||
"file_data": {"mime_type": mime_type, "file_uri": file_uri},
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
logger.info("File reference added to conversation context")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error uploading file: {e}")
|
||||
# Continue with a basic context if file upload fails
|
||||
context = OpenAILLMContext(
|
||||
[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Greet the user and explain that there was an issue with file upload, but you're ready to help with other tasks.",
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
# Create context aggregator
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
# Build the pipeline
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
# Configure the pipeline task
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
# Handle client connection event
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation using standard context frame
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
# Handle client disconnection events
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
|
||||
@transport.event_handler("on_client_closed")
|
||||
async def on_client_closed(transport, client):
|
||||
logger.info(f"Client closed connection")
|
||||
await task.cancel()
|
||||
|
||||
# Run the pipeline
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
await runner.run(task)
|
||||
|
||||
# Clean up: delete the uploaded file and temporary file
|
||||
if file_info:
|
||||
try:
|
||||
await llm.file_api.delete_file(file_info["file"]["name"])
|
||||
logger.info("Cleaned up uploaded file from Gemini")
|
||||
except Exception as e:
|
||||
logger.error(f"Error cleaning up file: {e}")
|
||||
|
||||
# Remove temporary file
|
||||
try:
|
||||
os.unlink(sample_file_path)
|
||||
logger.info("Cleaned up temporary file")
|
||||
except Exception as e:
|
||||
logger.error(f"Error removing temporary file: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
upload_example_file = input("""
|
||||
|
||||
Please pass in a TEXT filepath to test upload.
|
||||
NOTE: Files are stored on Google's servers for 48 hours.
|
||||
|
||||
Press Enter to use a default test file.
|
||||
|
||||
text filepath : """)
|
||||
if upload_example_file:
|
||||
print(f"Uploading file: {upload_example_file}")
|
||||
sample_file_path = upload_example_file.strip()
|
||||
else:
|
||||
print(f"Using default file")
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -27,6 +27,7 @@ from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
aiohttp_session = aiohttp.ClientSession()
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
@@ -37,7 +38,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=FalSmartTurnAnalyzer(
|
||||
api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp.ClientSession()
|
||||
api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp_session
|
||||
),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
@@ -45,7 +46,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=FalSmartTurnAnalyzer(
|
||||
api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp.ClientSession()
|
||||
api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp_session
|
||||
),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
@@ -53,7 +54,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
|
||||
turn_analyzer=FalSmartTurnAnalyzer(
|
||||
api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp.ClientSession()
|
||||
api_key=os.getenv("FAL_SMART_TURN_API_KEY"), aiohttp_session=aiohttp_session
|
||||
),
|
||||
),
|
||||
}
|
||||
@@ -117,6 +118,8 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
await aiohttp_session.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
@@ -9,7 +9,6 @@ import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from mcp.client.session_group import SseServerParameters
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -64,7 +63,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
|
||||
try:
|
||||
# https://docs.mcp.run/integrating/tutorials/mcp-run-sse-openai-agents/
|
||||
mcp = MCPClient(server_params=SseServerParameters(url=os.getenv("MCP_RUN_SSE_URL")))
|
||||
mcp = MCPClient(server_params=os.getenv("MCP_RUN_SSE_URL"))
|
||||
except Exception as e:
|
||||
logger.error(f"error setting up mcp")
|
||||
logger.exception("error trace:")
|
||||
|
||||
@@ -15,7 +15,6 @@ import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from mcp import StdioServerParameters
|
||||
from mcp.client.session_group import SseServerParameters
|
||||
from PIL import Image
|
||||
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
@@ -150,7 +149,7 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
# https://docs.mcp.run/integrating/tutorials/mcp-run-sse-openai-agents/
|
||||
# ie. "https://www.mcp.run/api/mcp/sse?..."
|
||||
# ensure the profile has a tool or few installed
|
||||
mcp_run = MCPClient(server_params=SseServerParameters(url=os.getenv("MCP_RUN_SSE_URL")))
|
||||
mcp_run = MCPClient(server_params=os.getenv("MCP_RUN_SSE_URL"))
|
||||
except Exception as e:
|
||||
logger.error(f"error setting up mcp.run")
|
||||
logger.exception("error trace:")
|
||||
|
||||
@@ -1,133 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from mcp.client.session_group import StreamableHttpParameters
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.mcp_service import MCPClient
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We store functions so objects (e.g. SileroVADAnalyzer) don't get
|
||||
# instantiated. The function will be called when the desired transport gets
|
||||
# selected.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_sigint: bool):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
)
|
||||
|
||||
llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY"), model="gemini-2.0-flash")
|
||||
|
||||
try:
|
||||
# Github MCP docs: https://github.com/github/github-mcp-server
|
||||
# Enable Github Copilot on your GitHub account. Free tier is ok. (https://github.com/settings/copilot)
|
||||
# Generate a personal access token. It must be a Fine-grained token, classic tokens are not supported. (https://github.com/settings/personal-access-tokens)
|
||||
# Set permissions you want to use (eg. "all repositories", "profile: read/write", etc)
|
||||
mcp = MCPClient(
|
||||
server_params=StreamableHttpParameters(
|
||||
url="https://api.githubcopilot.com/mcp/",
|
||||
headers={"Authorization": f"Bearer {os.getenv('GITHUB_PERSONAL_ACCESS_TOKEN')}"},
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"error setting up mcp")
|
||||
logger.exception("error trace:")
|
||||
|
||||
tools = await mcp.register_tools(llm)
|
||||
|
||||
system = f"""
|
||||
You are a helpful LLM in a WebRTC call.
|
||||
Your goal is to answer questions about the user's GitHub repositories and account.
|
||||
You have access to a number of tools provided by Github. Use any and all tools to help users.
|
||||
Your output will be converted to audio so don't include special characters in your answers.
|
||||
Don't overexplain what you are doing.
|
||||
Just respond with short sentences when you are carrying out tool calls.
|
||||
"""
|
||||
|
||||
messages = [{"role": "system", "content": system}]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User spoken responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses and tool context
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected: {client}")
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.examples.run import main
|
||||
|
||||
main(run_example, transport_params=transport_params)
|
||||
@@ -102,7 +102,6 @@ async def run_example(transport: BaseTransport, _: argparse.Namespace, handle_si
|
||||
secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
||||
access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
region=os.getenv("AWS_REGION"), # as of 2025-05-06, us-east-1 is the only supported region
|
||||
session_token=os.getenv("AWS_SESSION_TOKEN"),
|
||||
voice_id="tiffany", # matthew, tiffany, amy
|
||||
# you could choose to pass instruction here rather than via context
|
||||
# system_instruction=system_instruction
|
||||
|
||||
@@ -10,8 +10,8 @@ import os
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.interruptions.min_words_interruption_strategy import MinWordsInterruptionStrategy
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import MinWordsInterruptionStrategy
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
# Freeze Test Client
|
||||
|
||||
The purpose of this example is to create an environment for testing the bot and try to create freezing conditions.
|
||||
|
||||
### Approach 1: Server-Side Testing with `SimulateFreezeInput`
|
||||
|
||||
- Utilize only the bot `freeze_test_bot.py` with the `SimulateFreezeInput` processor. This input continuously injects frames, simulating user speech interruptions at random intervals.
|
||||
- This approach excludes the use of input transport and speech-to-text (STT) functionalities.
|
||||
|
||||
### Approach 2: Server-Side with TypeScript Client
|
||||
|
||||
- Combine server-side operations with a TypeScript client.
|
||||
- The client initially records a segment of audio, e.g., 5–10 seconds long. It can be anything.
|
||||
- After that, it replays this recorded audio to the server at random intervals, mimicking user input interruptions.
|
||||
- This helps testing interruptions in the pipeline as if real users were interacting with the bot.
|
||||
|
||||
## Setup
|
||||
|
||||
Follow these steps to set up and run the Freeze Test Client:
|
||||
|
||||
1. **Run the Bot Server**
|
||||
- Set up and activate your virtual environment:
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
- Install dependencies:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
- Create your `.env` file and set your env vars:
|
||||
```bash
|
||||
cp env.example .env
|
||||
```
|
||||
|
||||
- Run the server:
|
||||
```bash
|
||||
python freeze_test_bot.py
|
||||
```
|
||||
|
||||
2. **Navigate to the Client Directory**
|
||||
```bash
|
||||
cd client
|
||||
```
|
||||
|
||||
3. **Install Dependencies**
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
4. **Run the Client Application**
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
5. **Access the Client in Your Browser**
|
||||
Visit [http://localhost:5173](http://localhost:5173) to interact with the Freeze Test Client.
|
||||
@@ -1,43 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>AI Chatbot</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="status-bar">
|
||||
<div class="status">
|
||||
Transport: <span id="connection-status">Disconnected</span>
|
||||
</div>
|
||||
<div class="controls">
|
||||
<button id="connect-btn">Connect</button>
|
||||
<button id="disconnect-btn" disabled>Disconnect</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="status-bar">
|
||||
<div class="status">
|
||||
Playing audio: <span id="play-audio-status"></span>
|
||||
</div>
|
||||
<div class="controls">
|
||||
<button id="play-btn">Start</button>
|
||||
<button id="stop-btn" disabled>Stop</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<audio id="bot-audio" autoplay></audio>
|
||||
|
||||
<div class="debug-panel">
|
||||
<h3>Debug Info</h3>
|
||||
<div id="debug-log"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="module" src="/src/app.ts"></script>
|
||||
<link rel="stylesheet" href="/src/style.css">
|
||||
</body>
|
||||
|
||||
</html>
|
||||
1770
examples/freeze-test/client/package-lock.json
generated
1770
examples/freeze-test/client/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,26 +0,0 @@
|
||||
{
|
||||
"name": "client",
|
||||
"version": "1.0.0",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "tsc && vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"description": "",
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.15.30",
|
||||
"@types/protobufjs": "^6.0.0",
|
||||
"@vitejs/plugin-react-swc": "^3.10.1",
|
||||
"typescript": "^5.8.3",
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^0.4.0",
|
||||
"@pipecat-ai/websocket-transport": "^0.4.1",
|
||||
"protobufjs": "^7.4.0"
|
||||
}
|
||||
}
|
||||
@@ -1,355 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2024–2025, Daily
|
||||
*
|
||||
* SPDX-License-Identifier: BSD 2-Clause License
|
||||
*/
|
||||
|
||||
/**
|
||||
* RTVI Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebSocket.
|
||||
*
|
||||
* Requirements:
|
||||
* - A running RTVI bot server (defaults to http://localhost:7860)
|
||||
*/
|
||||
|
||||
import {
|
||||
RTVIClient,
|
||||
RTVIClientOptions,
|
||||
RTVIEvent,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import {
|
||||
ProtobufFrameSerializer,
|
||||
WebSocketTransport,
|
||||
} from '@pipecat-ai/websocket-transport';
|
||||
|
||||
class RecordingSerializer extends ProtobufFrameSerializer {
|
||||
private lastTimestamp: number | null = null;
|
||||
private recordingAudioToSend: boolean = false;
|
||||
private _recordedAudio: { data: ArrayBuffer; delay: number }[] = [];
|
||||
|
||||
public startRecording() {
|
||||
this.recordingAudioToSend = true;
|
||||
this._recordedAudio = [];
|
||||
this.lastTimestamp = null;
|
||||
}
|
||||
|
||||
public stopRecording() {
|
||||
this.recordingAudioToSend = false;
|
||||
}
|
||||
|
||||
// @ts-ignore
|
||||
serializeAudio(
|
||||
data: ArrayBuffer,
|
||||
sampleRate: number,
|
||||
numChannels: number
|
||||
): Uint8Array | null {
|
||||
if (this.recordingAudioToSend) {
|
||||
const now = Date.now();
|
||||
// Compute delay since last packet
|
||||
const delay = this.lastTimestamp ? now - this.lastTimestamp : 0;
|
||||
this.lastTimestamp = now;
|
||||
// Save audio chunk and delay
|
||||
this._recordedAudio.push({ data, delay });
|
||||
return null;
|
||||
} else {
|
||||
return super.serializeAudio(data, sampleRate, numChannels);
|
||||
}
|
||||
}
|
||||
|
||||
public get recordedAudio() {
|
||||
return this._recordedAudio;
|
||||
}
|
||||
}
|
||||
|
||||
class WebsocketClientApp {
|
||||
private ENABLE_RECORDING_MODE = false;
|
||||
private RECORDING_TIME_MS = 10000;
|
||||
|
||||
private rtviClient: RTVIClient | null = null;
|
||||
private connectBtn: HTMLButtonElement | null = null;
|
||||
private disconnectBtn: HTMLButtonElement | null = null;
|
||||
private statusSpan: HTMLElement | null = null;
|
||||
private debugLog: HTMLElement | null = null;
|
||||
private botAudio: HTMLAudioElement;
|
||||
|
||||
private declare websocketTransport: WebSocketTransport;
|
||||
private sendRecordedAudio: boolean = false;
|
||||
private declare recordingSerializer: RecordingSerializer;
|
||||
|
||||
private playBtn: HTMLButtonElement | null = null;
|
||||
private stopBtn: HTMLButtonElement | null = null;
|
||||
|
||||
constructor() {
|
||||
this.botAudio = document.createElement('audio');
|
||||
this.botAudio.autoplay = true;
|
||||
//this.botAudio.playsInline = true;
|
||||
document.body.appendChild(this.botAudio);
|
||||
|
||||
this.setupDOMElements();
|
||||
this.setupEventListeners();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up references to DOM elements and create necessary media elements
|
||||
*/
|
||||
private setupDOMElements(): void {
|
||||
this.connectBtn = document.getElementById(
|
||||
'connect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById(
|
||||
'disconnect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.playBtn = document.getElementById('play-btn') as HTMLButtonElement;
|
||||
this.stopBtn = document.getElementById('stop-btn') as HTMLButtonElement;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up event listeners for connect/disconnect buttons
|
||||
*/
|
||||
private setupEventListeners(): void {
|
||||
this.connectBtn?.addEventListener('click', () => this.connect());
|
||||
this.disconnectBtn?.addEventListener('click', () => this.disconnect());
|
||||
this.playBtn?.addEventListener('click', () =>
|
||||
this.startSendingRecordedAudio()
|
||||
);
|
||||
this.stopBtn?.addEventListener('click', () =>
|
||||
this.stopSendingRecordedAudio()
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a timestamped message to the debug log
|
||||
*/
|
||||
private log(message: string): void {
|
||||
if (!this.debugLog) return;
|
||||
const entry = document.createElement('div');
|
||||
entry.textContent = `${new Date().toISOString()} - ${message}`;
|
||||
if (message.startsWith('User: ')) {
|
||||
entry.style.color = '#2196F3';
|
||||
} else if (message.startsWith('Bot: ')) {
|
||||
entry.style.color = '#4CAF50';
|
||||
}
|
||||
this.debugLog.appendChild(entry);
|
||||
this.debugLog.scrollTop = this.debugLog.scrollHeight;
|
||||
console.log(message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the connection status display
|
||||
*/
|
||||
private updateStatus(status: string): void {
|
||||
if (this.statusSpan) {
|
||||
this.statusSpan.textContent = status;
|
||||
}
|
||||
this.log(`Status: ${status}`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for available media tracks and set them up if present
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.rtviClient) return;
|
||||
const tracks = this.rtviClient.tracks();
|
||||
if (tracks.bot?.audio) {
|
||||
this.setupAudioTrack(tracks.bot.audio);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up listeners for track events (start/stop)
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local && track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
}
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(
|
||||
`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up an audio track for playback
|
||||
* Handles both initial setup and track updates
|
||||
*/
|
||||
private setupAudioTrack(track: MediaStreamTrack): void {
|
||||
this.log('Setting up audio track');
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
|
||||
if (oldTrack?.id === track.id) return;
|
||||
}
|
||||
this.botAudio.srcObject = new MediaStream([track]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
*/
|
||||
public async connect(): Promise<void> {
|
||||
try {
|
||||
const startTime = Date.now();
|
||||
|
||||
this.recordingSerializer = new RecordingSerializer();
|
||||
const ws_opts = {
|
||||
serializer: this.ENABLE_RECORDING_MODE
|
||||
? this.recordingSerializer
|
||||
: new ProtobufFrameSerializer(),
|
||||
recorderSampleRate: 8000,
|
||||
playerSampleRate: 8000,
|
||||
};
|
||||
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
transport: new WebSocketTransport(ws_opts),
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
onConnected: () => {
|
||||
this.updateStatus('Connected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = true;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = false;
|
||||
},
|
||||
onDisconnected: () => {
|
||||
this.updateStatus('Disconnected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = false;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = true;
|
||||
this.log('Client disconnected');
|
||||
},
|
||||
onBotReady: (data) => {
|
||||
this.log(`Bot ready: ${JSON.stringify(data)}`);
|
||||
this.setupMediaTracks();
|
||||
},
|
||||
onUserTranscript: (data) => {
|
||||
if (data.final) {
|
||||
this.log(`User: ${data.text}`);
|
||||
}
|
||||
},
|
||||
onBotTranscript: (data) => this.log(`Bot: ${data.text}`),
|
||||
onMessageError: (error) => console.error('Message error:', error),
|
||||
onError: (error) => console.error('Error:', error),
|
||||
},
|
||||
};
|
||||
this.rtviClient = new RTVIClient(RTVIConfig);
|
||||
this.websocketTransport = this.rtviClient.transport;
|
||||
this.setupTrackListeners();
|
||||
|
||||
this.log('Initializing devices...');
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
this.log('Connecting to bot...');
|
||||
await this.rtviClient.connect({
|
||||
endpoint: 'http://localhost:7860/connect',
|
||||
});
|
||||
|
||||
const timeTaken = Date.now() - startTime;
|
||||
this.log(`Connection complete, timeTaken: ${timeTaken}`);
|
||||
|
||||
if (this.ENABLE_RECORDING_MODE) {
|
||||
this.log(
|
||||
`Starting to recording the next ${
|
||||
this.RECORDING_TIME_MS / 1000
|
||||
}s of audio`
|
||||
);
|
||||
this.recordingSerializer.startRecording();
|
||||
await this.sleep(this.RECORDING_TIME_MS);
|
||||
this.recordingSerializer.stopRecording();
|
||||
this.log('Recording stopped');
|
||||
this.rtviClient.enableMic(false);
|
||||
this.startSendingRecordedAudio();
|
||||
}
|
||||
} catch (error) {
|
||||
this.log(`Error connecting: ${(error as Error).message}`);
|
||||
this.updateStatus('Error');
|
||||
// Clean up if there's an error
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
public async disconnect(): Promise<void> {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
this.stopSendingRecordedAudio();
|
||||
await this.rtviClient.disconnect();
|
||||
this.rtviClient = null;
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
this.botAudio.srcObject
|
||||
.getAudioTracks()
|
||||
.forEach((track) => track.stop());
|
||||
this.botAudio.srcObject = null;
|
||||
}
|
||||
} catch (error) {
|
||||
this.log(`Error disconnecting: ${(error as Error).message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private startSendingRecordedAudio() {
|
||||
this.sendRecordedAudio = true;
|
||||
if (this.playBtn) this.playBtn.disabled = true;
|
||||
if (this.stopBtn) this.stopBtn.disabled = false;
|
||||
void this.replayAudio();
|
||||
}
|
||||
|
||||
private stopSendingRecordedAudio() {
|
||||
if (this.stopBtn) this.stopBtn.disabled = true;
|
||||
if (this.playBtn) this.playBtn.disabled = false;
|
||||
this.sendRecordedAudio = false;
|
||||
}
|
||||
|
||||
private async replayAudio() {
|
||||
if (this.sendRecordedAudio) {
|
||||
this.log('Sending recorded audio');
|
||||
for (const chunk of this.recordingSerializer.recordedAudio) {
|
||||
await this.sleep(chunk.delay);
|
||||
this.websocketTransport.handleUserAudioStream(chunk.data);
|
||||
}
|
||||
const randomDelay = 1000 + Math.random() * (10000 - 500);
|
||||
await this.sleep(randomDelay);
|
||||
|
||||
void this.replayAudio();
|
||||
}
|
||||
}
|
||||
|
||||
private sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
}
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
WebsocketClientApp: typeof WebsocketClientApp;
|
||||
}
|
||||
}
|
||||
|
||||
window.addEventListener('DOMContentLoaded', () => {
|
||||
window.WebsocketClientApp = WebsocketClientApp;
|
||||
new WebsocketClientApp();
|
||||
});
|
||||
@@ -1,98 +0,0 @@
|
||||
body {
|
||||
margin: 0;
|
||||
padding: 20px;
|
||||
font-family: Arial, sans-serif;
|
||||
background-color: #f0f0f0;
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.status-bar {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: 10px;
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.controls button {
|
||||
padding: 8px 16px;
|
||||
margin-left: 10px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#connect-btn {
|
||||
background-color: #4caf50;
|
||||
color: white;
|
||||
}
|
||||
|
||||
#disconnect-btn {
|
||||
background-color: #f44336;
|
||||
color: white;
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.main-content {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.bot-container {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
#bot-video-container {
|
||||
width: 640px;
|
||||
height: 360px;
|
||||
background-color: #e0e0e0;
|
||||
border-radius: 8px;
|
||||
margin: 20px auto;
|
||||
overflow: hidden;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
#bot-video-container video {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: cover;
|
||||
}
|
||||
|
||||
.debug-panel {
|
||||
background-color: #fff;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.debug-panel h3 {
|
||||
margin: 0 0 10px 0;
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
#debug-log {
|
||||
height: 500px;
|
||||
overflow-y: auto;
|
||||
background-color: #f8f8f8;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
font-family: monospace;
|
||||
font-size: 12px;
|
||||
line-height: 1.4;
|
||||
}
|
||||
@@ -1,111 +0,0 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
/* Visit https://aka.ms/tsconfig to read more about this file */
|
||||
|
||||
/* Projects */
|
||||
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
|
||||
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
|
||||
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
|
||||
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
|
||||
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
|
||||
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
|
||||
|
||||
/* Language and Environment */
|
||||
"target": "es2016", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
|
||||
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
|
||||
// "jsx": "preserve", /* Specify what JSX code is generated. */
|
||||
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
|
||||
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
|
||||
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
|
||||
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
|
||||
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
|
||||
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
|
||||
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
|
||||
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
|
||||
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
|
||||
|
||||
/* Modules */
|
||||
"module": "commonjs", /* Specify what module code is generated. */
|
||||
// "rootDir": "./", /* Specify the root folder within your source files. */
|
||||
// "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */
|
||||
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
|
||||
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
|
||||
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
|
||||
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
|
||||
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
|
||||
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
|
||||
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
|
||||
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
|
||||
// "rewriteRelativeImportExtensions": true, /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */
|
||||
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
|
||||
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
|
||||
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
|
||||
// "noUncheckedSideEffectImports": true, /* Check side effect imports. */
|
||||
// "resolveJsonModule": true, /* Enable importing .json files. */
|
||||
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
|
||||
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
|
||||
|
||||
/* JavaScript Support */
|
||||
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
|
||||
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
|
||||
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
|
||||
|
||||
/* Emit */
|
||||
// "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
|
||||
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
|
||||
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
|
||||
// "sourceMap": true, /* Create source map files for emitted JavaScript files. */
|
||||
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
|
||||
// "noEmit": true, /* Disable emitting files from a compilation. */
|
||||
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
|
||||
// "outDir": "./", /* Specify an output folder for all emitted files. */
|
||||
// "removeComments": true, /* Disable emitting comments. */
|
||||
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
|
||||
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
|
||||
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
|
||||
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
|
||||
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
|
||||
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
|
||||
// "newLine": "crlf", /* Set the newline character for emitting files. */
|
||||
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
|
||||
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
|
||||
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
|
||||
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
|
||||
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
|
||||
|
||||
/* Interop Constraints */
|
||||
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
|
||||
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
|
||||
// "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */
|
||||
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
|
||||
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
|
||||
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
|
||||
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
|
||||
|
||||
/* Type Checking */
|
||||
"strict": true, /* Enable all strict type-checking options. */
|
||||
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
|
||||
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
|
||||
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
|
||||
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
|
||||
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
|
||||
// "strictBuiltinIteratorReturn": true, /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */
|
||||
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
|
||||
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
|
||||
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
|
||||
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
|
||||
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
|
||||
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
|
||||
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
|
||||
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
|
||||
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
|
||||
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
|
||||
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
|
||||
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
|
||||
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
|
||||
|
||||
/* Completeness */
|
||||
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
|
||||
"skipLibCheck": true /* Skip type checking all .d.ts files. */
|
||||
}
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
import { defineConfig } from 'vite';
|
||||
import react from '@vitejs/plugin-react-swc';
|
||||
|
||||
export default defineConfig({
|
||||
plugins: [react()],
|
||||
server: {
|
||||
proxy: {
|
||||
// Proxy /api requests to the backend server
|
||||
'/connect': {
|
||||
target: 'http://0.0.0.0:7860', // Replace with your backend URL
|
||||
changeOrigin: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
@@ -1,4 +0,0 @@
|
||||
SENTRY_DSN=
|
||||
DEEPGRAM_API_KEY=
|
||||
CARTESIA_API_KEY=
|
||||
OPENAI_API_KEY=
|
||||
@@ -1,359 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import random
|
||||
from contextlib import asynccontextmanager
|
||||
from typing import Any, Dict
|
||||
|
||||
import sentry_sdk
|
||||
import uvicorn
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI, Request, WebSocket
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import RedirectResponse
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
CancelFrame,
|
||||
EndFrame,
|
||||
Frame,
|
||||
InterimTranscriptionFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMMessagesFrame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
StopFrame,
|
||||
StopInterruptionFrame,
|
||||
TranscriptionFrame,
|
||||
TTSSpeakFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.observers.loggers.debug_log_observer import DebugLogObserver
|
||||
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.frameworks.rtvi import RTVIConfig, RTVIProcessor
|
||||
from pipecat.processors.metrics.sentry import SentryMetrics
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
from pipecat.serializers.protobuf import ProtobufFrameSerializer
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.network.fastapi_websocket import (
|
||||
FastAPIWebsocketParams,
|
||||
FastAPIWebsocketTransport,
|
||||
)
|
||||
from pipecat.utils.time import time_now_iso8601
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
"""Handles FastAPI startup and shutdown."""
|
||||
yield # Run app
|
||||
|
||||
|
||||
# Initialize FastAPI app with lifespan manager
|
||||
app = FastAPI(lifespan=lifespan)
|
||||
|
||||
# Configure CORS to allow requests from any origin
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
|
||||
class SimulateFreezeInput(FrameProcessor):
|
||||
def __init__(
|
||||
self,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(**kwargs)
|
||||
# Whether we have seen a StartFrame already.
|
||||
self._initialized = False
|
||||
self._send_frames_task = None
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
if isinstance(frame, StartFrame):
|
||||
# Push StartFrame before start(), because we want StartFrame to be
|
||||
# processed by every processor before any other frame is processed.
|
||||
await self.push_frame(frame, direction)
|
||||
await self._start(frame)
|
||||
elif isinstance(frame, CancelFrame):
|
||||
logger.info("SimulateFreezeInput: Received cancel frame")
|
||||
await self._stop()
|
||||
await self.push_frame(frame, direction)
|
||||
elif isinstance(frame, EndFrame):
|
||||
logger.info("SimulateFreezeInput: Received end frame")
|
||||
await self.push_frame(frame, direction)
|
||||
await self._stop()
|
||||
elif isinstance(frame, StopFrame):
|
||||
logger.info("SimulateFreezeInput: Received stop frame")
|
||||
await self.push_frame(frame, direction)
|
||||
await self._stop()
|
||||
|
||||
async def _start(self, frame: StartFrame):
|
||||
if self._initialized:
|
||||
return
|
||||
logger.info(f"Starting SimulateFreezeInput")
|
||||
self._initialized = True
|
||||
if not self._send_frames_task:
|
||||
self._send_frames_task = self.create_task(self._send_frames())
|
||||
|
||||
async def _stop(self):
|
||||
logger.info(f"Stopping SimulateFreezeInput")
|
||||
self._initialized = False
|
||||
if self._send_frames_task:
|
||||
await self.cancel_task(self._send_frames_task)
|
||||
self._send_frames_task = None
|
||||
|
||||
async def _send_user_text(self, text: str):
|
||||
self.reset_watchdog()
|
||||
# Emulation as if the user has spoken and the stt transcribed
|
||||
await self.push_frame(UserStartedSpeakingFrame())
|
||||
await self.push_frame(StartInterruptionFrame())
|
||||
await self.push_frame(
|
||||
TranscriptionFrame(
|
||||
text,
|
||||
"",
|
||||
time_now_iso8601(),
|
||||
)
|
||||
)
|
||||
# Need to wait before sending the UserStoppedSpeakingFrame,
|
||||
# otherwise TranscriptionFrame will be processed
|
||||
# later than the UserStoppedSpeakingFrame
|
||||
await asyncio.sleep(0.1)
|
||||
await self.push_frame(UserStoppedSpeakingFrame())
|
||||
await self.push_frame(StopInterruptionFrame())
|
||||
|
||||
async def _send_frames(self):
|
||||
try:
|
||||
i = 0
|
||||
while True:
|
||||
logger.debug("SimulateFreezeInput _send_frames")
|
||||
await self._send_user_text("Tell me a brief history of Brazil!")
|
||||
await asyncio.sleep(3)
|
||||
await self._send_user_text("and who has discovered it")
|
||||
i += 1
|
||||
if i >= 20:
|
||||
break
|
||||
# sleeping 1s before interrupting
|
||||
wait_time = random.uniform(1, 10)
|
||||
await asyncio.sleep(wait_time)
|
||||
except Exception as e:
|
||||
logger.error(f"{self} exception receiving data: {e.__class__.__name__} ({e})")
|
||||
|
||||
|
||||
async def run_example(websocket_client):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
# Create a transport using the WebRTC connection
|
||||
transport = FastAPIWebsocketTransport(
|
||||
websocket=websocket_client,
|
||||
params=FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
add_wav_header=False,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
serializer=ProtobufFrameSerializer(),
|
||||
),
|
||||
)
|
||||
|
||||
sentry_sdk.init(
|
||||
dsn=os.getenv("SENTRY_DSN"),
|
||||
traces_sample_rate=1.0,
|
||||
)
|
||||
|
||||
freeze = SimulateFreezeInput()
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
async def handle_user_idle(user_idle: UserIdleProcessor, retry_count: int) -> bool:
|
||||
if retry_count == 1:
|
||||
# First attempt: Add a gentle prompt to the conversation
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "The user has been quiet. Politely and briefly ask if they're still there.",
|
||||
}
|
||||
)
|
||||
await user_idle.push_frame(LLMMessagesFrame(messages))
|
||||
return True
|
||||
elif retry_count == 2:
|
||||
# Second attempt: More direct prompt
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "The user is still inactive. Ask if they'd like to continue our conversation.",
|
||||
}
|
||||
)
|
||||
await user_idle.push_frame(LLMMessagesFrame(messages))
|
||||
return True
|
||||
else:
|
||||
# Third attempt: End the conversation
|
||||
await user_idle.push_frame(
|
||||
TTSSpeakFrame("It seems like you're busy right now. Have a nice day!")
|
||||
)
|
||||
await task.queue_frame(EndFrame())
|
||||
return False
|
||||
|
||||
user_idle = UserIdleProcessor(callback=handle_user_idle, timeout=10.0)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady
|
||||
metrics=SentryMetrics(),
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
metrics=SentryMetrics(),
|
||||
)
|
||||
|
||||
rtvi = RTVIProcessor(config=RTVIConfig(config=[]))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
ParallelPipeline(
|
||||
[
|
||||
freeze,
|
||||
],
|
||||
[
|
||||
transport.input(),
|
||||
stt,
|
||||
],
|
||||
),
|
||||
user_idle,
|
||||
rtvi,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
audio_in_sample_rate=8000,
|
||||
audio_out_sample_rate=8000,
|
||||
),
|
||||
idle_timeout_secs=120,
|
||||
observers=[
|
||||
DebugLogObserver(
|
||||
frame_types={
|
||||
InterimTranscriptionFrame: None,
|
||||
TranscriptionFrame: None,
|
||||
# TTSTextFrame: None,
|
||||
# LLMTextFrame: None,
|
||||
OpenAILLMContextFrame: None,
|
||||
LLMFullResponseEndFrame: None,
|
||||
UserStartedSpeakingFrame: None,
|
||||
UserStoppedSpeakingFrame: None,
|
||||
StartInterruptionFrame: None,
|
||||
StopInterruptionFrame: None,
|
||||
},
|
||||
exclude_fields={
|
||||
"result",
|
||||
"metadata",
|
||||
"audio",
|
||||
"image",
|
||||
"images",
|
||||
},
|
||||
),
|
||||
],
|
||||
enable_watchdog_timers=True,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
|
||||
@rtvi.event_handler("on_client_ready")
|
||||
async def on_client_ready(rtvi):
|
||||
logger.info(f"Client ready")
|
||||
await rtvi.set_bot_ready()
|
||||
# Kick off the conversation.
|
||||
# messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
# await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=False)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
@app.get("/", include_in_schema=False)
|
||||
async def root_redirect():
|
||||
return RedirectResponse(url="/client/")
|
||||
|
||||
|
||||
@app.websocket("/ws")
|
||||
async def websocket_endpoint(websocket: WebSocket):
|
||||
await websocket.accept()
|
||||
print("WebSocket connection accepted")
|
||||
try:
|
||||
await run_example(websocket)
|
||||
except Exception as e:
|
||||
print(f"Exception in run_bot: {e}")
|
||||
|
||||
|
||||
@app.post("/connect")
|
||||
async def bot_connect(request: Request) -> Dict[Any, Any]:
|
||||
server_mode = os.getenv("WEBSOCKET_SERVER", "fast_api")
|
||||
if server_mode == "websocket_server":
|
||||
ws_url = "ws://localhost:8765"
|
||||
else:
|
||||
ws_url = "ws://localhost:7860/ws"
|
||||
return {"ws_url": ws_url}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Pipecat Bot Runner")
|
||||
parser.add_argument(
|
||||
"--host", default="localhost", help="Host for HTTP server (default: localhost)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port", type=int, default=7860, help="Port for HTTP server (default: 7860)"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
uvicorn.run(app, host=args.host, port=args.port)
|
||||
@@ -1,4 +0,0 @@
|
||||
python-dotenv
|
||||
fastapi[all]
|
||||
uvicorn
|
||||
pipecat-ai[silero,websocket,openai, deepgram, cartesia, sentry]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.8"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Pipecat Client Implementation
|
||||
* RTVI Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
@@ -18,22 +18,20 @@
|
||||
|
||||
import {
|
||||
Participant,
|
||||
PipecatClient,
|
||||
PipecatClientOptions,
|
||||
RTVIClient,
|
||||
RTVIClientOptions,
|
||||
RTVIEvent,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import {
|
||||
DailyEventCallbacks,
|
||||
DailyTransport,
|
||||
} from '@pipecat-ai/daily-transport';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import SoundUtils from './util/soundUtils';
|
||||
import { InstantVoiceHelper } from './util/instantVoiceHelper';
|
||||
|
||||
/**
|
||||
* InstantVoiceClient handles the connection and media management for a real-time
|
||||
* voice and video interaction with an AI bot.
|
||||
*/
|
||||
class InstantVoiceClient {
|
||||
private declare pcClient: PipecatClient;
|
||||
private declare rtviClient: RTVIClient;
|
||||
private connectBtn: HTMLButtonElement | null = null;
|
||||
private disconnectBtn: HTMLButtonElement | null = null;
|
||||
private statusSpan: HTMLElement | null = null;
|
||||
@@ -48,7 +46,7 @@ class InstantVoiceClient {
|
||||
document.body.appendChild(this.botAudio);
|
||||
this.setupDOMElements();
|
||||
this.setupEventListeners();
|
||||
this.initializePipecatClient();
|
||||
this.initializeRTVIClient();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -74,11 +72,16 @@ class InstantVoiceClient {
|
||||
this.disconnectBtn?.addEventListener('click', () => this.disconnect());
|
||||
}
|
||||
|
||||
private initializePipecatClient(): void {
|
||||
const PipecatConfig: PipecatClientOptions = {
|
||||
private initializeRTVIClient(): void {
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
transport: new DailyTransport({
|
||||
bufferLocalAudioUntilBotReady: true,
|
||||
}),
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: { connect: '/connect' },
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
@@ -110,23 +113,30 @@ class InstantVoiceClient {
|
||||
onBotTranscript: (data) => this.log(`Bot: ${data.text}`),
|
||||
onMessageError: (error) => console.error('Message error:', error),
|
||||
onError: (error) => console.error('Error:', error),
|
||||
onAudioBufferingStarted: () => {
|
||||
SoundUtils.beep();
|
||||
this.updateBufferingStatus('Yes');
|
||||
this.log(
|
||||
`onMicCaptureStarted, timeTaken: ${Date.now() - this.startTime}`
|
||||
);
|
||||
},
|
||||
onAudioBufferingStopped: () => {
|
||||
this.updateBufferingStatus('No');
|
||||
this.log(
|
||||
`onMicCaptureStopped, timeTaken: ${Date.now() - this.startTime}`
|
||||
);
|
||||
},
|
||||
} as DailyEventCallbacks,
|
||||
},
|
||||
};
|
||||
|
||||
this.pcClient = new PipecatClient(PipecatConfig);
|
||||
this.rtviClient = new RTVIClient(RTVIConfig);
|
||||
this.rtviClient.registerHelper(
|
||||
'transport',
|
||||
new InstantVoiceHelper({
|
||||
callbacks: {
|
||||
onAudioBufferingStarted: () => {
|
||||
SoundUtils.beep();
|
||||
this.updateBufferingStatus('Yes');
|
||||
this.log(
|
||||
`onMicCaptureStarted, timeTaken: ${Date.now() - this.startTime}`
|
||||
);
|
||||
},
|
||||
onAudioBufferingStopped: () => {
|
||||
this.updateBufferingStatus('No');
|
||||
this.log(
|
||||
`onMicCaptureStopped, timeTaken: ${Date.now() - this.startTime}`
|
||||
);
|
||||
},
|
||||
},
|
||||
})
|
||||
);
|
||||
this.setupTrackListeners();
|
||||
}
|
||||
|
||||
@@ -172,8 +182,8 @@ class InstantVoiceClient {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.pcClient) return;
|
||||
const tracks = this.pcClient.tracks();
|
||||
if (!this.rtviClient) return;
|
||||
const tracks = this.rtviClient.tracks();
|
||||
if (tracks.bot?.audio) {
|
||||
this.setupAudioTrack(tracks.bot.audio);
|
||||
}
|
||||
@@ -184,10 +194,10 @@ class InstantVoiceClient {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local && track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
@@ -195,7 +205,7 @@ class InstantVoiceClient {
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(
|
||||
`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`
|
||||
);
|
||||
@@ -220,25 +230,22 @@ class InstantVoiceClient {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
*/
|
||||
public async connect(): Promise<void> {
|
||||
try {
|
||||
this.startTime = Date.now();
|
||||
this.log('Connecting to bot...');
|
||||
await this.pcClient.connect({
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
endpoint: 'http://localhost:7860/connect',
|
||||
});
|
||||
await this.rtviClient.connect();
|
||||
} catch (error) {
|
||||
this.log(`Error connecting: ${(error as Error).message}`);
|
||||
this.updateStatus('Error');
|
||||
this.updateBufferingStatus('No');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.pcClient.disconnect();
|
||||
await this.rtviClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError}`);
|
||||
}
|
||||
@@ -251,7 +258,7 @@ class InstantVoiceClient {
|
||||
*/
|
||||
public async disconnect(): Promise<void> {
|
||||
try {
|
||||
await this.pcClient.disconnect();
|
||||
await this.rtviClient.disconnect();
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
import {RTVIClientHelper, RTVIClientHelperOptions, RTVIMessage} from "@pipecat-ai/client-js";
|
||||
import {DailyRTVIMessageType} from '@pipecat-ai/daily-transport';
|
||||
|
||||
export type InstantVoiceHelperCallbacks = Partial<{
|
||||
onAudioBufferingStarted: () => void;
|
||||
onAudioBufferingStopped: () => void;
|
||||
}>;
|
||||
|
||||
// --- Interface and class
|
||||
export interface InstantVoiceHelperOptions extends RTVIClientHelperOptions {
|
||||
callbacks?: InstantVoiceHelperCallbacks;
|
||||
}
|
||||
export class InstantVoiceHelper extends RTVIClientHelper {
|
||||
|
||||
protected declare _options: InstantVoiceHelperOptions;
|
||||
|
||||
constructor(options: InstantVoiceHelperOptions) {
|
||||
super(options);
|
||||
}
|
||||
|
||||
handleMessage(rtviMessage: RTVIMessage): void {
|
||||
switch (rtviMessage.type) {
|
||||
case DailyRTVIMessageType.AUDIO_BUFFERING_STARTED:
|
||||
if (this._options.callbacks?.onAudioBufferingStarted) {
|
||||
this._options.callbacks?.onAudioBufferingStarted()
|
||||
}
|
||||
break;
|
||||
case DailyRTVIMessageType.AUDIO_BUFFERING_STOPPED:
|
||||
if (this._options.callbacks?.onAudioBufferingStopped) {
|
||||
this._options.callbacks?.onAudioBufferingStopped()
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
getMessageTypes(): string[] {
|
||||
return [DailyRTVIMessageType.AUDIO_BUFFERING_STARTED, DailyRTVIMessageType.AUDIO_BUFFERING_STOPPED];
|
||||
}
|
||||
}
|
||||
@@ -143,7 +143,6 @@ async def main():
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
video_in_enabled=True,
|
||||
video_out_enabled=True,
|
||||
video_out_width=1024,
|
||||
video_out_height=576,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.8"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Pipecat Client Implementation
|
||||
* RTVI Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
@@ -16,9 +16,78 @@
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import { LogLevel, PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import {
|
||||
LogLevel,
|
||||
RTVIClient,
|
||||
RTVIClientHelper,
|
||||
RTVIEvent,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
class SearchResponseHelper extends RTVIClientHelper {
|
||||
constructor(contentPanel) {
|
||||
super();
|
||||
this.contentPanel = contentPanel;
|
||||
}
|
||||
|
||||
handleMessage(rtviMessage) {
|
||||
console.log('SearchResponseHelper, received message:', rtviMessage);
|
||||
if (rtviMessage.data) {
|
||||
// Clear existing content
|
||||
this.contentPanel.innerHTML = '';
|
||||
|
||||
// Create a container for all content
|
||||
const contentContainer = document.createElement('div');
|
||||
contentContainer.className = 'content-container';
|
||||
|
||||
// Add the search_result
|
||||
if (rtviMessage.data.search_result) {
|
||||
const searchResultDiv = document.createElement('div');
|
||||
searchResultDiv.className = 'search-result';
|
||||
searchResultDiv.textContent = rtviMessage.data.search_result;
|
||||
contentContainer.appendChild(searchResultDiv);
|
||||
}
|
||||
|
||||
// Add the sources
|
||||
if (rtviMessage.data.origins) {
|
||||
const sourcesDiv = document.createElement('div');
|
||||
sourcesDiv.className = 'sources';
|
||||
|
||||
const sourcesTitle = document.createElement('h3');
|
||||
sourcesTitle.className = 'sources-title';
|
||||
sourcesTitle.textContent = 'Sources:';
|
||||
sourcesDiv.appendChild(sourcesTitle);
|
||||
|
||||
rtviMessage.data.origins.forEach((origin) => {
|
||||
const sourceLink = document.createElement('a');
|
||||
sourceLink.className = 'source-link';
|
||||
sourceLink.href = origin.site_uri;
|
||||
sourceLink.target = '_blank';
|
||||
sourceLink.textContent = origin.site_title;
|
||||
sourcesDiv.appendChild(sourceLink);
|
||||
});
|
||||
|
||||
contentContainer.appendChild(sourcesDiv);
|
||||
}
|
||||
|
||||
// Add the rendered_content in an iframe
|
||||
if (rtviMessage.data.rendered_content) {
|
||||
const iframe = document.createElement('iframe');
|
||||
iframe.className = 'iframe-container';
|
||||
iframe.srcdoc = rtviMessage.data.rendered_content;
|
||||
contentContainer.appendChild(iframe);
|
||||
}
|
||||
|
||||
// Append the content container to the content panel
|
||||
this.contentPanel.appendChild(contentContainer);
|
||||
}
|
||||
}
|
||||
|
||||
getMessageTypes() {
|
||||
return ['bot-llm-search-response'];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* ChatbotClient handles the connection and media management for a real-time
|
||||
* voice and video interaction with an AI bot.
|
||||
@@ -26,7 +95,7 @@ import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
class ChatbotClient {
|
||||
constructor() {
|
||||
// Initialize client state
|
||||
this.pcClient = null;
|
||||
this.rtviClient = null;
|
||||
this.setupDOMElements();
|
||||
this.setupEventListeners();
|
||||
}
|
||||
@@ -91,10 +160,10 @@ class ChatbotClient {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Get current tracks from the client
|
||||
const tracks = this.pcClient.tracks();
|
||||
const tracks = this.rtviClient.tracks();
|
||||
|
||||
// Set up any available bot tracks
|
||||
if (tracks.bot?.audio) {
|
||||
@@ -107,10 +176,10 @@ class ChatbotClient {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local && track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
@@ -118,7 +187,7 @@ class ChatbotClient {
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(
|
||||
`Track stopped event: ${track.kind} from ${
|
||||
participant?.name || 'unknown'
|
||||
@@ -144,13 +213,20 @@ class ChatbotClient {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
*/
|
||||
async connect() {
|
||||
try {
|
||||
// Initialize the Pipecat client with a Daily WebRTC transport and our configuration
|
||||
this.pcClient = new PipecatClient({
|
||||
// Initialize the RTVI client with a Daily WebRTC transport and our configuration
|
||||
this.rtviClient = new RTVIClient({
|
||||
transport: new DailyTransport(),
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
},
|
||||
enableMic: true, // Enable microphone for user input
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
@@ -175,8 +251,6 @@ class ChatbotClient {
|
||||
this.setupMediaTracks();
|
||||
}
|
||||
},
|
||||
// Handle search response events
|
||||
onBotLlmSearchResponse: this.handleSearchResponse.bind(this),
|
||||
// Handle bot connection events
|
||||
onBotConnected: (participant) => {
|
||||
this.log(`Bot connected: ${JSON.stringify(participant)}`);
|
||||
@@ -207,22 +281,22 @@ class ChatbotClient {
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
//this.pcClient.setLogLevel(LogLevel.DEBUG)
|
||||
//this.rtviClient.setLogLevel(LogLevel.DEBUG)
|
||||
this.rtviClient.registerHelper(
|
||||
'llm',
|
||||
new SearchResponseHelper(this.searchResultContainer)
|
||||
);
|
||||
|
||||
// Set up listeners for media track events
|
||||
this.setupTrackListeners();
|
||||
|
||||
// Initialize audio devices
|
||||
this.log('Initializing devices...');
|
||||
await this.pcClient.initDevices();
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
// Connect to the bot
|
||||
this.log('Connecting to bot...');
|
||||
await this.pcClient.connect({
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
endpoint: 'http://localhost:7860/connect',
|
||||
});
|
||||
await this.rtviClient.connect();
|
||||
|
||||
this.log('Connection complete');
|
||||
} catch (error) {
|
||||
@@ -232,9 +306,9 @@ class ChatbotClient {
|
||||
this.updateStatus('Error');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.pcClient.disconnect();
|
||||
await this.rtviClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError.message}`);
|
||||
}
|
||||
@@ -246,11 +320,11 @@ class ChatbotClient {
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
// Disconnect the Pipecat client
|
||||
await this.pcClient.disconnect();
|
||||
this.pcClient = null;
|
||||
// Disconnect the RTVI client
|
||||
await this.rtviClient.disconnect();
|
||||
this.rtviClient = null;
|
||||
|
||||
// Clean up audio
|
||||
if (this.botAudio.srcObject) {
|
||||
@@ -265,57 +339,6 @@ class ChatbotClient {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handleSearchResponse(response) {
|
||||
console.log('SearchResponseHelper, received message:', response);
|
||||
// Clear existing content
|
||||
this.searchResultContainer.innerHTML = '';
|
||||
|
||||
// Create a container for all content
|
||||
const contentContainer = document.createElement('div');
|
||||
contentContainer.className = 'content-container';
|
||||
|
||||
// Add the search_result
|
||||
if (response.search_result) {
|
||||
const searchResultDiv = document.createElement('div');
|
||||
searchResultDiv.className = 'search-result';
|
||||
searchResultDiv.textContent = response.search_result;
|
||||
contentContainer.appendChild(searchResultDiv);
|
||||
}
|
||||
|
||||
// Add the sources
|
||||
if (response.origins) {
|
||||
const sourcesDiv = document.createElement('div');
|
||||
sourcesDiv.className = 'sources';
|
||||
|
||||
const sourcesTitle = document.createElement('h3');
|
||||
sourcesTitle.className = 'sources-title';
|
||||
sourcesTitle.textContent = 'Sources:';
|
||||
sourcesDiv.appendChild(sourcesTitle);
|
||||
|
||||
response.origins.forEach((origin) => {
|
||||
const sourceLink = document.createElement('a');
|
||||
sourceLink.className = 'source-link';
|
||||
sourceLink.href = origin.site_uri;
|
||||
sourceLink.target = '_blank';
|
||||
sourceLink.textContent = origin.site_title;
|
||||
sourcesDiv.appendChild(sourceLink);
|
||||
});
|
||||
|
||||
contentContainer.appendChild(sourcesDiv);
|
||||
}
|
||||
|
||||
// Add the rendered_content in an iframe
|
||||
if (response.rendered_content) {
|
||||
const iframe = document.createElement('iframe');
|
||||
iframe.className = 'iframe-container';
|
||||
iframe.srcdoc = response.rendered_content;
|
||||
contentContainer.appendChild(iframe);
|
||||
}
|
||||
|
||||
// Append the content container to the content panel
|
||||
this.searchResultContainer.appendChild(contentContainer);
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the client when the page loads
|
||||
|
||||
@@ -24,7 +24,6 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.utils.tracing.setup import setup_tracing
|
||||
|
||||
@@ -62,7 +61,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
"twilio": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[daily,webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai[webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
opentelemetry-exporter-otlp-proto-grpc
|
||||
@@ -26,7 +26,7 @@ Create a `.env` file with your API keys to enable tracing:
|
||||
```
|
||||
ENABLE_TRACING=true
|
||||
# OTLP endpoint for Langfuse
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=https://cloud.langfuse.com/api/public/otel
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://cloud.langfuse.com/api/public/otel
|
||||
OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic%20<base64_encoded_api_key>
|
||||
# Set to any value to enable console output for debugging
|
||||
# OTEL_CONSOLE_EXPORT=true
|
||||
|
||||
@@ -24,7 +24,6 @@ from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.network.fastapi_websocket import FastAPIWebsocketParams
|
||||
from pipecat.transports.services.daily import DailyParams
|
||||
from pipecat.utils.tracing.setup import setup_tracing
|
||||
|
||||
@@ -59,7 +58,7 @@ transport_params = {
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
"twilio": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
pipecat-ai[daily,webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai[webrtc,silero,cartesia,deepgram,openai,tracing]
|
||||
pipecat-ai-small-webrtc-prebuilt
|
||||
opentelemetry-exporter-otlp-proto-http
|
||||
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/small-webrtc-transport": "^1.0.0"
|
||||
"@pipecat-ai/client-js": "^0.3.2",
|
||||
"@pipecat-ai/small-webrtc-transport": "^0.0.2"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,236 +1,217 @@
|
||||
import { SmallWebRTCTransport } from '@pipecat-ai/small-webrtc-transport';
|
||||
import {
|
||||
BotLLMTextData,
|
||||
Participant,
|
||||
PipecatClient,
|
||||
PipecatClientOptions,
|
||||
TranscriptData,
|
||||
TransportState,
|
||||
} from '@pipecat-ai/client-js';
|
||||
SmallWebRTCTransport
|
||||
} from "@pipecat-ai/small-webrtc-transport";
|
||||
import {Participant, RTVIClient, RTVIClientOptions, Transport} from "@pipecat-ai/client-js";
|
||||
|
||||
class WebRTCApp {
|
||||
private declare connectBtn: HTMLButtonElement;
|
||||
private declare disconnectBtn: HTMLButtonElement;
|
||||
private declare muteBtn: HTMLButtonElement;
|
||||
|
||||
private declare audioInput: HTMLSelectElement;
|
||||
private declare videoInput: HTMLSelectElement;
|
||||
private declare audioCodec: HTMLSelectElement;
|
||||
private declare videoCodec: HTMLSelectElement;
|
||||
private declare connectBtn: HTMLButtonElement;
|
||||
private declare disconnectBtn: HTMLButtonElement;
|
||||
private declare muteBtn: HTMLButtonElement;
|
||||
|
||||
private declare videoElement: HTMLVideoElement;
|
||||
private declare audioElement: HTMLAudioElement;
|
||||
private declare audioInput: HTMLSelectElement;
|
||||
private declare videoInput: HTMLSelectElement;
|
||||
private declare audioCodec: HTMLSelectElement;
|
||||
private declare videoCodec: HTMLSelectElement;
|
||||
|
||||
private debugLog: HTMLElement | null = null;
|
||||
private statusSpan: HTMLElement | null = null;
|
||||
private declare videoElement: HTMLVideoElement;
|
||||
private declare audioElement: HTMLAudioElement;
|
||||
|
||||
private declare smallWebRTCTransport: SmallWebRTCTransport;
|
||||
private declare pcClient: PipecatClient;
|
||||
private debugLog: HTMLElement | null = null;
|
||||
private statusSpan: HTMLElement | null = null;
|
||||
|
||||
constructor() {
|
||||
this.setupDOMElements();
|
||||
this.setupDOMEventListeners();
|
||||
this.initializePipecatClient();
|
||||
void this.populateDevices();
|
||||
}
|
||||
private declare smallWebRTCTransport: SmallWebRTCTransport;
|
||||
private declare rtviClient: RTVIClient;
|
||||
|
||||
private initializePipecatClient(): void {
|
||||
const opts: PipecatClientOptions = {
|
||||
transport: new SmallWebRTCTransport({ connectionUrl: '/api/offer' }),
|
||||
enableMic: true,
|
||||
enableCam: true,
|
||||
callbacks: {
|
||||
onTransportStateChanged: (state: TransportState) => {
|
||||
this.log(`Transport state: ${state}`);
|
||||
},
|
||||
onConnected: () => {
|
||||
this.onConnectedHandler();
|
||||
},
|
||||
onBotReady: () => {
|
||||
this.log('Bot is ready.');
|
||||
},
|
||||
onDisconnected: () => {
|
||||
this.onDisconnectedHandler();
|
||||
},
|
||||
onUserStartedSpeaking: () => {
|
||||
this.log('User started speaking.');
|
||||
},
|
||||
onUserStoppedSpeaking: () => {
|
||||
this.log('User stopped speaking.');
|
||||
},
|
||||
onBotStartedSpeaking: () => {
|
||||
this.log('Bot started speaking.');
|
||||
},
|
||||
onBotStoppedSpeaking: () => {
|
||||
this.log('Bot stopped speaking.');
|
||||
},
|
||||
onUserTranscript: (transcript: TranscriptData) => {
|
||||
if (transcript.final) {
|
||||
this.log(`User transcript: ${transcript.text}`);
|
||||
}
|
||||
},
|
||||
onBotTranscript: (data: BotLLMTextData) => {
|
||||
this.log(`Bot transcript: ${data.text}`);
|
||||
},
|
||||
onTrackStarted: (
|
||||
track: MediaStreamTrack,
|
||||
participant?: Participant
|
||||
) => {
|
||||
if (participant?.local) {
|
||||
return;
|
||||
}
|
||||
this.onBotTrackStarted(track);
|
||||
},
|
||||
onServerMessage: (msg: unknown) => {
|
||||
this.log(`Server message: ${msg}`);
|
||||
},
|
||||
},
|
||||
};
|
||||
this.pcClient = new PipecatClient(opts);
|
||||
this.smallWebRTCTransport = this.pcClient.transport as SmallWebRTCTransport;
|
||||
}
|
||||
|
||||
private setupDOMElements(): void {
|
||||
this.connectBtn = document.getElementById(
|
||||
'connect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById(
|
||||
'disconnect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.muteBtn = document.getElementById('mute-btn') as HTMLButtonElement;
|
||||
|
||||
this.audioInput = document.getElementById(
|
||||
'audio-input'
|
||||
) as HTMLSelectElement;
|
||||
this.videoInput = document.getElementById(
|
||||
'video-input'
|
||||
) as HTMLSelectElement;
|
||||
this.audioCodec = document.getElementById(
|
||||
'audio-codec'
|
||||
) as HTMLSelectElement;
|
||||
this.videoCodec = document.getElementById(
|
||||
'video-codec'
|
||||
) as HTMLSelectElement;
|
||||
|
||||
this.videoElement = document.getElementById(
|
||||
'bot-video'
|
||||
) as HTMLVideoElement;
|
||||
this.audioElement = document.getElementById(
|
||||
'bot-audio'
|
||||
) as HTMLAudioElement;
|
||||
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
}
|
||||
|
||||
private setupDOMEventListeners(): void {
|
||||
this.connectBtn.addEventListener('click', () => this.start());
|
||||
this.disconnectBtn.addEventListener('click', () => this.stop());
|
||||
this.audioInput.addEventListener('change', (e) => {
|
||||
// @ts-ignore
|
||||
let audioDevice = e.target?.value;
|
||||
this.pcClient.updateMic(audioDevice);
|
||||
});
|
||||
this.videoInput.addEventListener('change', (e) => {
|
||||
// @ts-ignore
|
||||
let videoDevice = e.target?.value;
|
||||
this.pcClient.updateCam(videoDevice);
|
||||
});
|
||||
this.muteBtn.addEventListener('click', () => {
|
||||
let isCamEnabled = this.pcClient.isCamEnabled;
|
||||
this.pcClient.enableCam(!isCamEnabled);
|
||||
this.muteBtn.textContent = isCamEnabled ? '📵' : '📷';
|
||||
});
|
||||
}
|
||||
|
||||
private log(message: string): void {
|
||||
if (!this.debugLog) return;
|
||||
const entry = document.createElement('div');
|
||||
entry.textContent = `${new Date().toISOString()} - ${message}`;
|
||||
if (message.startsWith('User: ')) {
|
||||
entry.style.color = '#2196F3';
|
||||
} else if (message.startsWith('Bot: ')) {
|
||||
entry.style.color = '#4CAF50';
|
||||
constructor() {
|
||||
this.setupDOMElements();
|
||||
this.setupDOMEventListeners();
|
||||
this.initializeRTVIClient()
|
||||
void this.populateDevices();
|
||||
}
|
||||
this.debugLog.appendChild(entry);
|
||||
this.debugLog.scrollTop = this.debugLog.scrollHeight;
|
||||
}
|
||||
|
||||
private clearAllLogs() {
|
||||
this.debugLog!.innerText = '';
|
||||
}
|
||||
|
||||
private updateStatus(status: string): void {
|
||||
if (this.statusSpan) {
|
||||
this.statusSpan.textContent = status;
|
||||
private initializeRTVIClient(): void {
|
||||
const transport = new SmallWebRTCTransport();
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
params: {
|
||||
baseUrl: "/api/offer"
|
||||
},
|
||||
transport: transport as Transport,
|
||||
enableMic: true,
|
||||
enableCam: true,
|
||||
callbacks: {
|
||||
onTransportStateChanged: (state) => {
|
||||
this.log(`Transport state: ${state}`)
|
||||
},
|
||||
onConnected: () => {
|
||||
this.onConnectedHandler()
|
||||
},
|
||||
onBotReady: () => {
|
||||
this.log("Bot is ready.")
|
||||
},
|
||||
onDisconnected: () => {
|
||||
this.onDisconnectedHandler()
|
||||
},
|
||||
onUserStartedSpeaking: () => {
|
||||
this.log("User started speaking.")
|
||||
},
|
||||
onUserStoppedSpeaking: () => {
|
||||
this.log("User stopped speaking.")
|
||||
},
|
||||
onBotStartedSpeaking: () => {
|
||||
this.log("Bot started speaking.")
|
||||
},
|
||||
onBotStoppedSpeaking: () => {
|
||||
this.log("Bot stopped speaking.")
|
||||
},
|
||||
onUserTranscript: (transcript) => {
|
||||
if (transcript.final) {
|
||||
this.log(`User transcript: ${transcript.text}`)
|
||||
}
|
||||
},
|
||||
onBotTranscript: (transcript) => {
|
||||
this.log(`Bot transcript: ${transcript.text}`)
|
||||
},
|
||||
onTrackStarted: (track: MediaStreamTrack, participant?: Participant) => {
|
||||
if (participant?.local) {
|
||||
return
|
||||
}
|
||||
this.onBotTrackStarted(track)
|
||||
},
|
||||
onServerMessage: (msg) => {
|
||||
this.log(`Server message: ${msg}`)
|
||||
}
|
||||
},
|
||||
}
|
||||
RTVIConfig.customConnectHandler = () => Promise.resolve();
|
||||
this.rtviClient = new RTVIClient(RTVIConfig);
|
||||
this.smallWebRTCTransport = transport
|
||||
}
|
||||
this.log(`Status: ${status}`);
|
||||
}
|
||||
|
||||
private onConnectedHandler() {
|
||||
this.updateStatus('Connected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = true;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = false;
|
||||
}
|
||||
private setupDOMElements(): void {
|
||||
this.connectBtn = document.getElementById('connect-btn') as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById('disconnect-btn') as HTMLButtonElement;
|
||||
this.muteBtn = document.getElementById('mute-btn') as HTMLButtonElement;
|
||||
|
||||
private onDisconnectedHandler() {
|
||||
this.updateStatus('Disconnected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = false;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = true;
|
||||
}
|
||||
this.audioInput = document.getElementById('audio-input') as HTMLSelectElement;
|
||||
this.videoInput = document.getElementById('video-input') as HTMLSelectElement;
|
||||
this.audioCodec = document.getElementById('audio-codec') as HTMLSelectElement;
|
||||
this.videoCodec = document.getElementById('video-codec') as HTMLSelectElement;
|
||||
|
||||
private onBotTrackStarted(track: MediaStreamTrack) {
|
||||
if (track.kind === 'video') {
|
||||
this.videoElement.srcObject = new MediaStream([track]);
|
||||
} else {
|
||||
this.audioElement.srcObject = new MediaStream([track]);
|
||||
this.videoElement = document.getElementById('bot-video') as HTMLVideoElement;
|
||||
this.audioElement = document.getElementById('bot-audio') as HTMLAudioElement;
|
||||
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
}
|
||||
}
|
||||
|
||||
private async populateDevices(): Promise<void> {
|
||||
const populateSelect = (
|
||||
select: HTMLSelectElement,
|
||||
devices: MediaDeviceInfo[]
|
||||
): void => {
|
||||
let counter = 1;
|
||||
devices.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = device.deviceId;
|
||||
option.text = device.label || 'Device #' + counter;
|
||||
select.appendChild(option);
|
||||
counter += 1;
|
||||
});
|
||||
};
|
||||
private setupDOMEventListeners(): void {
|
||||
this.connectBtn.addEventListener("click", () => this.start());
|
||||
this.disconnectBtn.addEventListener("click", () => this.stop());
|
||||
this.audioInput.addEventListener("change", (e) => {
|
||||
// @ts-ignore
|
||||
let audioDevice = e.target?.value
|
||||
this.rtviClient.updateMic(audioDevice)
|
||||
})
|
||||
this.videoInput.addEventListener("change", (e) => {
|
||||
// @ts-ignore
|
||||
let videoDevice = e.target?.value
|
||||
this.rtviClient.updateCam(videoDevice)
|
||||
})
|
||||
this.muteBtn.addEventListener('click', () => {
|
||||
let isCamEnabled = this.rtviClient.isCamEnabled
|
||||
this.rtviClient.enableCam(!isCamEnabled)
|
||||
this.muteBtn.textContent = isCamEnabled ? '📵' : '📷';
|
||||
});
|
||||
|
||||
try {
|
||||
const audioDevices = await this.pcClient.getAllMics();
|
||||
populateSelect(this.audioInput, audioDevices);
|
||||
const videoDevices = await this.pcClient.getAllCams();
|
||||
populateSelect(this.videoInput, videoDevices);
|
||||
} catch (e) {
|
||||
alert(e);
|
||||
}
|
||||
}
|
||||
|
||||
private async start(): Promise<void> {
|
||||
this.clearAllLogs();
|
||||
|
||||
this.connectBtn.disabled = true;
|
||||
this.updateStatus('Connecting');
|
||||
|
||||
this.smallWebRTCTransport.setAudioCodec(this.audioCodec.value);
|
||||
this.smallWebRTCTransport.setVideoCodec(this.videoCodec.value);
|
||||
try {
|
||||
await this.pcClient.connect();
|
||||
} catch (e) {
|
||||
console.log(`Failed to connect ${e}`);
|
||||
this.stop();
|
||||
private log(message: string): void {
|
||||
if (!this.debugLog) return;
|
||||
const entry = document.createElement('div');
|
||||
entry.textContent = `${new Date().toISOString()} - ${message}`;
|
||||
if (message.startsWith('User: ')) {
|
||||
entry.style.color = '#2196F3';
|
||||
} else if (message.startsWith('Bot: ')) {
|
||||
entry.style.color = '#4CAF50';
|
||||
}
|
||||
this.debugLog.appendChild(entry);
|
||||
this.debugLog.scrollTop = this.debugLog.scrollHeight;
|
||||
}
|
||||
}
|
||||
|
||||
private stop(): void {
|
||||
void this.pcClient.disconnect();
|
||||
}
|
||||
private clearAllLogs() {
|
||||
this.debugLog!.innerText = ''
|
||||
}
|
||||
|
||||
private updateStatus(status: string): void {
|
||||
if (this.statusSpan) {
|
||||
this.statusSpan.textContent = status;
|
||||
}
|
||||
this.log(`Status: ${status}`);
|
||||
}
|
||||
|
||||
private onConnectedHandler() {
|
||||
this.updateStatus('Connected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = true;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = false;
|
||||
}
|
||||
|
||||
private onDisconnectedHandler() {
|
||||
this.updateStatus('Disconnected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = false;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = true;
|
||||
}
|
||||
|
||||
private onBotTrackStarted(track: MediaStreamTrack) {
|
||||
if (track.kind === 'video') {
|
||||
this.videoElement.srcObject = new MediaStream([track]);
|
||||
} else {
|
||||
this.audioElement.srcObject = new MediaStream([track]);
|
||||
}
|
||||
}
|
||||
|
||||
private async populateDevices(): Promise<void> {
|
||||
const populateSelect = (select: HTMLSelectElement, devices: MediaDeviceInfo[]): void => {
|
||||
let counter = 1;
|
||||
devices.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = device.deviceId;
|
||||
option.text = device.label || ('Device #' + counter);
|
||||
select.appendChild(option);
|
||||
counter += 1;
|
||||
});
|
||||
};
|
||||
|
||||
try {
|
||||
const audioDevices = await this.rtviClient.getAllMics();
|
||||
populateSelect(this.audioInput, audioDevices);
|
||||
const videoDevices = await this.rtviClient.getAllCams();
|
||||
populateSelect(this.videoInput, videoDevices);
|
||||
} catch (e) {
|
||||
alert(e);
|
||||
}
|
||||
}
|
||||
|
||||
private async start(): Promise<void> {
|
||||
this.clearAllLogs()
|
||||
|
||||
this.connectBtn.disabled = true;
|
||||
this.updateStatus("Connecting")
|
||||
|
||||
this.smallWebRTCTransport.setAudioCodec(this.audioCodec.value)
|
||||
this.smallWebRTCTransport.setVideoCodec(this.videoCodec.value)
|
||||
try {
|
||||
await this.rtviClient.connect()
|
||||
} catch (e) {
|
||||
console.log(`Failed to connect ${e}`)
|
||||
this.stop()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private stop(): void {
|
||||
void this.rtviClient.disconnect()
|
||||
}
|
||||
}
|
||||
|
||||
// Create the WebRTCConnection instance
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
pipecat-ai[daily,cartesia,openai,silero]
|
||||
pipecat-ai[daily,elevenlabs,openai,silero]
|
||||
fastapi==0.115.6
|
||||
uvicorn
|
||||
python-dotenv
|
||||
|
||||
@@ -49,7 +49,7 @@ async def main():
|
||||
|
||||
# Initialize Sentry
|
||||
sentry_sdk.init(
|
||||
dsn=os.getenv("SENTRY_DSN"),
|
||||
dsn="your-project-dsn",
|
||||
traces_sample_rate=1.0,
|
||||
)
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
90031FC22C616EE900408370 /* SimpleChatbotUITests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90031FC12C616EE900408370 /* SimpleChatbotUITests.swift */; };
|
||||
90031FC42C616EE900408370 /* SimpleChatbotUITestsLaunchTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90031FC32C616EE900408370 /* SimpleChatbotUITestsLaunchTests.swift */; };
|
||||
90031FDC2C6D5DD700408370 /* ToastModifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90031FDB2C6D5DD700408370 /* ToastModifier.swift */; };
|
||||
907C98842D37E6AF0079441F /* PipecatClientIOSDaily in Frameworks */ = {isa = PBXBuildFile; productRef = 907C98832D37E6AF0079441F /* PipecatClientIOSDaily */; };
|
||||
90ABB98E2C735ED6000D9CC7 /* MeetingView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB98D2C735ED6000D9CC7 /* MeetingView.swift */; };
|
||||
90ABB9902C736A8B000D9CC7 /* WaveformView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB98F2C736A8B000D9CC7 /* WaveformView.swift */; };
|
||||
90ABB9932C73820D000D9CC7 /* MicrophoneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9922C73820D000D9CC7 /* MicrophoneView.swift */; };
|
||||
@@ -24,8 +25,6 @@
|
||||
90ABB9A32C74E1CE000D9CC7 /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9A22C74E1CE000D9CC7 /* SettingsView.swift */; };
|
||||
90ABB9A62C74EA8A000D9CC7 /* SettingsPreference.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9A52C74EA8A000D9CC7 /* SettingsPreference.swift */; };
|
||||
90ABB9A82C74EAB1000D9CC7 /* SettingsManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 90ABB9A72C74EAB1000D9CC7 /* SettingsManager.swift */; };
|
||||
90CC98B02E158093003C2706 /* PipecatClientIOSDaily in Frameworks */ = {isa = PBXBuildFile; productRef = 90CC98AF2E158093003C2706 /* PipecatClientIOSDaily */; };
|
||||
90CC98B62E15820B003C2706 /* PipecatClientIOSDaily in Frameworks */ = {isa = PBXBuildFile; productRef = 90CC98B52E15820B003C2706 /* PipecatClientIOSDaily */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
@@ -74,8 +73,7 @@
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
90CC98B62E15820B003C2706 /* PipecatClientIOSDaily in Frameworks */,
|
||||
90CC98B02E158093003C2706 /* PipecatClientIOSDaily in Frameworks */,
|
||||
907C98842D37E6AF0079441F /* PipecatClientIOSDaily in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@@ -220,8 +218,7 @@
|
||||
);
|
||||
name = SimpleChatbot;
|
||||
packageProductDependencies = (
|
||||
90CC98AF2E158093003C2706 /* PipecatClientIOSDaily */,
|
||||
90CC98B52E15820B003C2706 /* PipecatClientIOSDaily */,
|
||||
907C98832D37E6AF0079441F /* PipecatClientIOSDaily */,
|
||||
);
|
||||
productName = SimpleChatbot;
|
||||
productReference = 90031FA32C616EE700408370 /* SimpleChatbot.app */;
|
||||
@@ -296,7 +293,7 @@
|
||||
);
|
||||
mainGroup = 90031F9A2C616EE700408370;
|
||||
packageReferences = (
|
||||
90CC98B42E15820B003C2706 /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */,
|
||||
907C98822D37E6AF0079441F /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */,
|
||||
);
|
||||
productRefGroup = 90031FA42C616EE700408370 /* Products */;
|
||||
projectDirPath = "";
|
||||
@@ -685,24 +682,20 @@
|
||||
/* End XCConfigurationList section */
|
||||
|
||||
/* Begin XCRemoteSwiftPackageReference section */
|
||||
90CC98B42E15820B003C2706 /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */ = {
|
||||
907C98822D37E6AF0079441F /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */ = {
|
||||
isa = XCRemoteSwiftPackageReference;
|
||||
repositoryURL = "https://github.com/pipecat-ai/pipecat-client-ios-daily/";
|
||||
requirement = {
|
||||
kind = upToNextMajorVersion;
|
||||
minimumVersion = 0.3.6;
|
||||
minimumVersion = 0.3.2;
|
||||
};
|
||||
};
|
||||
/* End XCRemoteSwiftPackageReference section */
|
||||
|
||||
/* Begin XCSwiftPackageProductDependency section */
|
||||
90CC98AF2E158093003C2706 /* PipecatClientIOSDaily */ = {
|
||||
907C98832D37E6AF0079441F /* PipecatClientIOSDaily */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
productName = PipecatClientIOSDaily;
|
||||
};
|
||||
90CC98B52E15820B003C2706 /* PipecatClientIOSDaily */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = 90CC98B42E15820B003C2706 /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */;
|
||||
package = 907C98822D37E6AF0079441F /* XCRemoteSwiftPackageReference "pipecat-client-ios-daily" */;
|
||||
productName = PipecatClientIOSDaily;
|
||||
};
|
||||
/* End XCSwiftPackageProductDependency section */
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
{
|
||||
"originHash" : "cc17f08b06def9570d775e9c6f7a8dc10d1588b98127e977c47d052abac659b7",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "daily-client-ios",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/daily-co/daily-client-ios.git",
|
||||
"state" : {
|
||||
"revision" : "431938db25e5807120e89e2dc5bab1c076729f59",
|
||||
"version" : "0.31.0"
|
||||
"revision" : "15804ce495780da3ec2d05ab99736315f7bfbd24",
|
||||
"version" : "0.28.0"
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -15,8 +14,8 @@
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/pipecat-ai/pipecat-client-ios.git",
|
||||
"state" : {
|
||||
"revision" : "f92b5e68e56a8311f7d8ead68a7a5674843cbc40",
|
||||
"version" : "0.3.6"
|
||||
"revision" : "c679512e367002a1a67da85d503fec72d9b17191",
|
||||
"version" : "0.3.2"
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -24,10 +23,10 @@
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/pipecat-ai/pipecat-client-ios-daily/",
|
||||
"state" : {
|
||||
"revision" : "8f494da903192c22c367ecf9e51248c9b651fbc6",
|
||||
"version" : "0.3.6"
|
||||
"revision" : "a337fe6642c52376d2f90eafcb965f5be772ce72",
|
||||
"version" : "0.3.2"
|
||||
}
|
||||
}
|
||||
],
|
||||
"version" : 3
|
||||
"version" : 2
|
||||
}
|
||||
|
||||
@@ -78,11 +78,10 @@ class CallContainerModel: ObservableObject {
|
||||
self.saveCredentials(backendURL: baseUrl)
|
||||
}
|
||||
|
||||
@MainActor
|
||||
func disconnect() {
|
||||
Task { @MainActor in
|
||||
try await self.rtviClientIOS?.disconnect()
|
||||
self.rtviClientIOS?.release()
|
||||
}
|
||||
self.rtviClientIOS?.disconnect(completion: nil)
|
||||
self.rtviClientIOS?.release()
|
||||
}
|
||||
|
||||
func showError(message: String) {
|
||||
|
||||
@@ -1,44 +1,40 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>AI Chatbot</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="status-bar">
|
||||
<div class="status">
|
||||
Status: <span id="connection-status">Disconnected</span>
|
||||
</div>
|
||||
<div class="controls">
|
||||
<button id="connect-btn">Connect</button>
|
||||
<button id="disconnect-btn" disabled>Disconnect</button>
|
||||
</div>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>AI Chatbot</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="status-bar">
|
||||
<div class="status">
|
||||
Status: <span id="connection-status">Disconnected</span>
|
||||
</div>
|
||||
|
||||
<div class="main-content">
|
||||
<div class="bot-container">
|
||||
<div id="bot-video-container"></div>
|
||||
<audio id="bot-audio" autoplay></audio>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="device-bar">
|
||||
<div class="device-controls">
|
||||
<select id="device-selector"></select>
|
||||
<button id="mic-toggle-btn">Unmute Mic</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="debug-panel">
|
||||
<h3>Debug Info</h3>
|
||||
<div id="debug-log"></div>
|
||||
<div class="controls">
|
||||
<button id="connect-btn">Connect</button>
|
||||
<button id="disconnect-btn" disabled>Disconnect</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="module" src="/src/app.js"></script>
|
||||
<link rel="stylesheet" href="/src/style.css" />
|
||||
</body>
|
||||
</html>
|
||||
<div class="main-content">
|
||||
<div class="bot-container">
|
||||
<div id="bot-video-container">
|
||||
</div>
|
||||
<audio id="bot-audio" autoplay></audio>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="debug-panel">
|
||||
<h3>Debug Info</h3>
|
||||
<div id="debug-log"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="module" src="/src/app.js"></script>
|
||||
<link rel="stylesheet" href="/src/style.css">
|
||||
</body>
|
||||
|
||||
</html>
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0"
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.8"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Pipecat Client Implementation
|
||||
* RTVI Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebRTC (via Daily).
|
||||
* It handles audio/video streaming and manages the connection lifecycle.
|
||||
@@ -16,7 +16,7 @@
|
||||
* - Browser with WebRTC support
|
||||
*/
|
||||
|
||||
import { PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { RTVIClient, RTVIEvent } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
|
||||
/**
|
||||
@@ -26,8 +26,9 @@ import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
class ChatbotClient {
|
||||
constructor() {
|
||||
// Initialize client state
|
||||
this.pcClient = null;
|
||||
this.rtviClient = null;
|
||||
this.setupDOMElements();
|
||||
this.setupEventListeners();
|
||||
this.initializeClientAndTransport();
|
||||
}
|
||||
|
||||
@@ -41,8 +42,6 @@ class ChatbotClient {
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
this.botVideoContainer = document.getElementById('bot-video-container');
|
||||
this.deviceSelector = document.getElementById('device-selector');
|
||||
this.micToggleBtn = document.getElementById('mic-toggle-btn');
|
||||
|
||||
// Create an audio element for bot's voice output
|
||||
this.botAudio = document.createElement('audio');
|
||||
@@ -55,53 +54,25 @@ class ChatbotClient {
|
||||
* Set up event listeners for connect/disconnect buttons
|
||||
*/
|
||||
setupEventListeners() {
|
||||
this.connectBtn.addEventListener('click', () => {
|
||||
console.log('click');
|
||||
this.connect();
|
||||
});
|
||||
this.connectBtn.addEventListener('click', () => this.connect());
|
||||
this.disconnectBtn.addEventListener('click', () => this.disconnect());
|
||||
|
||||
// Populate device selector
|
||||
this.pcClient.getAllMics().then((mics) => {
|
||||
console.log('Available mics:', mics);
|
||||
mics.forEach((device) => {
|
||||
const option = document.createElement('option');
|
||||
option.value = device.deviceId;
|
||||
option.textContent = device.label || `Microphone ${device.deviceId}`;
|
||||
this.deviceSelector.appendChild(option);
|
||||
});
|
||||
});
|
||||
this.deviceSelector.addEventListener('change', (event) => {
|
||||
const selectedDeviceId = event.target.value;
|
||||
console.log('Selected device ID:', selectedDeviceId);
|
||||
this.pcClient.updateMic(selectedDeviceId);
|
||||
});
|
||||
|
||||
// Handle mic mute/unmute toggle
|
||||
const micToggleBtn = document.getElementById('mic-toggle-btn');
|
||||
|
||||
micToggleBtn.addEventListener('click', async () => {
|
||||
if (this.pcClient.state === 'disconnected') {
|
||||
await this.pcClient.initDevices();
|
||||
} else {
|
||||
this.pcClient.enableMic(!this.pcClient.isMicEnabled);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
updateMicToggleButton(micEnabled) {
|
||||
console.log('Mic enabled:', micEnabled, this.pcClient?.isMicEnabled);
|
||||
this.micToggleBtn.textContent = micEnabled ? 'Mute Mic' : 'Unmute Mic';
|
||||
}
|
||||
/**
|
||||
* Set up the Pipecat client and Daily transport
|
||||
* Set up the RTVI client and Daily transport
|
||||
*/
|
||||
async initializeClientAndTransport() {
|
||||
console.log('Initializing Pipecat client and transport...');
|
||||
// Initialize the Pipecat client with a DailyTransport and our configuration
|
||||
this.pcClient = new PipecatClient({
|
||||
initializeClientAndTransport() {
|
||||
// Initialize the RTVI client with a DailyTransport and our configuration
|
||||
this.rtviClient = new RTVIClient({
|
||||
transport: new DailyTransport(),
|
||||
enableMic: true,
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
},
|
||||
enableMic: true, // Enable microphone for user input
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
// Handle connection state changes
|
||||
@@ -116,7 +87,6 @@ class ChatbotClient {
|
||||
this.connectBtn.disabled = false;
|
||||
this.disconnectBtn.disabled = true;
|
||||
this.log('Client disconnected');
|
||||
this.updateMicToggleButton(false);
|
||||
},
|
||||
// Handle transport state changes
|
||||
onTransportStateChanged: (state) => {
|
||||
@@ -151,20 +121,14 @@ class ChatbotClient {
|
||||
onMessageError: (error) => {
|
||||
console.log('Message error:', error);
|
||||
},
|
||||
onMicUpdated: (data) => {
|
||||
console.log('Mic updated:', data);
|
||||
this.deviceSelector.value = data.deviceId;
|
||||
},
|
||||
onError: (error) => {
|
||||
console.log('Error:', JSON.stringify(error));
|
||||
},
|
||||
},
|
||||
});
|
||||
window.client = this; // Expose client globally for debugging
|
||||
|
||||
// Set up listeners for media track events
|
||||
this.setupTrackListeners();
|
||||
this.setupEventListeners();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -199,10 +163,10 @@ class ChatbotClient {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Get current tracks from the client
|
||||
const tracks = this.pcClient.tracks();
|
||||
const tracks = this.rtviClient.tracks();
|
||||
|
||||
// Set up any available bot tracks
|
||||
if (tracks.bot?.audio) {
|
||||
@@ -218,34 +182,27 @@ class ChatbotClient {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local) {
|
||||
if (track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
} else if (track.kind === 'video') {
|
||||
this.setupVideoTrack(track);
|
||||
}
|
||||
} else if (track.kind === 'audio') {
|
||||
console.log(`Local audio track started: `, this.pcClient.tracks());
|
||||
// If local audio track starts, update mic
|
||||
this.updateMicToggleButton(true);
|
||||
}
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(
|
||||
`Track stopped event: ${track.kind} from ${
|
||||
participant ? (participant.local ? 'local' : 'bot') : 'unknown'
|
||||
participant?.name || 'unknown'
|
||||
}`
|
||||
);
|
||||
if (participant?.local && track.kind === 'audio') {
|
||||
// If local audio track stops, update mic toggle button
|
||||
this.updateMicToggleButton(false);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -294,16 +251,17 @@ class ChatbotClient {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
*/
|
||||
async connect() {
|
||||
try {
|
||||
// Initialize audio/video devices
|
||||
this.log('Initializing devices...');
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
// Connect to the bot
|
||||
this.log('Connecting to bot...');
|
||||
await this.pcClient.connect({
|
||||
endpoint: 'http://localhost:7860/connect',
|
||||
timeout: 25000,
|
||||
});
|
||||
await this.rtviClient.connect();
|
||||
|
||||
this.log('Connection complete');
|
||||
} catch (error) {
|
||||
@@ -313,9 +271,9 @@ class ChatbotClient {
|
||||
this.updateStatus('Error');
|
||||
|
||||
// Clean up if there's an error
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.pcClient.disconnect();
|
||||
await this.rtviClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError.message}`);
|
||||
}
|
||||
@@ -327,10 +285,10 @@ class ChatbotClient {
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
async disconnect() {
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
// Disconnect the Pipecat client
|
||||
await this.pcClient.disconnect();
|
||||
// Disconnect the RTVI client
|
||||
await this.rtviClient.disconnect();
|
||||
|
||||
// Clean up audio
|
||||
if (this.botAudio.srcObject) {
|
||||
|
||||
@@ -10,8 +10,7 @@ body {
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.status-bar,
|
||||
.device-bar {
|
||||
.status-bar {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
@@ -21,19 +20,7 @@ body {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.controls,
|
||||
.device-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px; /* Adds spacing between elements */
|
||||
}
|
||||
|
||||
.device-controls {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
.controls button,
|
||||
.device-controls button {
|
||||
.controls button {
|
||||
padding: 8px 16px;
|
||||
margin-left: 10px;
|
||||
border: none;
|
||||
@@ -41,27 +28,6 @@ body {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
#bot-selector,
|
||||
#device-selector {
|
||||
padding: 8px 16px;
|
||||
padding-right: 40px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
background-color: #6c757d; /* Gray background */
|
||||
color: white; /* White text */
|
||||
cursor: pointer;
|
||||
appearance: none; /* Removes default browser styling for dropdowns */
|
||||
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='white'%3E%3Cpath d='M7 10l5 5 5-5z'/%3E%3C/svg%3E"); /* Custom arrow */
|
||||
background-repeat: no-repeat;
|
||||
background-position: right 8px center; /* Position the arrow */
|
||||
}
|
||||
|
||||
#bot-selector:focus,
|
||||
#device-selector:focus {
|
||||
outline: none;
|
||||
box-shadow: 0 0 4px rgba(0, 0, 0, 0.3); /* Add a subtle focus effect */
|
||||
}
|
||||
|
||||
#connect-btn {
|
||||
background-color: #4caf50;
|
||||
color: white;
|
||||
@@ -72,9 +38,6 @@ body {
|
||||
color: white;
|
||||
}
|
||||
|
||||
#mic-toggle-btn {
|
||||
}
|
||||
|
||||
button:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
|
||||
1388
examples/simple-chatbot/client/react/package-lock.json
generated
1388
examples/simple-chatbot/client/react/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -10,9 +10,9 @@
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/client-react": "^1.0.0",
|
||||
"@pipecat-ai/daily-transport": "^1.0.0",
|
||||
"@pipecat-ai/client-js": "^0.3.5",
|
||||
"@pipecat-ai/client-react": "^0.3.5",
|
||||
"@pipecat-ai/daily-transport": "^0.3.8",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1"
|
||||
},
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
import {
|
||||
PipecatClientAudio,
|
||||
PipecatClientVideo,
|
||||
usePipecatClientTransportState,
|
||||
RTVIClientAudio,
|
||||
RTVIClientVideo,
|
||||
useRTVIClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
import { PipecatProvider } from './providers/PipecatProvider';
|
||||
import { RTVIProvider } from './providers/RTVIProvider';
|
||||
import { ConnectButton } from './components/ConnectButton';
|
||||
import { StatusDisplay } from './components/StatusDisplay';
|
||||
import { DebugDisplay } from './components/DebugDisplay';
|
||||
import './App.css';
|
||||
|
||||
function BotVideo() {
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const isConnected = transportState !== 'disconnected';
|
||||
|
||||
return (
|
||||
<div className="bot-container">
|
||||
<div className="video-container">
|
||||
{isConnected && <PipecatClientVideo participant="bot" fit="cover" />}
|
||||
{isConnected && <RTVIClientVideo participant="bot" fit="cover" />}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -35,16 +35,16 @@ function AppContent() {
|
||||
</div>
|
||||
|
||||
<DebugDisplay />
|
||||
<PipecatClientAudio />
|
||||
<RTVIClientAudio />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function App() {
|
||||
return (
|
||||
<PipecatProvider>
|
||||
<RTVIProvider>
|
||||
<AppContent />
|
||||
</PipecatProvider>
|
||||
</RTVIProvider>
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
import {
|
||||
usePipecatClient,
|
||||
usePipecatClientTransportState,
|
||||
useRTVIClient,
|
||||
useRTVIClientTransportState,
|
||||
} from '@pipecat-ai/client-react';
|
||||
|
||||
export function ConnectButton() {
|
||||
const client = usePipecatClient();
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const client = useRTVIClient();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
const isConnected = ['connected', 'ready'].includes(transportState);
|
||||
|
||||
const handleClick = async () => {
|
||||
if (!client) {
|
||||
console.error('Pipecat client is not initialized');
|
||||
console.error('RTVI client is not initialized');
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ export function ConnectButton() {
|
||||
if (isConnected) {
|
||||
await client.disconnect();
|
||||
} else {
|
||||
await client.connect({ endpoint: 'http://localhost:7860/connect' });
|
||||
await client.connect();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Connection error:', error);
|
||||
|
||||
@@ -6,12 +6,12 @@ import {
|
||||
TranscriptData,
|
||||
BotLLMTextData,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { usePipecatClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import { useRTVIClient, useRTVIClientEvent } from '@pipecat-ai/client-react';
|
||||
import './DebugDisplay.css';
|
||||
|
||||
export function DebugDisplay() {
|
||||
const debugLogRef = useRef<HTMLDivElement>(null);
|
||||
const client = usePipecatClient();
|
||||
const client = useRTVIClient();
|
||||
|
||||
const log = useCallback((message: string) => {
|
||||
if (!debugLogRef.current) return;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { usePipecatClientTransportState } from '@pipecat-ai/client-react';
|
||||
import { useRTVIClientTransportState } from '@pipecat-ai/client-react';
|
||||
|
||||
export function StatusDisplay() {
|
||||
const transportState = usePipecatClientTransportState();
|
||||
const transportState = useRTVIClientTransportState();
|
||||
|
||||
return (
|
||||
<div className="status">
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
import { type PropsWithChildren } from 'react';
|
||||
import { PipecatClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { PipecatClientProvider } from '@pipecat-ai/client-react';
|
||||
|
||||
const client = new PipecatClient({
|
||||
transport: new DailyTransport(),
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
export function PipecatProvider({ children }: PropsWithChildren) {
|
||||
return (
|
||||
<PipecatClientProvider client={client}>{children}</PipecatClientProvider>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
import { type PropsWithChildren } from 'react';
|
||||
import { RTVIClient } from '@pipecat-ai/client-js';
|
||||
import { DailyTransport } from '@pipecat-ai/daily-transport';
|
||||
import { RTVIClientProvider } from '@pipecat-ai/client-react';
|
||||
|
||||
const transport = new DailyTransport();
|
||||
|
||||
const client = new RTVIClient({
|
||||
transport,
|
||||
params: {
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: {
|
||||
connect: '/connect',
|
||||
},
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
});
|
||||
|
||||
export function RTVIProvider({ children }: PropsWithChildren) {
|
||||
return <RTVIClientProvider client={client}>{children}</RTVIClientProvider>;
|
||||
}
|
||||
104
examples/storytelling-chatbot/client/package-lock.json
generated
104
examples/storytelling-chatbot/client/package-lock.json
generated
@@ -345,9 +345,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.30.tgz",
|
||||
"integrity": "sha512-KBiBKrDY6kxTQWGzKjQB7QirL3PiiOkV7KW98leHFjtVRKtft76Ra5qSA/SL75xT44dp6hOcqiiJ6iievLOYug=="
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.28.tgz",
|
||||
"integrity": "sha512-PAmWhJfJQlP+kxZwCjrVd9QnR5x0R3u0mTXTiZDgSd4h5LdXmjxCCWbN9kq6hkZBOax8Rm3xDW5HagWyJuT37g=="
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
"version": "14.1.4",
|
||||
@@ -359,9 +359,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.30.tgz",
|
||||
"integrity": "sha512-EAqfOTb3bTGh9+ewpO/jC59uACadRHM6TSA9DdxJB/6gxOpyV+zrbqeXiFTDy9uV6bmipFDkfpAskeaDcO+7/g==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.28.tgz",
|
||||
"integrity": "sha512-kzGChl9setxYWpk3H6fTZXXPFFjg7urptLq5o5ZgYezCrqlemKttwMT5iFyx/p1e/JeglTwDFRtb923gTJ3R1w==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -374,9 +374,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.30.tgz",
|
||||
"integrity": "sha512-TyO7Wz1IKE2kGv8dwQ0bmPL3s44EKVencOqwIY69myoS3rdpO1NPg5xPM5ymKu7nfX4oYJrpMxv8G9iqLsnL4A==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.28.tgz",
|
||||
"integrity": "sha512-z6FXYHDJlFOzVEOiiJ/4NG8aLCeayZdcRSMjPDysW297Up6r22xw6Ea9AOwQqbNsth8JNgIK8EkWz2IDwaLQcw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -389,9 +389,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-I5lg1fgPJ7I5dk6mr3qCH1hJYKJu1FsfKSiTKoYwcuUf53HWTrEkwmMI0t5ojFKeA6Vu+SfT2zVy5NS0QLXV4Q==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.28.tgz",
|
||||
"integrity": "sha512-9ARHLEQXhAilNJ7rgQX8xs9aH3yJSj888ssSjJLeldiZKR4D7N08MfMqljk77fAwZsWwsrp8ohHsMvurvv9liQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -404,9 +404,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-8GkNA+sLclQyxgzCDs2/2GSwBc92QLMrmYAmoP2xehe5MUKBLB2cgo34Yu242L1siSkwQkiV4YLdCnjwc/Micw==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.28.tgz",
|
||||
"integrity": "sha512-p6gvatI1nX41KCizEe6JkF0FS/cEEF0u23vKDpl+WhPe/fCTBeGkEBh7iW2cUM0rvquPVwPWdiUR6Ebr/kQWxQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -419,9 +419,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.30.tgz",
|
||||
"integrity": "sha512-8Ly7okjssLuBoe8qaRCcjGtcMsv79hwzn/63wNeIkzJVFVX06h5S737XNr7DZwlsbTBDOyI6qbL2BJB5n6TV/w==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.28.tgz",
|
||||
"integrity": "sha512-nsiSnz2wO6GwMAX2o0iucONlVL7dNgKUqt/mDTATGO2NY59EO/ZKnKEr80BJFhuA5UC1KZOMblJHWZoqIJddpA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -434,9 +434,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.30.tgz",
|
||||
"integrity": "sha512-dBmV1lLNeX4mR7uI7KNVHsGQU+OgTG5RGFPi3tBJpsKPvOPtg9poyav/BYWrB3GPQL4dW5YGGgalwZ79WukbKQ==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.28.tgz",
|
||||
"integrity": "sha512-+IuGQKoI3abrXFqx7GtlvNOpeExUH1mTIqCrh1LGFf8DnlUcTmOOCApEnPJUSLrSbzOdsF2ho2KhnQoO0I1RDw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -449,9 +449,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-6MMHi2Qc1Gkq+4YLXAgbYslE1f9zMGBikKMdmQRHXjkGPot1JY3n5/Qrbg40Uvbi8//wYnydPnyvNhI1DMUW1g==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.28.tgz",
|
||||
"integrity": "sha512-l61WZ3nevt4BAnGksUVFKy2uJP5DPz2E0Ma/Oklvo3sGj9sw3q7vBWONFRgz+ICiHpW5mV+mBrkB3XEubMrKaA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -464,9 +464,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-ia32-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-pVZMnFok5qEX4RT59mK2hEVtJX+XFfak+/rjHpyFh7juiT52r177bfFKhnlafm0UOSldhXjj32b+LZIOdswGTg==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.28.tgz",
|
||||
"integrity": "sha512-+Kcp1T3jHZnJ9v9VTJ/yf1t/xmtFAc/Sge4v7mVc1z+NYfYzisi8kJ9AsY8itbgq+WgEwMtOpiLLJsUy2qnXZw==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
@@ -479,9 +479,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.30.tgz",
|
||||
"integrity": "sha512-4KCo8hMZXMjpTzs3HOqOGYYwAXymXIy7PEPAXNEcEOyKqkjiDlECumrWziy+JEF0Oi4ILHGxzgQ3YiMGG2t/Lg==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.28.tgz",
|
||||
"integrity": "sha512-1gCmpvyhz7DkB1srRItJTnmR2UwQPAUXXIg9r0/56g3O8etGmwlX68skKXJOp9EejW3hhv7nSQUJ2raFiz4MoA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -1317,9 +1317,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
@@ -1960,9 +1960,9 @@
|
||||
"integrity": "sha512-AlcaJBi/pqqJBIQ8U9Mcpc9i8Aqxn88Skv5d+xBX006BY5u8N3mGLHa5Lgppa7L/HfwgwLgZ6NYs+Ag6uUmJRA=="
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
@@ -3391,9 +3391,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/glob/node_modules/brace-expansion": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
||||
"integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
|
||||
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0"
|
||||
}
|
||||
@@ -4389,11 +4389,11 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "14.2.30",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.30.tgz",
|
||||
"integrity": "sha512-+COdu6HQrHHFQ1S/8BBsCag61jZacmvbuL2avHvQFbWa2Ox7bE+d8FyNgxRLjXQ5wtPyQwEmk85js/AuaG2Sbg==",
|
||||
"version": "14.2.28",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-14.2.28.tgz",
|
||||
"integrity": "sha512-QLEIP/kYXynIxtcKB6vNjtWLVs3Y4Sb+EClTC/CSVzdLD1gIuItccpu/n1lhmduffI32iPGEK2cLLxxt28qgYA==",
|
||||
"dependencies": {
|
||||
"@next/env": "14.2.30",
|
||||
"@next/env": "14.2.28",
|
||||
"@swc/helpers": "0.5.5",
|
||||
"busboy": "1.6.0",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@@ -4408,15 +4408,15 @@
|
||||
"node": ">=18.17.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "14.2.30",
|
||||
"@next/swc-darwin-x64": "14.2.30",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.30",
|
||||
"@next/swc-linux-arm64-musl": "14.2.30",
|
||||
"@next/swc-linux-x64-gnu": "14.2.30",
|
||||
"@next/swc-linux-x64-musl": "14.2.30",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.30",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.30",
|
||||
"@next/swc-win32-x64-msvc": "14.2.30"
|
||||
"@next/swc-darwin-arm64": "14.2.28",
|
||||
"@next/swc-darwin-x64": "14.2.28",
|
||||
"@next/swc-linux-arm64-gnu": "14.2.28",
|
||||
"@next/swc-linux-arm64-musl": "14.2.28",
|
||||
"@next/swc-linux-x64-gnu": "14.2.28",
|
||||
"@next/swc-linux-x64-musl": "14.2.28",
|
||||
"@next/swc-win32-arm64-msvc": "14.2.28",
|
||||
"@next/swc-win32-ia32-msvc": "14.2.28",
|
||||
"@next/swc-win32-x64-msvc": "14.2.28"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
|
||||
@@ -12,11 +12,12 @@ import {
|
||||
import {
|
||||
WebSocketTransport,
|
||||
TwilioSerializer,
|
||||
} from '@pipecat-ai/websocket-transport';
|
||||
} from "@pipecat-ai/websocket-transport";
|
||||
|
||||
class WebsocketClientApp {
|
||||
private static STREAM_SID = 'ws_mock_stream_sid';
|
||||
private static CALL_SID = 'ws_mock_call_sid';
|
||||
|
||||
private static STREAM_SID = "ws_mock_stream_sid"
|
||||
private static CALL_SID = "ws_mock_call_sid"
|
||||
|
||||
private rtviClient: RTVIClient | null = null;
|
||||
private connectBtn: HTMLButtonElement | null = null;
|
||||
@@ -37,12 +38,8 @@ class WebsocketClientApp {
|
||||
* Set up references to DOM elements and create necessary media elements
|
||||
*/
|
||||
private setupDOMElements(): void {
|
||||
this.connectBtn = document.getElementById(
|
||||
'connect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById(
|
||||
'disconnect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.connectBtn = document.getElementById('connect-btn') as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById('disconnect-btn') as HTMLButtonElement;
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
}
|
||||
@@ -83,23 +80,13 @@ class WebsocketClientApp {
|
||||
}
|
||||
|
||||
private async emulateTwilioMessages() {
|
||||
const connectedMessage = {
|
||||
event: 'connected',
|
||||
protocol: 'Call',
|
||||
version: '1.0.0',
|
||||
};
|
||||
const connectedMessage={"event": "connected", "protocol": "Call", "version": "1.0.0"}
|
||||
|
||||
const websocketTransport = this.rtviClient?.transport as WebSocketTransport;
|
||||
void websocketTransport?.sendRawMessage(connectedMessage);
|
||||
const websocketTransport = this.rtviClient?.transport as WebSocketTransport
|
||||
void websocketTransport?.sendRawMessage(connectedMessage)
|
||||
|
||||
const startMessage = {
|
||||
event: 'start',
|
||||
start: {
|
||||
streamSid: WebsocketClientApp.STREAM_SID,
|
||||
callSid: WebsocketClientApp.CALL_SID,
|
||||
},
|
||||
};
|
||||
void websocketTransport?.sendRawMessage(startMessage);
|
||||
const startMessage={"event": "start", "start": {"streamSid": WebsocketClientApp.STREAM_SID, "callSid": WebsocketClientApp.CALL_SID}}
|
||||
void websocketTransport?.sendRawMessage(startMessage)
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -131,9 +118,7 @@ class WebsocketClientApp {
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(
|
||||
`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`
|
||||
);
|
||||
this.log(`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -143,10 +128,7 @@ class WebsocketClientApp {
|
||||
*/
|
||||
private setupAudioTrack(track: MediaStreamTrack): void {
|
||||
this.log('Setting up audio track');
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
if (this.botAudio.srcObject && "getAudioTracks" in this.botAudio.srcObject) {
|
||||
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
|
||||
if (oldTrack?.id === track.id) return;
|
||||
}
|
||||
@@ -161,19 +143,23 @@ class WebsocketClientApp {
|
||||
try {
|
||||
const startTime = Date.now();
|
||||
|
||||
const ws_opts = {
|
||||
const transport = new WebSocketTransport({
|
||||
serializer: new TwilioSerializer(),
|
||||
recorderSampleRate: 8000,
|
||||
playerSampleRate: 8000,
|
||||
ws_url: 'http://localhost:8765/ws',
|
||||
};
|
||||
playerSampleRate: 8000
|
||||
});
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
transport: new WebSocketTransport(ws_opts),
|
||||
transport,
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:8765',
|
||||
endpoints: { connect: '/' },
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
onConnected: () => {
|
||||
this.emulateTwilioMessages();
|
||||
this.emulateTwilioMessages()
|
||||
this.updateStatus('Connected');
|
||||
if (this.connectBtn) this.connectBtn.disabled = true;
|
||||
if (this.disconnectBtn) this.disconnectBtn.disabled = false;
|
||||
@@ -197,7 +183,13 @@ class WebsocketClientApp {
|
||||
onMessageError: (error) => console.error('Message error:', error),
|
||||
onError: (error) => console.error('Error:', error),
|
||||
},
|
||||
};
|
||||
}
|
||||
// @ts-ignore
|
||||
RTVIConfig.customConnectHandler = () => Promise.resolve(
|
||||
{
|
||||
ws_url: "/ws",
|
||||
}
|
||||
);
|
||||
this.rtviClient = new RTVIClient(RTVIConfig);
|
||||
this.setupTrackListeners();
|
||||
|
||||
@@ -231,13 +223,8 @@ class WebsocketClientApp {
|
||||
try {
|
||||
await this.rtviClient.disconnect();
|
||||
this.rtviClient = null;
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
this.botAudio.srcObject
|
||||
.getAudioTracks()
|
||||
.forEach((track) => track.stop());
|
||||
if (this.botAudio.srcObject && "getAudioTracks" in this.botAudio.srcObject) {
|
||||
this.botAudio.srcObject.getAudioTracks().forEach((track) => track.stop());
|
||||
this.botAudio.srcObject = null;
|
||||
}
|
||||
} catch (error) {
|
||||
@@ -245,6 +232,7 @@ class WebsocketClientApp {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
declare global {
|
||||
|
||||
@@ -6,10 +6,10 @@ Basic implementation using the [Pipecat JavaScript SDK](https://docs.pipecat.ai/
|
||||
|
||||
1. Run the bot server. See the [server README](../README).
|
||||
|
||||
2. Navigate to the `client` directory:
|
||||
2. Navigate to the `client/javascript` directory:
|
||||
|
||||
```bash
|
||||
cd client
|
||||
cd client/javascript
|
||||
```
|
||||
|
||||
3. Install dependencies:
|
||||
|
||||
563
examples/websocket/client/package-lock.json
generated
563
examples/websocket/client/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -19,8 +19,8 @@
|
||||
"vite": "^6.3.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"@pipecat-ai/client-js": "^1.0.0",
|
||||
"@pipecat-ai/websocket-transport": "^1.0.0",
|
||||
"@pipecat-ai/client-js": "^0.4.0",
|
||||
"@pipecat-ai/websocket-transport": "^0.4.2",
|
||||
"protobufjs": "^7.4.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Pipecat Client Implementation
|
||||
* RTVI Client Implementation
|
||||
*
|
||||
* This client connects to an RTVI-compatible bot server using WebSocket.
|
||||
*
|
||||
@@ -14,14 +14,16 @@
|
||||
*/
|
||||
|
||||
import {
|
||||
PipecatClient,
|
||||
PipecatClientOptions,
|
||||
RTVIClient,
|
||||
RTVIClientOptions,
|
||||
RTVIEvent,
|
||||
} from '@pipecat-ai/client-js';
|
||||
import { WebSocketTransport } from '@pipecat-ai/websocket-transport';
|
||||
import {
|
||||
WebSocketTransport
|
||||
} from "@pipecat-ai/websocket-transport";
|
||||
|
||||
class WebsocketClientApp {
|
||||
private pcClient: PipecatClient | null = null;
|
||||
private rtviClient: RTVIClient | null = null;
|
||||
private connectBtn: HTMLButtonElement | null = null;
|
||||
private disconnectBtn: HTMLButtonElement | null = null;
|
||||
private statusSpan: HTMLElement | null = null;
|
||||
@@ -29,7 +31,7 @@ class WebsocketClientApp {
|
||||
private botAudio: HTMLAudioElement;
|
||||
|
||||
constructor() {
|
||||
console.log('WebsocketClientApp');
|
||||
console.log("WebsocketClientApp");
|
||||
this.botAudio = document.createElement('audio');
|
||||
this.botAudio.autoplay = true;
|
||||
//this.botAudio.playsInline = true;
|
||||
@@ -43,12 +45,8 @@ class WebsocketClientApp {
|
||||
* Set up references to DOM elements and create necessary media elements
|
||||
*/
|
||||
private setupDOMElements(): void {
|
||||
this.connectBtn = document.getElementById(
|
||||
'connect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById(
|
||||
'disconnect-btn'
|
||||
) as HTMLButtonElement;
|
||||
this.connectBtn = document.getElementById('connect-btn') as HTMLButtonElement;
|
||||
this.disconnectBtn = document.getElementById('disconnect-btn') as HTMLButtonElement;
|
||||
this.statusSpan = document.getElementById('connection-status');
|
||||
this.debugLog = document.getElementById('debug-log');
|
||||
}
|
||||
@@ -93,8 +91,8 @@ class WebsocketClientApp {
|
||||
* This is called when the bot is ready or when the transport state changes to ready
|
||||
*/
|
||||
setupMediaTracks() {
|
||||
if (!this.pcClient) return;
|
||||
const tracks = this.pcClient.tracks();
|
||||
if (!this.rtviClient) return;
|
||||
const tracks = this.rtviClient.tracks();
|
||||
if (tracks.bot?.audio) {
|
||||
this.setupAudioTrack(tracks.bot.audio);
|
||||
}
|
||||
@@ -105,10 +103,10 @@ class WebsocketClientApp {
|
||||
* This handles new tracks being added during the session
|
||||
*/
|
||||
setupTrackListeners() {
|
||||
if (!this.pcClient) return;
|
||||
if (!this.rtviClient) return;
|
||||
|
||||
// Listen for new tracks starting
|
||||
this.pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
this.rtviClient.on(RTVIEvent.TrackStarted, (track, participant) => {
|
||||
// Only handle non-local (bot) tracks
|
||||
if (!participant?.local && track.kind === 'audio') {
|
||||
this.setupAudioTrack(track);
|
||||
@@ -116,10 +114,8 @@ class WebsocketClientApp {
|
||||
});
|
||||
|
||||
// Listen for tracks stopping
|
||||
this.pcClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(
|
||||
`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`
|
||||
);
|
||||
this.rtviClient.on(RTVIEvent.TrackStopped, (track, participant) => {
|
||||
this.log(`Track stopped: ${track.kind} from ${participant?.name || 'unknown'}`);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -129,10 +125,7 @@ class WebsocketClientApp {
|
||||
*/
|
||||
private setupAudioTrack(track: MediaStreamTrack): void {
|
||||
this.log('Setting up audio track');
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
if (this.botAudio.srcObject && "getAudioTracks" in this.botAudio.srcObject) {
|
||||
const oldTrack = this.botAudio.srcObject.getAudioTracks()[0];
|
||||
if (oldTrack?.id === track.id) return;
|
||||
}
|
||||
@@ -141,15 +134,21 @@ class WebsocketClientApp {
|
||||
|
||||
/**
|
||||
* Initialize and connect to the bot
|
||||
* This sets up the Pipecat client, initializes devices, and establishes the connection
|
||||
* This sets up the RTVI client, initializes devices, and establishes the connection
|
||||
*/
|
||||
public async connect(): Promise<void> {
|
||||
try {
|
||||
const startTime = Date.now();
|
||||
|
||||
//const transport = new DailyTransport();
|
||||
const PipecatConfig: PipecatClientOptions = {
|
||||
transport: new WebSocketTransport(),
|
||||
const transport = new WebSocketTransport();
|
||||
const RTVIConfig: RTVIClientOptions = {
|
||||
transport,
|
||||
params: {
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
baseUrl: 'http://localhost:7860',
|
||||
endpoints: { connect: '/connect' },
|
||||
},
|
||||
enableMic: true,
|
||||
enableCam: false,
|
||||
callbacks: {
|
||||
@@ -177,20 +176,15 @@ class WebsocketClientApp {
|
||||
onMessageError: (error) => console.error('Message error:', error),
|
||||
onError: (error) => console.error('Error:', error),
|
||||
},
|
||||
};
|
||||
this.pcClient = new PipecatClient(PipecatConfig);
|
||||
// @ts-ignore
|
||||
window.pcClient = this.pcClient; // Expose for debugging
|
||||
}
|
||||
this.rtviClient = new RTVIClient(RTVIConfig);
|
||||
this.setupTrackListeners();
|
||||
|
||||
this.log('Initializing devices...');
|
||||
await this.pcClient.initDevices();
|
||||
await this.rtviClient.initDevices();
|
||||
|
||||
this.log('Connecting to bot...');
|
||||
await this.pcClient.connect({
|
||||
// The baseURL and endpoint of your bot server that the client will connect to
|
||||
endpoint: 'http://localhost:7860/connect',
|
||||
});
|
||||
await this.rtviClient.connect();
|
||||
|
||||
const timeTaken = Date.now() - startTime;
|
||||
this.log(`Connection complete, timeTaken: ${timeTaken}`);
|
||||
@@ -198,9 +192,9 @@ class WebsocketClientApp {
|
||||
this.log(`Error connecting: ${(error as Error).message}`);
|
||||
this.updateStatus('Error');
|
||||
// Clean up if there's an error
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.pcClient.disconnect();
|
||||
await this.rtviClient.disconnect();
|
||||
} catch (disconnectError) {
|
||||
this.log(`Error during disconnect: ${disconnectError}`);
|
||||
}
|
||||
@@ -212,17 +206,12 @@ class WebsocketClientApp {
|
||||
* Disconnect from the bot and clean up media resources
|
||||
*/
|
||||
public async disconnect(): Promise<void> {
|
||||
if (this.pcClient) {
|
||||
if (this.rtviClient) {
|
||||
try {
|
||||
await this.pcClient.disconnect();
|
||||
this.pcClient = null;
|
||||
if (
|
||||
this.botAudio.srcObject &&
|
||||
'getAudioTracks' in this.botAudio.srcObject
|
||||
) {
|
||||
this.botAudio.srcObject
|
||||
.getAudioTracks()
|
||||
.forEach((track) => track.stop());
|
||||
await this.rtviClient.disconnect();
|
||||
this.rtviClient = null;
|
||||
if (this.botAudio.srcObject && "getAudioTracks" in this.botAudio.srcObject) {
|
||||
this.botAudio.srcObject.getAudioTracks().forEach((track) => track.stop());
|
||||
this.botAudio.srcObject = null;
|
||||
}
|
||||
} catch (error) {
|
||||
@@ -230,6 +219,7 @@ class WebsocketClientApp {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
declare global {
|
||||
|
||||
1607
examples/word-wrangler-gemini-live/client/package-lock.json
generated
1607
examples/word-wrangler-gemini-live/client/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user