Compare commits
210 Commits
khk/mem0
...
hush/backg
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
181cc43724 | ||
|
|
77ab841cab | ||
|
|
3bbc75110a | ||
|
|
b2ce1d9378 | ||
|
|
58714865df | ||
|
|
03b3635b0a | ||
|
|
aaa7b5e626 | ||
|
|
0b8486ce39 | ||
|
|
d4ae091ddd | ||
|
|
9e0a57a6de | ||
|
|
fc4c1e4110 | ||
|
|
9b740d9e72 | ||
|
|
b03563765f | ||
|
|
a1578bd67a | ||
|
|
6466573b84 | ||
|
|
b42dc83696 | ||
|
|
fe5931b884 | ||
|
|
4b438ff7d7 | ||
|
|
89a8c16676 | ||
|
|
c4c92585f9 | ||
|
|
63146d6f85 | ||
|
|
ec00edc893 | ||
|
|
a21be058e2 | ||
|
|
c226c20e12 | ||
|
|
78e6669105 | ||
|
|
79f29e14dd | ||
|
|
d4a00fd080 | ||
|
|
d4186fa115 | ||
|
|
3536cbcd13 | ||
|
|
e3bcb70b13 | ||
|
|
19a82f9522 | ||
|
|
8c0a847449 | ||
|
|
e3704cd1a1 | ||
|
|
1ba037865b | ||
|
|
909520f76e | ||
|
|
d06cfcd597 | ||
|
|
2579d0cf57 | ||
|
|
1ec20b2e74 | ||
|
|
55a6e5aa4c | ||
|
|
2229730169 | ||
|
|
24b54c66ee | ||
|
|
a14205415f | ||
|
|
18b56d4a10 | ||
|
|
b85bd91d08 | ||
|
|
23f3285a7d | ||
|
|
94f6436619 | ||
|
|
480692971c | ||
|
|
5df5f6ae4c | ||
|
|
6940112ab9 | ||
|
|
80584e9138 | ||
|
|
1fd01e715d | ||
|
|
a7a1cd0cde | ||
|
|
e5a6b9d2b4 | ||
|
|
169b50af61 | ||
|
|
31311d8ac5 | ||
|
|
bfd06b321d | ||
|
|
3efbcab39c | ||
|
|
b40ca391f5 | ||
|
|
43008c8c5b | ||
|
|
3a37b11e56 | ||
|
|
9ea81bc982 | ||
|
|
98b499e2e9 | ||
|
|
72c8f6c8c3 | ||
|
|
ea61256ddc | ||
|
|
babafadbe4 | ||
|
|
a5660f6dc7 | ||
|
|
64ad916c5f | ||
|
|
13d0563298 | ||
|
|
20a1dd066d | ||
|
|
56f6e3ceb4 | ||
|
|
3afab63870 | ||
|
|
d3b9a0aab0 | ||
|
|
6b21081a7d | ||
|
|
648bdea64c | ||
|
|
ed387e876a | ||
|
|
2fb9aa4d76 | ||
|
|
9eba8f1637 | ||
|
|
43c255f58a | ||
|
|
121e70a029 | ||
|
|
70e28a0547 | ||
|
|
c9a93f2504 | ||
|
|
8a12470efd | ||
|
|
05d53bc66f | ||
|
|
e763cd7bee | ||
|
|
94ec5118e6 | ||
|
|
7203ef6885 | ||
|
|
3074a62bb1 | ||
|
|
31712b84ac | ||
|
|
c99ec0b0b7 | ||
|
|
cd7abd2962 | ||
|
|
c7544954cf | ||
|
|
4f390b15a3 | ||
|
|
f2a05b065d | ||
|
|
5d5041eb2b | ||
|
|
f4dc66cb13 | ||
|
|
b88744b18d | ||
|
|
209de2638d | ||
|
|
5d829fb6a9 | ||
|
|
a978a5cd4a | ||
|
|
b9ea3f0fd9 | ||
|
|
d2f5ee2915 | ||
|
|
acddddc508 | ||
|
|
0c2c6fa771 | ||
|
|
80088c6138 | ||
|
|
766639a9a4 | ||
|
|
675e2b1498 | ||
|
|
af6c23f7b1 | ||
|
|
d212e88030 | ||
|
|
d6758bf2ad | ||
|
|
5abfb15300 | ||
|
|
f576254d61 | ||
|
|
a90807a3d2 | ||
|
|
a06fc4ce50 | ||
|
|
80cb4497f0 | ||
|
|
8aa878c5e9 | ||
|
|
e982b3d919 | ||
|
|
8945fd1fc6 | ||
|
|
16b97d151b | ||
|
|
f7ac142ad2 | ||
|
|
2355067f61 | ||
|
|
76f9626d35 | ||
|
|
f82c2566e8 | ||
|
|
b6007bb3d6 | ||
|
|
311a5360ad | ||
|
|
62cb0376f2 | ||
|
|
91a69b7029 | ||
|
|
1d4d7f28a1 | ||
|
|
a55a7bbb96 | ||
|
|
a394b35e85 | ||
|
|
aa85df4fd6 | ||
|
|
3bb1f5f7a8 | ||
|
|
7c115f9d59 | ||
|
|
a82b847971 | ||
|
|
50515aa842 | ||
|
|
b348fde32b | ||
|
|
45787520b2 | ||
|
|
053bf72da2 | ||
|
|
ca4893397a | ||
|
|
c1f6a4e079 | ||
|
|
135ed811f1 | ||
|
|
055a3f1c53 | ||
|
|
750bb88586 | ||
|
|
c4f9171fe1 | ||
|
|
d223201c3f | ||
|
|
86701fd3c7 | ||
|
|
b414077a07 | ||
|
|
15f23929e9 | ||
|
|
cc9e4047d0 | ||
|
|
4ef4dcefce | ||
|
|
f3caa8cf7a | ||
|
|
e5470fec7a | ||
|
|
887c197bce | ||
|
|
f5d49fea81 | ||
|
|
e087f6ec5d | ||
|
|
406f5a395b | ||
|
|
060bb4c26b | ||
|
|
499e69846d | ||
|
|
e6e339a02e | ||
|
|
dc2ee2bf0a | ||
|
|
d982fc35d8 | ||
|
|
72d373e565 | ||
|
|
59fdfe697d | ||
|
|
97c9e0676e | ||
|
|
aeac40312e | ||
|
|
ce9f75a851 | ||
|
|
c45d852f6b | ||
|
|
55cc1fe9f6 | ||
|
|
1ba7e2d6fa | ||
|
|
1b8d326b49 | ||
|
|
077952b658 | ||
|
|
e694971423 | ||
|
|
9450b07ec5 | ||
|
|
19b464ba23 | ||
|
|
8aebf00c2d | ||
|
|
01458895c2 | ||
|
|
2082d023ef | ||
|
|
c99436b80e | ||
|
|
f884c93826 | ||
|
|
67a93d09c2 | ||
|
|
2fcf4e6d70 | ||
|
|
fcb8b9a5b3 | ||
|
|
fee0409f63 | ||
|
|
3be6973e2c | ||
|
|
5184d178ef | ||
|
|
c9f7882728 | ||
|
|
48b6850df4 | ||
|
|
dc5067407d | ||
|
|
1c19777d5e | ||
|
|
2e1a18503b | ||
|
|
1baa52a17e | ||
|
|
526f9c2e06 | ||
|
|
2770d64a25 | ||
|
|
8a7e305619 | ||
|
|
8f2dadf5a0 | ||
|
|
30432639b4 | ||
|
|
d33a4b3a11 | ||
|
|
9cad8bfcc6 | ||
|
|
93d8ddf4f2 | ||
|
|
0d05312071 | ||
|
|
f8e33d8b7b | ||
|
|
f24c5b0aa7 | ||
|
|
da25e0c008 | ||
|
|
c99d02d8bb | ||
|
|
e56c8f881c | ||
|
|
a747f08017 | ||
|
|
c6c0b73345 | ||
|
|
fde90ee01d | ||
|
|
689a844aaf | ||
|
|
aab98b61a0 | ||
|
|
79ac696973 |
187
CHANGELOG.md
187
CHANGELOG.md
@@ -9,10 +9,195 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- ElevenLabs TTS services now support a sample rate of 8000.
|
||||
- Added a new iOS client option to the `SmallWebRTCTransport` **video-transform** example.
|
||||
|
||||
- Added new processors `ProducerProcessor` and `ConsumerProcessor`. The
|
||||
producer processor processes frames from the pipeline and decides whether the
|
||||
consumers should consume it or not. If so, the same frame that is received by
|
||||
the producer is sent to the consumer. There can be multiple consumers per
|
||||
producer. These processors can be useful to push frames from one part of a
|
||||
pipeline to a different one (e.g. when using `ParallelPipeline`).
|
||||
|
||||
- Improvements for the `SmallWebRTCTransport`:
|
||||
- Wait until the pipeline is ready before triggering the `connected` event.
|
||||
- Queue messages if the data channel is not ready.
|
||||
- Update the aiortc dependency to fix an issue where the 'video/rtx' MIME
|
||||
type was incorrectly handled as a codec retransmission.
|
||||
- Avoid initial video delays.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue in the Azure TTS services where the language was being set
|
||||
incorrectly.
|
||||
|
||||
- Fixed `SmallWebRTCTransport` to support dynamic values for
|
||||
`TransportParams.audio_out_10ms_chunks`. Previously, it only worked with 20ms
|
||||
chunks.
|
||||
|
||||
- Fixed an issue where `LLMAssistantContextAggregator` would prevent a
|
||||
`BotStoppedSpeakingFrame` from moving through the pipeline.
|
||||
|
||||
## [0.0.62] - 2025-04-01 "An April Fools' release"
|
||||
|
||||
### Added
|
||||
|
||||
- Added `TransportParams.audio_out_10ms_chunks` parameter to allow controlling
|
||||
the amount of audio being sent by the output transport. It defaults to 4, so
|
||||
40ms audio chunks are sent.
|
||||
|
||||
- Added `QwenLLMService` for Qwen integration with an OpenAI-compatible
|
||||
interface. Added foundational example `14q-function-calling-qwen.py`.
|
||||
|
||||
- Added `Mem0MemoryService`. Mem0 is a self-improving memory layer for LLM
|
||||
applications. Learn more at: https://mem0.ai/.
|
||||
|
||||
- Added `WhisperSTTServiceMLX` for Whisper transcription on Apple Silicon.
|
||||
See example in `examples/foundational/13e-whisper-mlx.py`. Latency of
|
||||
completed transcription using Whisper large-v3-turbo on an M4 macbook is
|
||||
~500ms.
|
||||
|
||||
- Added `SmallWebRTCTransport`, a new P2P WebRTC transport.
|
||||
|
||||
- Created two examples in `p2p-webrtc`:
|
||||
- **video-transform**: Demonstrates sending and receiving audio/video with
|
||||
`SmallWebRTCTransport` using `TypeScript`. Includes video frame
|
||||
processing with OpenCV.
|
||||
- **voice-agent**: A minimal example of creating a voice agent with
|
||||
`SmallWebRTCTransport`.
|
||||
|
||||
- `GladiaSTTService` now have comprehensive support for the latest API config
|
||||
options, including model, language detection, preprocessing, custom
|
||||
vocabulary, custom spelling, translation, and message filtering options.
|
||||
|
||||
- Added `SmallWebRTCTransport`, a new P2P WebRTC transport.
|
||||
|
||||
- Created two examples in `p2p-webrtc`:
|
||||
- **video-transform**: Demonstrates sending and receiving audio/video with
|
||||
`SmallWebRTCTransport` using `TypeScript`. Includes video frame
|
||||
processing with OpenCV.
|
||||
- **voice-agent**: A minimal example of creating a voice agent with
|
||||
`SmallWebRTCTransport`.
|
||||
|
||||
- Added support to `ProtobufFrameSerializer` to send the messages from
|
||||
`TransportMessageFrame` and `TransportMessageUrgentFrame`.
|
||||
|
||||
- Added support for a new TTS service, `PiperTTSService`.
|
||||
(see https://github.com/rhasspy/piper/)
|
||||
|
||||
- It is now possible to tell whether `UserStartedSpeakingFrame` or
|
||||
`UserStoppedSpeakingFrame` have been generated because of emulation frames.
|
||||
|
||||
### Changed
|
||||
|
||||
- `FunctionCallResultFrame`a are now system frames. This is to prevent function
|
||||
call results to be discarded during interruptions.
|
||||
|
||||
- Pipecat services have been reorganized into packages. Each package can have
|
||||
one or more of the following modules (in the future new module names might be
|
||||
needed) depending on the services implemented:
|
||||
|
||||
- image: for image generation services
|
||||
- llm: for LLM services
|
||||
- memory: for memory services
|
||||
- stt: for Speech-To-Text services
|
||||
- tts: for Text-To-Speech services
|
||||
- video: for video generation services
|
||||
- vision: for video recognition services
|
||||
|
||||
- Base classes for AI services have been reorganized into modules. They can now
|
||||
be found in
|
||||
`pipecat.services.[ai_service,image_service,llm_service,stt_service,vision_service]`.
|
||||
|
||||
- `GladiaSTTService` now uses the `solaria-1` model by default. Other params
|
||||
use Gladia's default values. Added support for more language codes.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- All Pipecat services imports have been deprecated and a warning will be shown
|
||||
when using the old import. The new import should be
|
||||
`pipecat.services.[service].[image,llm,memory,stt,tts,video,vision]`. For
|
||||
example, `from pipecat.services.openai.llm import OpenAILLMService`.
|
||||
|
||||
- Import for AI services base classes from `pipecat.services.ai_services` is now
|
||||
deprecated, use one of
|
||||
`pipecat.services.[ai_service,image_service,llm_service,stt_service,vision_service]`.
|
||||
|
||||
- Deprecated the `language` parameter in `GladiaSTTService.InputParams` in
|
||||
favor of `language_config`, which better aligns with Gladia's API.
|
||||
|
||||
- Deprecated using `GladiaSTTService.InputParams` directly. Use the new
|
||||
`GladiaInputParams` class instead.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `FastAPIWebsocketTransport` and `WebsocketClientTransport` issue that
|
||||
would cause the transport to be closed prematurely, preventing the internally
|
||||
queued audio to be sent. The same issue could also cause an infinite loop
|
||||
while using an output mixer and when sending an `EndFrame`, preventing the bot
|
||||
to finish.
|
||||
|
||||
- Fixed an issue that could cause the `TranscriptionUpdateFrame` being pushed
|
||||
because of an interruption to be discarded.
|
||||
|
||||
- Fixed an issue that would cause `SegmentedSTTService` based services
|
||||
(e.g. `OpenAISTTService`) to try to transcribe non-spoken audio, causing
|
||||
invalid transcriptions.
|
||||
|
||||
- Fixed an issue where `GoogleTTSService` was emitting two `TTSStoppedFrames`.
|
||||
|
||||
### Performance
|
||||
|
||||
- Output transports now send 40ms audio chunks instead of 20ms. This should
|
||||
improve performance.
|
||||
|
||||
- `BotSpeakingFrame`s are now sent every 200ms. If the output transport audio chunks
|
||||
are higher than 200ms then they will be sent at every audio chunk.
|
||||
|
||||
### Other
|
||||
|
||||
- Added foundational example `37-mem0.py` demonstrating how to use the
|
||||
`Mem0MemoryService`.
|
||||
|
||||
- Added foundational example `13e-whisper-mlx.py` demonstrating how to use the
|
||||
`WhisperSTTServiceMLX`.
|
||||
|
||||
## [0.0.61] - 2025-03-26
|
||||
|
||||
### Added
|
||||
|
||||
- Added a new frame, `LLMSetToolChoiceFrame`, which provides a mechanism
|
||||
for modifying the `tool_choice` in the context.
|
||||
|
||||
- Added `GroqTTSService` which provides text-to-speech functionality using
|
||||
Groq's API.
|
||||
|
||||
- Added support in `DailyTransport` for updating remote participants'
|
||||
`canReceive` permission via the `update_remote_participants()` method, by
|
||||
bumping the daily-python dependency to >= 0.16.0.
|
||||
|
||||
- ElevenLabs TTS services now support a sample rate of 8000.
|
||||
|
||||
- Added support for `instructions` in `OpenAITTSService`.
|
||||
|
||||
- Added support for `base_url` in `OpenAIImageGenService` and
|
||||
`OpenAITTSService`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue in `RTVIObserver` that prevented handling of Google LLM
|
||||
context messages. The observer now processes both OpenAI-style and
|
||||
Google-style contexts.
|
||||
|
||||
- Fixed an issue in Daily involving switching virtual devices, by bumping the
|
||||
daily-python dependency to >= 0.16.1.
|
||||
|
||||
- Fixed a `GoogleAssistantContextAggregator` issue where function calls
|
||||
placeholders where not being updated when then function call result was
|
||||
different from a string.
|
||||
|
||||
- Fixed an issue that would cause `LLMAssistantContextAggregator` to block
|
||||
processing more frames while processing a function call result.
|
||||
|
||||
- Fixed an issue where the `RTVIObserver` would report two bot started and
|
||||
stopped speaking events for each bot turn.
|
||||
|
||||
|
||||
@@ -26,11 +26,52 @@ git commit -m "Description of your changes"
|
||||
git push origin your-branch-name
|
||||
```
|
||||
|
||||
9. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
|
||||
> Important: Describe the changes you've made clearly!
|
||||
8. **Submit a Pull Request (PR)**: Open a PR from your forked repository to the main branch of this repo.
|
||||
> Important: Describe the changes you've made clearly!
|
||||
|
||||
Our maintainers will review your PR, and once everything is good, your contributions will be merged!
|
||||
|
||||
## Code Style and Documentation
|
||||
|
||||
### Python Code Style
|
||||
|
||||
We use Ruff for code linting and formatting. Please ensure your code passes all linting checks before submitting a PR.
|
||||
|
||||
### Docstring Conventions
|
||||
|
||||
We follow Google-style docstrings with these specific conventions:
|
||||
|
||||
- Class docstrings should fully document all parameters used in `__init__`
|
||||
- We don't require separate docstrings for `__init__` methods when parameters are documented in the class docstring
|
||||
- Property methods should have docstrings explaining their purpose and return value
|
||||
|
||||
Example of correctly documented class:
|
||||
|
||||
```python
|
||||
class MyClass:
|
||||
"""Class description.
|
||||
|
||||
Additional details about the class.
|
||||
|
||||
Args:
|
||||
param1: Description of first parameter.
|
||||
param2: Description of second parameter.
|
||||
"""
|
||||
|
||||
def __init__(self, param1, param2):
|
||||
# No docstring required here as parameters are documented above
|
||||
self.param1 = param1
|
||||
self.param2 = param2
|
||||
|
||||
@property
|
||||
def some_property(self) -> str:
|
||||
"""Get the formatted property value.
|
||||
|
||||
Returns:
|
||||
A string representation of the property.
|
||||
"""
|
||||
return f"Property: {self.param1}"
|
||||
```
|
||||
|
||||
# Contributor Covenant Code of Conduct
|
||||
|
||||
@@ -51,23 +92,23 @@ diverse, inclusive, and healthy community.
|
||||
Examples of behavior that contributes to a positive environment for our
|
||||
community include:
|
||||
|
||||
* Demonstrating empathy and kindness toward other people
|
||||
* Being respectful of differing opinions, viewpoints, and experiences
|
||||
* Giving and gracefully accepting constructive feedback
|
||||
* Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
- Demonstrating empathy and kindness toward other people
|
||||
- Being respectful of differing opinions, viewpoints, and experiences
|
||||
- Giving and gracefully accepting constructive feedback
|
||||
- Accepting responsibility and apologizing to those affected by our mistakes,
|
||||
and learning from the experience
|
||||
* Focusing on what is best not just for us as individuals, but for the overall
|
||||
- Focusing on what is best not just for us as individuals, but for the overall
|
||||
community
|
||||
|
||||
Examples of unacceptable behavior include:
|
||||
|
||||
* The use of sexualized language or imagery, and sexual attention or advances of
|
||||
- The use of sexualized language or imagery, and sexual attention or advances of
|
||||
any kind
|
||||
* Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or email address,
|
||||
- Trolling, insulting or derogatory comments, and personal or political attacks
|
||||
- Public or private harassment
|
||||
- Publishing others' private information, such as a physical or email address,
|
||||
without their explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
- Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Enforcement Responsibilities
|
||||
@@ -162,4 +203,4 @@ For answers to common questions about this code of conduct, see the FAQ at
|
||||
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
||||
[Mozilla CoC]: https://github.com/mozilla/diversity
|
||||
[FAQ]: https://www.contributor-covenant.org/faq
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
[translations]: https://www.contributor-covenant.org/translations
|
||||
|
||||
23
README.md
23
README.md
@@ -55,17 +55,18 @@ pip install "pipecat-ai[option,...]"
|
||||
|
||||
### Available services
|
||||
|
||||
| Category | Services | Install Command Example |
|
||||
| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | `pip install "pipecat-ai[moondream]"` |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
|
||||
| Category | Services | Install Command Example |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Parakeet (NVIDIA)](https://docs.pipecat.ai/server/services/stt/parakeet), [Ultravox](https://docs.pipecat.ai/server/services/stt/ultravox), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | `pip install "pipecat-ai[deepgram]"` |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | `pip install "pipecat-ai[openai]"` |
|
||||
| Text-to-Speech | [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [FastPitch (NVIDIA)](https://docs.pipecat.ai/server/services/tts/fastpitch), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | `pip install "pipecat-ai[cartesia]"` |
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | `pip install "pipecat-ai[mem0]"` |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | `pip install "pipecat-ai[moondream]"` |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ pre-commit~=4.0.1
|
||||
pyright~=1.1.397
|
||||
pytest~=8.3.4
|
||||
pytest-asyncio~=0.25.3
|
||||
pytest-aiohttp==1.1.0
|
||||
ruff~=0.11.1
|
||||
setuptools~=70.0.0
|
||||
setuptools_scm~=8.1.0
|
||||
|
||||
@@ -50,6 +50,14 @@ autodoc_mock_imports = [
|
||||
"pyht.protos",
|
||||
"pyht.protos.api_pb2",
|
||||
"pipecat_ai_playht", # PlayHT wrapper
|
||||
"vllm",
|
||||
"aiortc",
|
||||
"aiortc.mediastreams",
|
||||
"cv2",
|
||||
"av",
|
||||
"pyneuphonic",
|
||||
"mem0",
|
||||
"mlx_whisper",
|
||||
"anthropic",
|
||||
"assemblyai",
|
||||
"boto3",
|
||||
|
||||
@@ -45,8 +45,10 @@ Transport & Serialization
|
||||
Utilities
|
||||
~~~~~~~~~
|
||||
|
||||
* :mod:`Adapters <pipecat.adapters>`
|
||||
* :mod:`Clocks <pipecat.clocks>`
|
||||
* :mod:`Metrics <pipecat.metrics>`
|
||||
* :mod:`Observers <pipecat.observers>`
|
||||
* :mod:`Sync <pipecat.sync>`
|
||||
* :mod:`Transcriptions <pipecat.transcriptions>`
|
||||
* :mod:`Utils <pipecat.utils>`
|
||||
@@ -56,10 +58,12 @@ Utilities
|
||||
:caption: API Reference
|
||||
:hidden:
|
||||
|
||||
Adapters <api/pipecat.adapters>
|
||||
Audio <api/pipecat.audio>
|
||||
Clocks <api/pipecat.clocks>
|
||||
Frames <api/pipecat.frames>
|
||||
Metrics <api/pipecat.metrics>
|
||||
Observers <api/pipecat.observers>
|
||||
Pipeline <api/pipecat.pipeline>
|
||||
Processors <api/pipecat.processors>
|
||||
Serializers <api/pipecat.serializers>
|
||||
|
||||
@@ -12,22 +12,29 @@ pipecat-ai[aws]
|
||||
pipecat-ai[azure]
|
||||
pipecat-ai[canonical]
|
||||
pipecat-ai[cartesia]
|
||||
pipecat-ai[cerebras]
|
||||
pipecat-ai[deepseek]
|
||||
pipecat-ai[daily]
|
||||
pipecat-ai[deepgram]
|
||||
pipecat-ai[elevenlabs]
|
||||
pipecat-ai[fal]
|
||||
pipecat-ai[fireworks]
|
||||
pipecat-ai[fish]
|
||||
pipecat-ai[gladia]
|
||||
pipecat-ai[google]
|
||||
pipecat-ai[grok]
|
||||
pipecat-ai[groq]
|
||||
# pipecat-ai[krisp] # Mocked instead
|
||||
# pipecat-ai[krisp] # Mocked
|
||||
pipecat-ai[koala]
|
||||
pipecat-ai[langchain]
|
||||
pipecat-ai[livekit]
|
||||
pipecat-ai[lmnt]
|
||||
pipecat-ai[local]
|
||||
# pipecat-ai[mem0] # Mocked
|
||||
# pipecat-ai[mlx-whisper] # Mocked
|
||||
pipecat-ai[moondream]
|
||||
pipecat-ai[nim]
|
||||
# pipecat-ai[neuphonic] # Mocked
|
||||
pipecat-ai[noisereduce]
|
||||
pipecat-ai[openai]
|
||||
# pipecat-ai[openpipe]
|
||||
@@ -36,5 +43,9 @@ pipecat-ai[riva]
|
||||
pipecat-ai[silero]
|
||||
pipecat-ai[simli]
|
||||
pipecat-ai[soundfile]
|
||||
pipecat-ai[tavus]
|
||||
pipecat-ai[together]
|
||||
# pipecat-ai[ultravox] # Mocked
|
||||
# pipecat-ai[webrtc] # Mocked
|
||||
pipecat-ai[websocket]
|
||||
pipecat-ai[whisper]
|
||||
@@ -90,3 +90,6 @@ ASSEMBLYAI_API_KEY=...
|
||||
|
||||
# OpenRouter
|
||||
OPENROUTER_API_KEY=...
|
||||
|
||||
# Piper
|
||||
PIPER_BASE_URL=...
|
||||
@@ -18,7 +18,7 @@ from pipecat.frames.frames import AudioRawFrame, EndFrame, OutputAudioRawFrame,
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -21,9 +21,9 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.services.canonical import CanonicalMetricsService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.canonical.metrics import CanonicalMetricsService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -23,8 +23,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
@@ -12,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
@@ -10,8 +16,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
178
examples/deployment/pipecat-cloud-daily-pstn-server/README.md
Normal file
178
examples/deployment/pipecat-cloud-daily-pstn-server/README.md
Normal file
@@ -0,0 +1,178 @@
|
||||
# Handling PSTN/SIP Dial-in on Pipecat Cloud
|
||||
|
||||
This repository contains two server implementations for handling
|
||||
the pinless dial-in workflow in Pipecat Cloud. This is the companion to the
|
||||
Pipecat Cloud [pstn_sip starter image](https://github.com/daily-co/pipecat-cloud-images/tree/main/pipecat-starters/pstn_sip).
|
||||
In addition you can use `/api/dial` to trigger dial-out, and
|
||||
eventually, call-transfers.
|
||||
|
||||
1. [FastAPI Server](fastapi-webhook-server/README.md) -
|
||||
A FastAPI implementation that handles PSTN (Public Switched Telephone
|
||||
Network) and SIP (Session Initiation Protocol) calls using the Daily API.
|
||||
|
||||
2. [Next.js Serverless](nextjs-webhook-server/README.md) -
|
||||
A Next.js API implementation designed for deployment on Vercel's
|
||||
serverless platform.
|
||||
|
||||
Both implementations provide:
|
||||
|
||||
- HMAC signature validation for pinless webhook
|
||||
- Structured logging
|
||||
- Support for dial-in and dial-out settings
|
||||
- Voicemail detection and call transfer functionality (coming soon)
|
||||
- Test request handling
|
||||
|
||||
## Choosing an Implementation
|
||||
|
||||
- Use the **FastAPI Server** if you:
|
||||
|
||||
- Need a standalone server
|
||||
- Prefer Python and FastAPI
|
||||
- Want to deploy to traditional hosting platforms
|
||||
|
||||
- Use the **Next.js Serverless** implementation if you:
|
||||
- Want serverless deployment
|
||||
- Prefer JavaScript/TypeScript
|
||||
- Already use Next.js and Vercel for other projects
|
||||
- Need quick scaling and zero maintenance
|
||||
|
||||
## Prerequisites
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Both implementations require similar environment variables:
|
||||
|
||||
- `PIPECAT_CLOUD_API_KEY`: Pipecat Cloud API Key, begins with pk\_\*
|
||||
- `AGENT_NAME`: Your Daily agent name
|
||||
- `PINLESS_HMAC_SECRET`: Your HMAC secret for request verification
|
||||
- `LOG_LEVEL`: (Optional) Logging level (defaults to 'info')
|
||||
|
||||
See the individual README files in each implementation directory for
|
||||
specific setup instructions.
|
||||
|
||||
### Phone number setup
|
||||
|
||||
You can buy a phone number through the Pipecat Cloud Dashboard:
|
||||
|
||||
1. Go to `Settings` > `Telephony`
|
||||
2. Follow the UI to purchase a phone number
|
||||
3. Configure the webhook URL to receive incoming calls (e.g. `https://my-webhook-url.com/api/dial`)
|
||||
|
||||
Or purchase the number using Daily's
|
||||
[PhoneNumbers API](https://docs.daily.co/reference/rest-api/phone-numbers).
|
||||
|
||||
```bash
|
||||
curl --request POST \
|
||||
--url https://api.daily.co/v1/domain-dialin-config \
|
||||
--header 'Authorization: Bearer $TOKEN' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data-raw '{
|
||||
"type": "pinless_dialin",
|
||||
"name_prefix": "Customer1",
|
||||
"phone_number": "+1PURCHASED_NUM",
|
||||
"room_creation_api": "https://example.com/api/dial",
|
||||
"hold_music_url": "https://example.com/static/ringtone.mp3",
|
||||
"timeout_config": {
|
||||
"message": "No agent is available right now"
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
The API will return a static SIP URI (`sip_uri`) that can be called
|
||||
from other SIP services.
|
||||
|
||||
### `room_creation_api`
|
||||
|
||||
To make and receive calls currently you have to host a server that
|
||||
handles incoming calls. In the coming weeks, incoming calls will be
|
||||
directly handled within Daily and we will expose an endpoint similar
|
||||
to `{service}/start` that will manage this for you.
|
||||
|
||||
In the meantime, the server described below serves as the webhook
|
||||
handler for the `room_creation_api`. Configure your pinless phone
|
||||
number or SIP interconnect to the `ngrok` tunnel or
|
||||
the actual server URL, append `/api/dial` to the webhook URL.
|
||||
|
||||
## Example curl commands
|
||||
|
||||
Note: Replace `http://localhost:3000` with your actual server URL and
|
||||
phone numbers with valid values for your use case.
|
||||
|
||||
### Dialin Request
|
||||
|
||||
The server will receive a request when a call is received from Daily.
|
||||
|
||||
### Dialout Request
|
||||
|
||||
Dial a number, will use any purchased number
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3000/api/dial \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
Dial a number with callerId, which is the UUID of a purchased number.
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3000/api/dial \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
"callerId": "purchased_phone_uuid"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
Dial a number
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3000/api/dial \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
"callerId": "purchased_phone_uuid"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
|
||||
### Advanced Request with Voicemail Detection
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:3000/api/dial \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"To": "+1234567890",
|
||||
"From": "+1987654321",
|
||||
"callId": "call-uuid-123",
|
||||
"callDomain": "domain-uuid-456",
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+1234567890",
|
||||
"callerId": "purchased_phone_uuid"
|
||||
}
|
||||
],
|
||||
"voicemail_detection": {
|
||||
"testInPrebuilt": true
|
||||
},
|
||||
"call_transfer": {
|
||||
"mode": "dialout",
|
||||
"speakSummary": true,
|
||||
"storeSummary": true,
|
||||
"operatorNumber": "+1234567890",
|
||||
"testInPrebuilt": true
|
||||
}
|
||||
}'
|
||||
```
|
||||
@@ -0,0 +1,98 @@
|
||||
# FastAPI server for handling Daily PSTN/SIP Webhook
|
||||
|
||||
A FastAPI server that handles PSTN (Public Switched Telephone Network) and SIP (Session Initiation Protocol) calls using the Daily API.
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository
|
||||
|
||||
2. Navigate to the `fastapi-webhook-server` directory:
|
||||
|
||||
```bash
|
||||
cd fastapi-webhook-server
|
||||
```
|
||||
|
||||
3. Install dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
4. Copy `env.example` to `.env`:
|
||||
|
||||
```bash
|
||||
cp env.example .env
|
||||
```
|
||||
|
||||
5. Update `.env` with your credentials:
|
||||
|
||||
- `AGENT_NAME`: Your Daily agent name
|
||||
- `PIPECAT_CLOUD_API_KEY`: Your Daily API key
|
||||
- `PINLESS_HMAC_SECRET`: Your HMAC secret for request verification
|
||||
|
||||
## Running the Server
|
||||
|
||||
Start the server:
|
||||
|
||||
```bash
|
||||
python server.py
|
||||
```
|
||||
|
||||
The server will run on `http://localhost:7860` and you can expose it via ngrok for testing:
|
||||
|
||||
```bash
|
||||
`ngrok http 7860`
|
||||
```
|
||||
|
||||
> Tip: Use a subdomain for a consistent URL (e.g. `ngrok http -subdomain=mydomain http://localhost:7860`)
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### GET /
|
||||
|
||||
Health check endpoint that returns a "Hello, World!" message.
|
||||
|
||||
### POST /api/dial
|
||||
|
||||
Initiates a PSTN/SIP call with the following request body format:
|
||||
|
||||
```json
|
||||
{
|
||||
"To": "+14152251493",
|
||||
"From": "+14158483432",
|
||||
"callId": "string-contains-uuid",
|
||||
"callDomain": "string-contains-uuid",
|
||||
"dialout_settings": [
|
||||
{
|
||||
"phoneNumber": "+14158483432",
|
||||
"callerId": "+14152251493"
|
||||
}
|
||||
],
|
||||
"voicemail_detection": {
|
||||
"testInPrebuilt": true
|
||||
},
|
||||
"call_transfer": {
|
||||
"mode": "dialout",
|
||||
"speakSummary": true,
|
||||
"storeSummary": true,
|
||||
"operatorNumber": "+14152250006",
|
||||
"testInPrebuilt": true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
#### Response
|
||||
|
||||
Returns a JSON object containing:
|
||||
|
||||
- `status`: Success/failure status
|
||||
- `data`: Response from Daily API
|
||||
- `room_properties`: Properties of the created Daily room
|
||||
|
||||
## Error Handling
|
||||
|
||||
- 401: Invalid signature
|
||||
- 400: Invalid authorization header (e.g. missing Daily API key in bot.py)
|
||||
- 405: Method not allowed (e.g. incorrect route on the webhook URL)
|
||||
- 500: Server errors (missing API key, network issues)
|
||||
- Other status codes are passed through from the Daily API
|
||||
@@ -0,0 +1,3 @@
|
||||
AGENT_NAME="your-agent-name"
|
||||
PIPECAT_CLOUD_API_KEY="your-daily-api-key"
|
||||
PINLESS_HMAC_SECRET="hmac-secret-pinless-dialin"
|
||||
@@ -0,0 +1,6 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
python-dotenv
|
||||
requests
|
||||
pydantic
|
||||
loguru
|
||||
@@ -0,0 +1,201 @@
|
||||
#
|
||||
# Copyright (c) 2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
# server.py
|
||||
|
||||
|
||||
import base64 # for calculating hmac signature
|
||||
import hmac
|
||||
import os # for accessing environment variables
|
||||
import time # for setting expiration time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
class RoomRequest(BaseModel):
|
||||
test: Optional[str] = Field(None, alias="Test", description="Test field")
|
||||
To: Optional[str] = Field(None, alias="to", description="Destination phone number")
|
||||
From: Optional[str] = Field(None, alias="from", description="Source phone number")
|
||||
callId: Optional[str] = Field(None, alias="call_id", description="Unique call identifier")
|
||||
callDomain: Optional[str] = Field(
|
||||
None, alias="call_domain", description="Call domain identifier"
|
||||
)
|
||||
dialout_settings: Optional[List[Dict[str, Any]]] = Field(
|
||||
None, description="An array of phone numbers or SIP URIs to dialout to"
|
||||
)
|
||||
voicemail_detection: Optional[Dict[str, Any]] = Field(
|
||||
None, description="A flag to perform voicemail or answeing-machine detection"
|
||||
)
|
||||
call_transfer: Optional[Dict[str, Any]] = Field(None, description="to initiate a call transfer")
|
||||
|
||||
class Config:
|
||||
populate_by_name = True
|
||||
alias_generator = None
|
||||
|
||||
|
||||
"""
|
||||
body can contain any fields, but for handling PSTN/SIP,
|
||||
we recommend sending the following custom values:
|
||||
dialin, dialout, voicemail detection, and call transfer
|
||||
|
||||
|
||||
"To": "+14152251493",
|
||||
"From": "+14158483432",
|
||||
"callId": "string-contains-uuid",
|
||||
"callDomain": "string-contains-uuid"
|
||||
These need to be remapped to dialin_settings
|
||||
|
||||
"dialout_settings": [
|
||||
{"phoneNumber": "+14158483432", "callerId": "+14152251493"},
|
||||
{"sipUri": "sip:username@sip.hostname"}
|
||||
],
|
||||
},
|
||||
|
||||
voicemail_detection:{
|
||||
testInPrebuilt: true
|
||||
},
|
||||
|
||||
"call_transfer": {
|
||||
"mode": "dialout",
|
||||
"speakSummary": true,
|
||||
"storeSummary": true,
|
||||
"operatorNumber": "+14152250006",
|
||||
"testInPrebuilt": true
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
@app.get("/")
|
||||
async def read_root():
|
||||
return {"message": "Hello, World!"}
|
||||
|
||||
|
||||
@app.post("/api/dial")
|
||||
async def dial(request: RoomRequest, raw_request: Request):
|
||||
logger.info("Incoming request to /dial:")
|
||||
logger.info(f"Headers: {dict(raw_request.headers)}")
|
||||
raw_body = await raw_request.body()
|
||||
raw_body_str = raw_body.decode()
|
||||
logger.info(f"Raw body: {raw_body_str}")
|
||||
logger.info(f"Parsed body: {request.dict()}")
|
||||
|
||||
# calculate signature and compare/verify
|
||||
hmac_secret = os.getenv("PINLESS_HMAC_SECRET")
|
||||
timestamp = raw_request.headers.get("x-pinless-timestamp")
|
||||
signature = raw_request.headers.get("x-pinless-signature")
|
||||
|
||||
if not hmac_secret:
|
||||
logger.debug("Skipping HMAC validation - PINLESS_HMAC_SECRET not set")
|
||||
elif timestamp and signature:
|
||||
message = timestamp + "." + raw_body_str
|
||||
|
||||
base64_decoded_secret = base64.b64decode(hmac_secret)
|
||||
computed_signature = base64.b64encode(
|
||||
hmac.new(base64_decoded_secret, message.encode(), "sha256").digest()
|
||||
).decode()
|
||||
|
||||
if computed_signature != signature:
|
||||
logger.error(f"Invalid signature. Expected {signature}, got {computed_signature}")
|
||||
raise HTTPException(status_code=401, detail="Invalid signature")
|
||||
else:
|
||||
logger.debug("Skipping HMAC validation - no signature headers present")
|
||||
|
||||
if request.test == "test":
|
||||
logger.debug("Test request received")
|
||||
return {"status": "success", "message": "Test request received"}
|
||||
|
||||
dialin_settings = None
|
||||
# these fields are camelCase in the request
|
||||
required_fields = ["To", "From", "callId", "callDomain"]
|
||||
if all(
|
||||
field in request.dict() and request.dict()[field] is not None for field in required_fields
|
||||
):
|
||||
# transform from camelCase to snake_case because daily-python expects snake_case
|
||||
dialin_settings = {
|
||||
"From": request.From,
|
||||
"To": request.To,
|
||||
"call_id": request.callId,
|
||||
"call_domain": request.callDomain,
|
||||
# transform from camelCase to snake_case
|
||||
}
|
||||
logger.debug(f"Populated dialin_settings from request: {dialin_settings}")
|
||||
|
||||
daily_room_properties = {
|
||||
"enable_dialout": request.dialout_settings is not None,
|
||||
}
|
||||
|
||||
if dialin_settings is not None:
|
||||
sip_config = {
|
||||
"display_name": request.From,
|
||||
"sip_mode": "dial-in",
|
||||
"num_endpoints": 2 if request.call_transfer is not None else 1,
|
||||
}
|
||||
daily_room_properties["sip"] = sip_config
|
||||
|
||||
# Setting default expiry to 5 minutes from now
|
||||
daily_room_properties["exp"] = int(time.time()) + (5 * 60)
|
||||
|
||||
logger.debug(f"Daily room properties: {daily_room_properties}")
|
||||
payload = {
|
||||
"createDailyRoom": True,
|
||||
"dailyRoomProperties": daily_room_properties,
|
||||
"body": {
|
||||
"dialin_settings": dialin_settings,
|
||||
"dialout_settings": request.dialout_settings,
|
||||
"voicemail_detection": request.voicemail_detection,
|
||||
"call_transfer": request.call_transfer,
|
||||
},
|
||||
}
|
||||
|
||||
pcc_api_key = os.getenv("PIPECAT_CLOUD_API_KEY")
|
||||
agent_name = os.getenv("AGENT_NAME", "my-first-agent")
|
||||
|
||||
if not pcc_api_key:
|
||||
raise HTTPException(status_code=500, detail="DAILY_API_KEY environment variable is not set")
|
||||
|
||||
headers = {"Authorization": f"Bearer {pcc_api_key}", "Content-Type": "application/json"}
|
||||
|
||||
url = f"https://api.pipecat.daily.co/v1/public/{agent_name}/start"
|
||||
|
||||
logger.debug(f"Making API call to Daily: {url} {headers} {payload}")
|
||||
|
||||
try:
|
||||
response = requests.post(url, json=payload, headers=headers)
|
||||
response.raise_for_status()
|
||||
response_data = response.json()
|
||||
logger.debug(f"Response: {response_data}")
|
||||
return {
|
||||
"status": "success",
|
||||
"data": response_data,
|
||||
"room_properties": daily_room_properties,
|
||||
}
|
||||
except requests.exceptions.HTTPError as e:
|
||||
# Pass through the status code and error details from the Daily API
|
||||
status_code = e.response.status_code
|
||||
error_detail = e.response.json() if e.response.content else str(e)
|
||||
logger.error(f"HTTP error: {error_detail}")
|
||||
raise HTTPException(status_code=status_code, detail=error_detail)
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error(f"Request error: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
try:
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(app, host="0.0.0.0", port=7860)
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Server stopped manually")
|
||||
53
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/.gitignore
vendored
Normal file
53
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/.gitignore
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
# dependencies
|
||||
/node_modules
|
||||
/.pnp
|
||||
.pnp.js
|
||||
|
||||
# testing
|
||||
/coverage
|
||||
|
||||
# next.js
|
||||
/.next/
|
||||
/out/
|
||||
|
||||
# production
|
||||
/build
|
||||
|
||||
# misc
|
||||
.DS_Store
|
||||
*.pem
|
||||
|
||||
# debug
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
.pnpm-debug.log*
|
||||
|
||||
# local env files
|
||||
.env*.local
|
||||
|
||||
# vercel
|
||||
.vercel
|
||||
|
||||
# typescript
|
||||
*.tsbuildinfo
|
||||
next-env.d.ts
|
||||
|
||||
# IDE specific files
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
@@ -0,0 +1,115 @@
|
||||
# Next.js server for handling Daily PSTN/SIP Webhook
|
||||
|
||||
Next.js API routes for handling Daily PSTN/SIP Pipecat requests.
|
||||
|
||||
## Features
|
||||
|
||||
- API endpoint for handling Daily PSTN/SIP Pipecat requests
|
||||
- HMAC signature validation
|
||||
- Structured logging with Pino
|
||||
- Support for dial-in and dial-out settings
|
||||
- Voicemail detection and call transfer functionality
|
||||
- Test request handling
|
||||
|
||||
## Setup
|
||||
|
||||
1. Clone the repository
|
||||
|
||||
2. Navigate to the `nextjs-webhook-server` directory:
|
||||
|
||||
```bash
|
||||
cd nextjs-webhook-server
|
||||
```
|
||||
|
||||
3. Install dependencies:
|
||||
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
4. Create `.env.local` file with your credentials:
|
||||
|
||||
```bash
|
||||
cp env.local.example .env.local
|
||||
```
|
||||
|
||||
5. Update your `.env` with your secrets:
|
||||
|
||||
```bash
|
||||
PIPECAT_CLOUD_API_KEY=pk_*
|
||||
AGENT_NAME=my-first-agent
|
||||
PINLESS_HMAC_SECRET=your_hmac_secret
|
||||
LOG_LEVEL=info
|
||||
```
|
||||
|
||||
### Running the server
|
||||
|
||||
Run the development server:
|
||||
|
||||
```bash
|
||||
npm run dev
|
||||
```
|
||||
|
||||
The server will run on `http://localhost:7860` and you can expose it via ngrok for testing:
|
||||
|
||||
```bash
|
||||
`ngrok http 7860`
|
||||
```
|
||||
|
||||
> Tip: Use a subdomain for a consistent URL (e.g. `ngrok http -subdomain=mydomain http://localhost:7860`)
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### GET /api
|
||||
|
||||
Returns a simple "Hello, World!" message with a cute cat emoji to verify the server is running.
|
||||
|
||||
### POST /api/dial
|
||||
|
||||
Handles dial-in and dial-out requests for Pipecat Cloud.
|
||||
|
||||
#### Test Requests
|
||||
|
||||
The endpoint handles test requests when a webhook is configured. Send a request with `"Test": "test"` to verify your setup:
|
||||
|
||||
```json
|
||||
{
|
||||
"Test": "test"
|
||||
}
|
||||
```
|
||||
|
||||
#### Production Request Format
|
||||
|
||||
```json
|
||||
{
|
||||
// for dial-in from webhook
|
||||
"To": "+14152251493",
|
||||
"From": "+14158483432",
|
||||
"callId": "string-contains-uuid",
|
||||
"callDomain": "string-contains-uuid",
|
||||
// for making a dial out to a phone or SIP
|
||||
"dialout_settings": [
|
||||
{ "phoneNumber": "+14158483432", "callerId": "purchased_phone_uuid" },
|
||||
{ "sipUri": "sip:username@sip.hostname.com" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Deployment
|
||||
|
||||
The application is configured for Vercel deployment:
|
||||
|
||||
1. Push your code to a Git repository
|
||||
2. Import your project in Vercel dashboard
|
||||
3. Configure environment variables:
|
||||
- `PIPECAT_CLOUD_API_KEY`
|
||||
- `AGENT_NAME`
|
||||
- `PINLESS_HMAC_SECRET`
|
||||
- `LOG_LEVEL` (optional, defaults to 'info')
|
||||
4. Deploy!
|
||||
|
||||
## Security
|
||||
|
||||
- HMAC signature validation for request authentication
|
||||
- Environment variables for sensitive credentials
|
||||
- Method validation (POST only for /dial)
|
||||
@@ -0,0 +1,4 @@
|
||||
AGENT_NAME=my-first-agent
|
||||
PIPECAT_CLOUD_API_KEY=your_daily_api_key
|
||||
PINLESS_HMAC_SECRET=your_hmac_secret
|
||||
LOG_LEVEL="info"
|
||||
5447
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/package-lock.json
generated
Normal file
5447
examples/deployment/pipecat-cloud-daily-pstn-server/nextjs-webhook-server/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "my-daily-app",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "next dev -p 7860",
|
||||
"build": "next build",
|
||||
"start": "next start -p 7860",
|
||||
"lint": "next lint"
|
||||
},
|
||||
"dependencies": {
|
||||
"axios": "^1.6.0",
|
||||
"next": "^14.0.0",
|
||||
"pino": "^8.15.0",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "^8.46.0",
|
||||
"eslint-config-next": "^14.0.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,175 @@
|
||||
import { logger } from '../../lib/utils';
|
||||
import axios from 'axios';
|
||||
import crypto from 'crypto';
|
||||
|
||||
const validateSignature = (body, signature, timestamp, secret) => {
|
||||
// Skip if any required fields are missing
|
||||
if (!signature || !timestamp || !secret) {
|
||||
logger.warn('Missing required fields for HMAC validation');
|
||||
return true;
|
||||
}
|
||||
|
||||
try {
|
||||
const decodedSecret = Buffer.from(secret, 'base64');
|
||||
const hmac = crypto.createHmac('sha256', decodedSecret);
|
||||
const signatureData = `${timestamp}.${body}`;
|
||||
const computedSignature = hmac.update(signatureData).digest('base64');
|
||||
|
||||
logger.debug('Signature validation:', {
|
||||
timestamp,
|
||||
signatureData: signatureData.substring(0, 50) + '...',
|
||||
computedSignature,
|
||||
receivedSignature: signature
|
||||
});
|
||||
|
||||
return computedSignature === signature;
|
||||
} catch (error) {
|
||||
logger.error('Error validating signature:', error);
|
||||
return true; // Allow request to proceed on error
|
||||
}
|
||||
};
|
||||
|
||||
export default async function handler(req, res) {
|
||||
// Only allow POST requests
|
||||
if (req.method !== 'POST') {
|
||||
return res.status(405).json({ error: 'Method not allowed' });
|
||||
}
|
||||
|
||||
try {
|
||||
logger.info('Incoming request to /api/dial:');
|
||||
logger.info(`Headers: ${JSON.stringify(req.headers)}`);
|
||||
|
||||
const rawBody = JSON.stringify(req.body);
|
||||
logger.info(`Raw body: ${rawBody}`);
|
||||
|
||||
const signature = req.headers['x-pinless-signature'];
|
||||
const timestamp = req.headers['x-pinless-timestamp'];
|
||||
|
||||
if (signature && timestamp) {
|
||||
logger.info('Validating HMAC signature');
|
||||
if (!validateSignature(rawBody, signature, timestamp, process.env.PINLESS_HMAC_SECRET)) {
|
||||
logger.error('Invalid HMAC signature', { signature, timestamp });
|
||||
return res.status(401).json({
|
||||
error: 'Invalid signature',
|
||||
message: 'Invalid HMAC signature'
|
||||
});
|
||||
}
|
||||
} else {
|
||||
logger.info('Skipping HMAC validation - no signature headers present');
|
||||
}
|
||||
|
||||
// Extract request data
|
||||
const {
|
||||
Test: test,
|
||||
To,
|
||||
From,
|
||||
callId,
|
||||
callDomain,
|
||||
dialout_settings,
|
||||
voicemail_detection,
|
||||
call_transfer
|
||||
} = req.body;
|
||||
|
||||
// Handle test requests when a webhook is configured
|
||||
if (test === 'test') {
|
||||
logger.debug('Test request received');
|
||||
return res.status(200).json({ status: 'success', message: 'Test request received' });
|
||||
}
|
||||
|
||||
// Process dialin settings
|
||||
let dialin_settings = null;
|
||||
const requiredFields = ['To', 'From', 'callId', 'callDomain'];
|
||||
|
||||
if (requiredFields.every(field => req.body[field] !== undefined && req.body[field] !== null)) {
|
||||
dialin_settings = {
|
||||
// snake_case because pipecat expects this format
|
||||
From,
|
||||
To,
|
||||
call_id: callId,
|
||||
call_domain: callDomain,
|
||||
};
|
||||
logger.debug(`Populated dialin_settings from request: ${JSON.stringify(dialin_settings)}`);
|
||||
}
|
||||
|
||||
// Set up Daily room properties
|
||||
const daily_room_properties = {
|
||||
enable_dialout: dialout_settings !== undefined && dialout_settings !== null,
|
||||
exp: Math.floor(Date.now() / 1000) + (5 * 60), // 5 minutes from now
|
||||
};
|
||||
|
||||
// Configure SIP if dialin settings are provided
|
||||
if (dialin_settings !== null) {
|
||||
const sip_config = {
|
||||
display_name: From,
|
||||
sip_mode: 'dial-in',
|
||||
num_endpoints: call_transfer !== null ? 2 : 1,
|
||||
};
|
||||
daily_room_properties.sip = sip_config;
|
||||
}
|
||||
|
||||
// Prepare payload for {service}/start API call
|
||||
const payload = {
|
||||
createDailyRoom: true,
|
||||
dailyRoomProperties: daily_room_properties,
|
||||
body: {
|
||||
dialin_settings,
|
||||
dialout_settings,
|
||||
voicemail_detection,
|
||||
call_transfer,
|
||||
},
|
||||
};
|
||||
|
||||
logger.debug(`Daily room properties: ${JSON.stringify(daily_room_properties)}`);
|
||||
|
||||
// Get Daily API key and agent name from environment variables
|
||||
const pccApiKey = process.env.PIPECAT_CLOUD_API_KEY;
|
||||
const agentName = process.env.AGENT_NAME || 'my-first-agent';
|
||||
|
||||
if (!pccApiKey) {
|
||||
throw new Error('PIPECAT_CLOUD_API_KEY environment variable is not set');
|
||||
}
|
||||
|
||||
// Set up headers for Daily API call
|
||||
const headers = {
|
||||
'Authorization': `Bearer ${pccApiKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
};
|
||||
|
||||
const url = `https://api.pipecat.daily.co/v1/public/${agentName}/start`;
|
||||
logger.debug(`Making API call to Daily: ${url} ${JSON.stringify(headers)} ${JSON.stringify(payload)}`);
|
||||
|
||||
try {
|
||||
const response = await axios.post(url, payload, { headers });
|
||||
logger.debug(`Response: ${JSON.stringify(response.data)}`);
|
||||
|
||||
return res.status(200).json({
|
||||
status: 'success',
|
||||
data: response.data,
|
||||
room_properties: daily_room_properties,
|
||||
});
|
||||
} catch (error) {
|
||||
if (error.response) {
|
||||
// Pass through status code and error details from the Daily API
|
||||
const statusCode = error.response.status;
|
||||
const errorDetail = error.response.data || error.message;
|
||||
logger.error(`HTTP error: ${JSON.stringify(errorDetail)}`);
|
||||
return res.status(statusCode).json(errorDetail);
|
||||
} else {
|
||||
logger.error(`Request error: ${error.message}`);
|
||||
return res.status(500).json({ error: error.message });
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`Unexpected error: ${error.message}`);
|
||||
return res.status(500).json({ error: 'Internal server error', message: error.message });
|
||||
}
|
||||
}
|
||||
|
||||
// Configure body parser to preserve raw body text
|
||||
export const config = {
|
||||
api: {
|
||||
bodyParser: {
|
||||
sizeLimit: '1mb',
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -0,0 +1,6 @@
|
||||
import { logger } from '../../lib/utils';
|
||||
|
||||
export default function handler(req, res) {
|
||||
logger.info('Received request to /api');
|
||||
res.status(200).json({ message: 'Hello, World! from ᓚᘏᗢ' });
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
module.exports = {
|
||||
version: 2,
|
||||
buildCommand: "next build",
|
||||
outputDirectory: ".next",
|
||||
cleanUrls: true
|
||||
};
|
||||
@@ -17,8 +17,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
# Check if we're in local development mode
|
||||
|
||||
57
examples/foundational/01-say-one-thing-piper.py
Normal file
57
examples/foundational/01-say-one-thing-piper.py
Normal file
@@ -0,0 +1,57 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.piper.tts import PiperTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True)
|
||||
)
|
||||
|
||||
tts = PiperTTSService(
|
||||
base_url=os.getenv("PIPER_BASE_URL"), aiohttp_session=session, sample_rate=24000
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(Pipeline([tts, transport.output()]))
|
||||
|
||||
# Register an event handler so we can play the audio when the
|
||||
# participant joins.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await task.queue_frames(
|
||||
[TTSSpeakFrame(f"Hello there, how are you today ?"), EndFrame()]
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -15,7 +15,7 @@ from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -1,3 +1,9 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
@@ -12,7 +18,7 @@ from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import EndFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.riva import FastPitchTTSService
|
||||
from pipecat.services.riva.tts import FastPitchTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -17,8 +17,8 @@ from pipecat.frames.frames import EndFrame, LLMMessagesFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.fal.image import FalImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.fal.image import FalImageGenService
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -17,7 +17,7 @@ from pipecat.frames.frames import EndFrame, TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.google import GoogleImageGenService
|
||||
from pipecat.services.google.image import GoogleImageGenService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -27,9 +27,9 @@ from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||
from pipecat.services.fal.image import FalImageGenService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -27,9 +27,9 @@ from pipecat.pipeline.sync_parallel_pipeline import SyncParallelPipeline
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.sentence import SentenceAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||
from pipecat.services.fal.image import FalImageGenService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -26,8 +26,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -27,8 +27,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.audio.vad.silero import SileroVAD
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -28,7 +28,7 @@ from pipecat.processors.aggregators.llm_response import (
|
||||
LLMUserResponseAggregator,
|
||||
)
|
||||
from pipecat.processors.frameworks.langchain import LangchainProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -24,8 +24,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs import ElevenLabsHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.playht import PlayHTHttpTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.playht.tts import PlayHTHttpTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.playht import PlayHTTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.playht.tts import PlayHTTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
@@ -48,7 +48,7 @@ async def main():
|
||||
tts = PlayHTTTSService(
|
||||
user_id=os.getenv("PLAYHT_USER_ID"),
|
||||
api_key=os.getenv("PLAYHT_API_KEY"),
|
||||
voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
|
||||
voice_url="s3://voice-cloning-zero-shot/e46b4027-b38d-4d24-b292-38fbca2be0ef/original/manifest.json",
|
||||
params=PlayHTTTSService.InputParams(language=Language.EN),
|
||||
)
|
||||
|
||||
|
||||
@@ -18,7 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.azure import AzureLLMService, AzureSTTService, AzureTTSService
|
||||
from pipecat.services.azure.llm import AzureLLMService
|
||||
from pipecat.services.azure.stt import AzureSTTService
|
||||
from pipecat.services.azure.tts import AzureTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,7 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService, OpenAISTTService, OpenAITTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.openai.stt import OpenAISTTService
|
||||
from pipecat.services.openai.tts import OpenAITTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -57,7 +59,7 @@ async def main():
|
||||
prompt="Expect words related to dogs, such as breed names.",
|
||||
)
|
||||
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini-tts-latest")
|
||||
tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY"), voice="ballad")
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
|
||||
@@ -19,8 +19,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openpipe import OpenPipeLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openpipe.llm import OpenPipeLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.xtts import XTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.xtts.tts import XTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,9 +18,11 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.gladia.config import GladiaInputParams, LanguageConfig
|
||||
from pipecat.services.gladia.stt import GladiaSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -47,6 +49,11 @@ async def main():
|
||||
|
||||
stt = GladiaSTTService(
|
||||
api_key=os.getenv("GLADIA_API_KEY"),
|
||||
params=GladiaInputParams(
|
||||
language_config=LanguageConfig(
|
||||
languages=[Language.EN],
|
||||
)
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.lmnt import LmntTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.lmnt.tts import LmntTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -17,9 +17,9 @@ from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.ai_services import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.together.llm import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,9 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.aws import PollyTTSService
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.aws.tts import PollyTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,7 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.google import GoogleLLMService, GoogleSTTService, GoogleTTSService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.services.google.stt import GoogleSTTService
|
||||
from pipecat.services.google.tts import GoogleTTSService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
|
||||
@@ -18,9 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.assemblyai import AssemblyAISTTService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.assemblyai.stt import AssemblyAISTTService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -14,14 +14,15 @@ from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.filters.krisp_filter import KrispFilter
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.deepgram import DeepgramSTTService, DeepgramTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.deepgram.tts import DeepgramTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
from pipecat.vad.silero import SileroVADAnalyzer
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.rime import RimeHttpTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.rime.tts import RimeHttpTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.rime import RimeTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.services.rime.tts import RimeTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.nim import NimLLMService
|
||||
from pipecat.services.riva import FastPitchTTSService, ParakeetSTTService
|
||||
from pipecat.services.nim.llm import NimLLMService
|
||||
from pipecat.services.riva.stt import ParakeetSTTService
|
||||
from pipecat.services.riva.tts import FastPitchTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -32,8 +32,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.frame_processor import FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.fish import FishAudioTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.fish.tts import FishAudioTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.ultravox import UltravoxSTTService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.ultravox.stt import UltravoxSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -34,7 +34,7 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
# Want to initialize the ultravox processor since it takes time to load the model and dont
|
||||
# want to load it every time the pipeline is run
|
||||
ultravox_processor = UltravoxSTTService(
|
||||
model_size="fixie-ai/ultravox-v0_4_1-llama-3_1-8b",
|
||||
model_name="fixie-ai/ultravox-v0_5-llama-3_1-8b",
|
||||
hf_token=os.getenv("HF_TOKEN"),
|
||||
)
|
||||
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.neuphonic import NeuphonicHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,8 +18,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.neuphonic import NeuphonicTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.neuphonic.tts import NeuphonicTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,10 +18,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.fal import FalSTTService
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.fal.stt import FalSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -16,9 +16,9 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
103
examples/foundational/07y-interruptible-groq.py
Normal file
103
examples/foundational/07y-interruptible-groq.py
Normal file
@@ -0,0 +1,103 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.groq.llm import GroqLLMService
|
||||
from pipecat.services.groq.stt import GroqSTTService
|
||||
from pipecat.services.groq.tts import GroqTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
# transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = GroqSTTService(api_key=os.getenv("GROQ_API_KEY"))
|
||||
|
||||
llm = GroqLLMService(api_key=os.getenv("GROQ_API_KEY"), model="llama-3.3-70b-versatile")
|
||||
|
||||
tts = GroqTTSService(api_key=os.getenv("GROQ_API_KEY"))
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -19,8 +19,8 @@ from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.filters.wake_check_filter import WakeCheckFilter
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -29,8 +29,8 @@ from pipecat.processors.aggregators.openai_llm_context import (
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.processors.logger import FrameLogger
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -22,8 +22,8 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.moondream import MoondreamService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.moondream.vision import MoondreamService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -22,8 +22,8 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -22,8 +22,8 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -22,8 +22,8 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.user_response import UserResponseAggregator
|
||||
from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -12,12 +12,13 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.whisper import WhisperSTTService
|
||||
from pipecat.services.whisper.stt import WhisperSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -39,7 +40,15 @@ async def main():
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url, None, "Transcription bot", DailyParams(audio_in_enabled=True)
|
||||
room_url,
|
||||
None,
|
||||
"Transcription bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = WhisperSTTService()
|
||||
|
||||
@@ -10,12 +10,13 @@ import sys
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.whisper import WhisperSTTService
|
||||
from pipecat.services.whisper.stt import WhisperSTTService
|
||||
from pipecat.transports.local.audio import LocalAudioTransport, LocalAudioTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -33,7 +34,14 @@ class TranscriptionLogger(FrameProcessor):
|
||||
|
||||
|
||||
async def main():
|
||||
transport = LocalAudioTransport(LocalAudioTransportParams(audio_in_enabled=True))
|
||||
transport = LocalAudioTransport(
|
||||
LocalAudioTransportParams(
|
||||
audio_in_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
)
|
||||
)
|
||||
|
||||
stt = WhisperSTTService()
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.deepgram import DeepgramSTTService, Language, LiveOptions
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService, Language, LiveOptions
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -45,7 +45,7 @@ async def main():
|
||||
|
||||
stt = DeepgramSTTService(
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
# live_options=LiveOptions(language=Language.FR),
|
||||
# live_options=LiveOptions(language=Language.FR),
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
@@ -18,7 +18,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.assemblyai import AssemblyAISTTService
|
||||
from pipecat.services.assemblyai.stt import AssemblyAISTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
95
examples/foundational/13e-whisper-mlx.py
Normal file
95
examples/foundational/13e-whisper-mlx.py
Normal file
@@ -0,0 +1,95 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame, UserStoppedSpeakingFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.whisper.stt import MLXModel, WhisperSTTServiceMLX
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
STOP_SECS = 2.0
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
"""Measures transcription latency.
|
||||
|
||||
Uses the (intentionally) long STOP_SECS parameter to give the transcription time to finish,
|
||||
then outputs the timing between when the VAD first classified audio input as not-speech and
|
||||
the delivery of the last transcription frame.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._last_transcription_time = time.time()
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, UserStoppedSpeakingFrame):
|
||||
logger.debug(
|
||||
f"Transcription latency: {(STOP_SECS - (time.time() - self._last_transcription_time)):.2f}"
|
||||
)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
self._last_transcription_time = time.time()
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, _) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
None,
|
||||
"Transcription bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS)),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
stt = WhisperSTTServiceMLX(model=MLXModel.LARGE_V3_TURBO)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
|
||||
pipeline = Pipeline([transport.input(), stt, tl])
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=False,
|
||||
),
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -20,8 +20,9 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,8 +20,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,8 +20,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.anthropic import AnthropicLLMService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.anthropic.llm import AnthropicLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,9 +20,9 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.services.together import TogetherLLMService
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.together.llm import TogetherLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -19,8 +19,9 @@ from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,9 +20,9 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.google.llm import GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,9 +20,10 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.groq import GroqLLMService, GroqSTTService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.groq.llm import GroqLLMService
|
||||
from pipecat.services.groq.stt import GroqSTTService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -19,9 +19,9 @@ from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.grok import GrokLLMService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.grok.llm import GrokLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,9 +20,9 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.azure import AzureLLMService
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.azure.llm import AzureLLMService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,9 +20,9 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.fireworks import FireworksLLMService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.fireworks.llm import FireworksLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,9 +20,9 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.nim import NimLLMService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.nim.llm import NimLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,9 +20,9 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.cerebras import CerebrasLLMService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.cerebras.llm import CerebrasLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,9 +20,9 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.deepseek import DeepSeekLLMService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepseek.llm import DeepSeekLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -21,8 +21,8 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.azure import AzureTTSService
|
||||
from pipecat.services.openrouter import OpenRouterLLMService
|
||||
from pipecat.services.azure.tts import AzureTTSService
|
||||
from pipecat.services.openrouter.llm import OpenRouterLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -18,17 +18,15 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMContext, OpenAILLMService
|
||||
from pipecat.services.perplexity import PerplexityLLMService
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.perplexity.llm import PerplexityLLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -20,9 +20,9 @@ from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.google import GoogleLLMOpenAIBetaService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs.tts import ElevenLabsTTSService
|
||||
from pipecat.services.google.llm_openai import GoogleLLMOpenAIBetaService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user