Compare commits
212 Commits
v0.0.107
...
aleix/para
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
644babe241 | ||
|
|
b33df03724 | ||
|
|
28fbe1db08 | ||
|
|
9240e92d9f | ||
|
|
5caf53f086 | ||
|
|
ac2716811c | ||
|
|
d313d56776 | ||
|
|
159776f106 | ||
|
|
a23803478f | ||
|
|
bae193ab4d | ||
|
|
04adb697be | ||
|
|
4f9c8a6860 | ||
|
|
a1a29b3933 | ||
|
|
0798803c70 | ||
|
|
6422661d08 | ||
|
|
ed94b65d83 | ||
|
|
f9670b9601 | ||
|
|
5b2991f47f | ||
|
|
fc3186dc0d | ||
|
|
1808b447c9 | ||
|
|
70df9d3fe4 | ||
|
|
a8bfc23d3a | ||
|
|
e2870fc2ac | ||
|
|
e851f8c1d5 | ||
|
|
b31bece617 | ||
|
|
9e350bcc2f | ||
|
|
9c2594c484 | ||
|
|
900fc88430 | ||
|
|
4ef5ac6f0c | ||
|
|
cbb3d99493 | ||
|
|
fb1996cedc | ||
|
|
95c55ec6c3 | ||
|
|
a45de9af7f | ||
|
|
5e61a57582 | ||
|
|
d8b0ed18fd | ||
|
|
789275a57b | ||
|
|
38c961a363 | ||
|
|
41a86a51bf | ||
|
|
e1bfa4cf21 | ||
|
|
537d57449e | ||
|
|
33e146decd | ||
|
|
eee47deb34 | ||
|
|
21a729ae5d | ||
|
|
1870f4010e | ||
|
|
28683a7296 | ||
|
|
0e504d876d | ||
|
|
5c51981207 | ||
|
|
a13c4d1248 | ||
|
|
ca1b4ad124 | ||
|
|
533dcdba3f | ||
|
|
7eec03cb77 | ||
|
|
83911dced6 | ||
|
|
4e4a8c45d5 | ||
|
|
9c6d51c570 | ||
|
|
9152d85824 | ||
|
|
6a87d0e87d | ||
|
|
fe0633ecd1 | ||
|
|
ca2bfd6f12 | ||
|
|
345ccc0abe | ||
|
|
800fd6a916 | ||
|
|
d286991257 | ||
|
|
a06bf47ed2 | ||
|
|
5ad4aa9bea | ||
|
|
c4466ba678 | ||
|
|
df602b900d | ||
|
|
c331c75d66 | ||
|
|
f7ec6befe1 | ||
|
|
6a6ee8d563 | ||
|
|
259f5e124c | ||
|
|
cfe91d11ec | ||
|
|
467184e63e | ||
|
|
af566ac936 | ||
|
|
62484a4fc3 | ||
|
|
7fef3b01eb | ||
|
|
6d1918f12a | ||
|
|
e58740e948 | ||
|
|
ddfe44940d | ||
|
|
fdbdbc8be3 | ||
|
|
3cd7d882fb | ||
|
|
2d78533d77 | ||
|
|
c1dd44f947 | ||
|
|
9db15e7942 | ||
|
|
503e5e9106 | ||
|
|
2ff4b3f4a3 | ||
|
|
b4096f9a11 | ||
|
|
c4253a7d98 | ||
|
|
2441c4f801 | ||
|
|
a7a55dd30e | ||
|
|
de6a7223ba | ||
|
|
165932e1cc | ||
|
|
1f0d9ad01a | ||
|
|
052075c244 | ||
|
|
a8d0e1de9f | ||
|
|
4f0b2066c0 | ||
|
|
413dbaf974 | ||
|
|
5645909d34 | ||
|
|
da3f184316 | ||
|
|
e5a2723632 | ||
|
|
4ee4002d5d | ||
|
|
54a17ab1f3 | ||
|
|
1c99a537b2 | ||
|
|
ff5d055b3c | ||
|
|
adc003d6c7 | ||
|
|
bbd14de9c5 | ||
|
|
02b97035f8 | ||
|
|
f470ff193e | ||
|
|
7bc8b89a54 | ||
|
|
a8eff6fbbf | ||
|
|
86e086c6b5 | ||
|
|
4bdfe1cf31 | ||
|
|
bb33045389 | ||
|
|
ac2b1ecd47 | ||
|
|
e7dd84b552 | ||
|
|
39329aaddb | ||
|
|
56a56a4174 | ||
|
|
b80328e038 | ||
|
|
3a80be760b | ||
|
|
b66c892100 | ||
|
|
6c30371295 | ||
|
|
ddf6a41854 | ||
|
|
e0c49927cf | ||
|
|
45926a7135 | ||
|
|
8c678c1c98 | ||
|
|
4c121332cf | ||
|
|
74686f9190 | ||
|
|
19bcc8620c | ||
|
|
0530722c58 | ||
|
|
0d1b834770 | ||
|
|
7a0f7b58d1 | ||
|
|
5806a3f0fa | ||
|
|
27fabfc1b3 | ||
|
|
d779a5b4ea | ||
|
|
2bb36b5b66 | ||
|
|
e0bc9c73c6 | ||
|
|
2135557689 | ||
|
|
a0393b9af6 | ||
|
|
64ba013b68 | ||
|
|
7377d88cf5 | ||
|
|
3bbec0a2c8 | ||
|
|
e29a63e1ae | ||
|
|
45178972d7 | ||
|
|
bb7199d143 | ||
|
|
d4dea30407 | ||
|
|
b49bf1c83f | ||
|
|
1b0f7ecb0e | ||
|
|
8e57dd67a2 | ||
|
|
5d71de8aad | ||
|
|
dc56cb2ccc | ||
|
|
063955b7eb | ||
|
|
e05bd54743 | ||
|
|
35f52f70ab | ||
|
|
d05eb02b98 | ||
|
|
4abd4d031d | ||
|
|
7e42998e9e | ||
|
|
28eb4544d3 | ||
|
|
b45dcb1ae0 | ||
|
|
6eb988b729 | ||
|
|
f68b3222b3 | ||
|
|
3274235ea1 | ||
|
|
05b9c514fb | ||
|
|
03c0d7c345 | ||
|
|
0783edb185 | ||
|
|
51d28b4a9f | ||
|
|
cf083b8411 | ||
|
|
099814d74a | ||
|
|
dd45843c42 | ||
|
|
fe15d8654b | ||
|
|
68a440ae2e | ||
|
|
8109ab6135 | ||
|
|
f311a0b6e4 | ||
|
|
9df8985d60 | ||
|
|
b3a25e0ebe | ||
|
|
02cfb129d3 | ||
|
|
311afef7da | ||
|
|
5ed183d215 | ||
|
|
5c3d3aea2b | ||
|
|
0651569a4e | ||
|
|
bf04ea2043 | ||
|
|
aa0b49d69f | ||
|
|
8c6f4a8d7b | ||
|
|
bbaa5971c4 | ||
|
|
cdd8c3e5bb | ||
|
|
1c8a8f51d4 | ||
|
|
349b8645f3 | ||
|
|
696196e30c | ||
|
|
dacffccd3a | ||
|
|
f21b262969 | ||
|
|
42cab7eea0 | ||
|
|
483b643b07 | ||
|
|
d314e2831a | ||
|
|
0f6cc231cf | ||
|
|
844555c520 | ||
|
|
3428a4c6ad | ||
|
|
f8c7414ea7 | ||
|
|
f1f51de962 | ||
|
|
c32240e14b | ||
|
|
e9f3086ea3 | ||
|
|
05157129e2 | ||
|
|
4a0411cbc4 | ||
|
|
6cd39b8b42 | ||
|
|
38d7882f0f | ||
|
|
9a55eb67cf | ||
|
|
8a4f6b486e | ||
|
|
8745f20330 | ||
|
|
3e5be23bd8 | ||
|
|
33f042b500 | ||
|
|
0722784f3a | ||
|
|
cbc1c275b3 | ||
|
|
14ca70f13e | ||
|
|
f7568a91b1 | ||
|
|
dfe5fec8f9 | ||
|
|
dc0386937a |
@@ -65,12 +65,25 @@ Once your PR is submitted, post in the `#community-integrations` Discord channel
|
||||
|
||||
#### Websocket-based Services
|
||||
|
||||
**Base class:** `WebsocketSTTService`
|
||||
|
||||
**Use for:** Services where you manage the websocket connection directly. Combines `STTService` with `WebsocketService` for automatic reconnection and keepalive support.
|
||||
|
||||
**Examples:**
|
||||
|
||||
- [CartesiaSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/cartesia/stt.py)
|
||||
- [ElevenLabsRealtimeSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/stt.py)
|
||||
|
||||
#### SDK-based Streaming Services
|
||||
|
||||
**Base class:** `STTService`
|
||||
|
||||
**Use for:** Streaming services where the provider's Python SDK manages the connection internally.
|
||||
|
||||
**Examples:**
|
||||
|
||||
- [DeepgramSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/deepgram/stt.py)
|
||||
- [SpeechmaticsSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/speechmatics/stt.py)
|
||||
- [GoogleSTTService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/stt.py)
|
||||
|
||||
#### File-based Services
|
||||
|
||||
@@ -108,55 +121,59 @@ Once your PR is submitted, post in the `#community-integrations` Discord channel
|
||||
|
||||
#### Key requirements:
|
||||
|
||||
- **Frame sequence:** Output must follow this frame sequence pattern:
|
||||
- `LLMFullResponseStartFrame` - Signals the start of an LLM response
|
||||
- `LLMTextFrame` - Contains LLM content, typically streamed as tokens
|
||||
- `LLMFullResponseEndFrame` - Signals the end of an LLM response
|
||||
- **`_process_context(self, context: LLMContext)`** — The main method that processes an LLM context and generates a response. Each LLM service overrides `process_frame` to extract context from `LLMContextFrame` and calls `_process_context`.
|
||||
|
||||
- **Context aggregation:** Implement context aggregation to collect user and assistant content:
|
||||
- Aggregators come in pairs with a `user()` instance and `assistant()` instance
|
||||
- Context must adhere to the `LLMContext` universal format
|
||||
- Aggregators should handle adding messages, function calls, and images to the context
|
||||
- **`adapter_class`** — Class attribute pointing to a `BaseLLMAdapter` subclass. Defaults to `OpenAILLMAdapter`. Non-OpenAI services must implement their own adapter (see `src/pipecat/adapters/base_llm_adapter.py`) with methods:
|
||||
- `get_llm_invocation_params(context)` — Extract provider-specific params from universal context
|
||||
- `to_provider_tools_format(tools_schema)` — Convert standard tools to provider format
|
||||
- `get_messages_for_logging(context)` — Format messages for logging
|
||||
- Reference adapters: `src/pipecat/adapters/services/` (anthropic, gemini, bedrock, etc.)
|
||||
|
||||
- **Frame sequence:** Output must follow this frame sequence pattern:
|
||||
- `LLMFullResponseStartFrame` — Signals the start of an LLM response
|
||||
- `LLMTextFrame` — Contains LLM content, typically streamed as tokens
|
||||
- `LLMFullResponseEndFrame` — Signals the end of an LLM response
|
||||
|
||||
- **Thought frames (reasoning models):** If the model supports extended thinking / chain-of-thought, emit thought frames alongside the response:
|
||||
- `LLMThoughtStartFrame` — Signals the start of a thought
|
||||
- `LLMThoughtTextFrame` — Contains thought content, streamed as tokens
|
||||
- `LLMThoughtEndFrame` — Signals the end of a thought
|
||||
|
||||
- **Context aggregation** is handled by the framework via `LLMContext` + `LLMContextAggregatorPair`. The LLM service just processes context it receives — no need to implement aggregators.
|
||||
|
||||
### TTS (Text-to-Speech) Services
|
||||
|
||||
#### AudioContextWordTTSService
|
||||
#### WebsocketTTSService
|
||||
|
||||
**Use for:** Websocket-based services supporting word/timestamp alignment
|
||||
**Use for:** Websocket-based streaming services (with or without word timestamps)
|
||||
|
||||
**Example:**
|
||||
**Examples:**
|
||||
|
||||
- [CartesiaTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/cartesia/tts.py)
|
||||
- [ElevenLabsTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/tts.py)
|
||||
|
||||
#### InterruptibleTTSService
|
||||
|
||||
**Use for:** Websocket-based services without word/timestamp alignment, requiring disconnection on interruption
|
||||
**Use for:** Websocket-based services without word timestamps that reconnect on interruption (e.g. don't support a context ID or interruption message)
|
||||
|
||||
**Example:**
|
||||
|
||||
- [SarvamTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/sarvam/tts.py)
|
||||
|
||||
#### WordTTSService
|
||||
|
||||
**Use for:** HTTP-based services supporting word/timestamp alignment
|
||||
|
||||
**Example:**
|
||||
|
||||
- [ElevenLabsHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/elevenlabs/tts.py)
|
||||
|
||||
#### TTSService
|
||||
|
||||
**Use for:** HTTP-based services without word/timestamp alignment
|
||||
**Use for:** HTTP-based services (word timestamps are supported in the base class)
|
||||
|
||||
**Example:**
|
||||
**Examples:**
|
||||
|
||||
- [GoogleHttpTTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/google/tts.py)
|
||||
- [OpenAITTSService](https://github.com/pipecat-ai/pipecat/blob/main/src/pipecat/services/openai/tts.py)
|
||||
|
||||
#### Key requirements:
|
||||
|
||||
- For websocket services, use asyncio WebSocket implementation (required for v13+ support)
|
||||
- For websocket services, use asyncio WebSocket implementation
|
||||
- Handle idle service timeouts with keepalives
|
||||
- TTSServices push both audio (`TTSRawAudioFrame`) and text (`TTSTextFrame`) frames
|
||||
- TTS services push both audio (`TTSAudioRawFrame`) and text (`TTSTextFrame`) frames
|
||||
|
||||
### Telephony Serializers
|
||||
|
||||
@@ -200,9 +217,9 @@ Vision services process images and provide analysis such as descriptions, object
|
||||
|
||||
#### Key requirements:
|
||||
|
||||
- Must implement `run_vision` method that takes an `LLMContext` and returns an `AsyncGenerator[Frame, None]`
|
||||
- The method processes the latest image in the context and yields frames with analysis results
|
||||
- Typically yields `TextFrame` objects containing descriptions or answers
|
||||
- Must implement `run_vision` method that takes a `UserImageRawFrame` and returns an `AsyncGenerator[Frame, None]`
|
||||
- The method processes the image frame and yields frames with analysis results
|
||||
- Must yield the frame sequence: `VisionFullResponseStartFrame`, `VisionTextFrame`, `VisionFullResponseEndFrame`
|
||||
|
||||
## Implementation Guidelines
|
||||
|
||||
@@ -381,7 +398,7 @@ Note that `self.sample_rate` is a `@property` set in the TTSService base class,
|
||||
|
||||
Use Pipecat's tracing decorators:
|
||||
|
||||
- **STT:** `@traced_stt` - decorate a function that handles `transcript`, `is_final`, `language` as args
|
||||
- **STT:** `@traced_stt` - decorate `_handle_transcription(self, transcript, is_final, language)` (the standard method name convention)
|
||||
- **LLM:** `@traced_llm` - decorate the `_process_context()` method
|
||||
- **TTS:** `@traced_tts` - decorate the `run_tts()` method
|
||||
|
||||
@@ -403,17 +420,15 @@ For REST-based communication, use aiohttp. Pipecat includes this as a required d
|
||||
- Wrap API calls in appropriate try/catch blocks
|
||||
- Handle rate limits and network failures gracefully
|
||||
- Provide meaningful error messages
|
||||
- When errors occur, raise exceptions AND push `ErrorFrame`s to notify the pipeline:
|
||||
- When errors occur, raise exceptions AND push errors to notify the pipeline:
|
||||
|
||||
```python
|
||||
from pipecat.frames.frames import ErrorFrame
|
||||
|
||||
try:
|
||||
# Your API call
|
||||
result = await self._make_api_call()
|
||||
except Exception as e:
|
||||
# Push error frame to pipeline
|
||||
await self.push_error(ErrorFrame(error=f"{self} error: {e}"))
|
||||
# Push error upstream to notify the pipeline
|
||||
await self.push_error(f"{self} error: {e}", exception=e)
|
||||
# Raise or handle as appropriate
|
||||
raise
|
||||
```
|
||||
|
||||
28
README.md
28
README.md
@@ -85,20 +85,20 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
||||
|
||||
## 🧩 Available services
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/server/services/video/lemonslice), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Community | [Browse community integrations →](https://docs.pipecat.ai/server/services/community-integrations) |
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
|
||||
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
|
||||
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://docs.pipecat.ai/server/services/video/lemonslice), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
|
||||
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
|
||||
| Community | [Browse community integrations →](https://docs.pipecat.ai/server/services/community-integrations) |
|
||||
|
||||
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)
|
||||
|
||||
|
||||
1
changelog/3978.added.md
Normal file
1
changelog/3978.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `SarvamLLMService` with support for `sarvam-30b`, `sarvam-30b-16k`, `sarvam-105b` and `sarvam-105b-32k`
|
||||
1
changelog/4013.added.md
Normal file
1
changelog/4013.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `on_turn_context_created(context_id)` hook to `TTSService`. Override this to perform provider-specific setup (e.g. eagerly opening a server-side context) before text starts flowing. Called each time a new turn context ID is created.
|
||||
1
changelog/4013.changed.md
Normal file
1
changelog/4013.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added context prewarming path for `InworldTTSService` to improve first audio latency
|
||||
1
changelog/4022.changed.md
Normal file
1
changelog/4022.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `KrispVivaVadAnalyzer` for Voice Activity Detection using the Krisp VIVA SDK (requires `krisp_audio`).
|
||||
1
changelog/4028.changed.md
Normal file
1
changelog/4028.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Modeified `InworldTTSService` to close context at end of turn instead of relying on idle timeout
|
||||
1
changelog/4031.added.md
Normal file
1
changelog/4031.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `XAIHttpTTSService` for text-to-speech using xAI's HTTP TTS API.
|
||||
1
changelog/4078.changed.md
Normal file
1
changelog/4078.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added Gemini 3 support to the Gemini Live service.
|
||||
1
changelog/4084.changed.md
Normal file
1
changelog/4084.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- `TTSService`: the default `stop_frame_timeout_s` (idle time before an automatic `TTSStoppedFrame` is pushed when `push_stop_frames=True`) has changed from `2.0` to `3.0` seconds.
|
||||
1
changelog/4089.added.md
Normal file
1
changelog/4089.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added support for "developer" role messages in conversation context across all LLM adapters. For non-OpenAI services (Anthropic, Google, AWS Bedrock), "developer" messages are converted to "user" messages (use `system_instruction` to set the system instruction). For OpenAI services, "developer" messages pass through in conversation history. For the Responses API, they are kept as "developer" role (matching the existing "system" → "developer" conversion).
|
||||
1
changelog/4089.changed.md
Normal file
1
changelog/4089.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ `GeminiLLMAdapter` now only treats `messages[0]` as the initial system message, matching all other adapters. Previously it searched for the first "system" message anywhere in the conversation history. A "system" message appearing later in the list will now be converted to "user" instead of being extracted as the system instruction.
|
||||
1
changelog/4089.fixed.2.md
Normal file
1
changelog/4089.fixed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed Gemini Live (`GoogleGeminiLiveLLMService`) not honoring `settings.system_instruction`. The system instruction was being read from a deprecated constructor parameter instead of the settings object, causing it to be silently ignored.
|
||||
1
changelog/4089.fixed.md
Normal file
1
changelog/4089.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `AWSBedrockLLMAdapter` sending an empty message list to the API when the only message in context was a system message. The lone system message is now converted to "user" role instead of being extracted, matching the existing Anthropic adapter behavior.
|
||||
1
changelog/4092.added.md
Normal file
1
changelog/4092.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `SmallestTTSService`, a WebSocket-based TTS service integration with Smallest AI's Waves API. Supports the Lightning v2 and v3.1 models with configurable voice, language, speed, consistency, similarity, and enhancement settings.
|
||||
1
changelog/4113.changed.md
Normal file
1
changelog/4113.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `InworldTtsService` to fallback to full text when TTS timestamps are not received
|
||||
1
changelog/4115.added.md
Normal file
1
changelog/4115.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added warnings in turn stop strategies when `VADParams.stop_secs` differs from the recommended default (0.2s) or when `stop_secs >= STT p99 latency`, which collapses the STT wait timeout to 0s and may cause delayed turn detection. The warnings guide developers to re-run the [stt-benchmark](https://github.com/pipecat-ai/stt-benchmark) with their VAD settings.
|
||||
1
changelog/4117.added.md
Normal file
1
changelog/4117.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `domain` parameter to `AssemblyAISTTSettings` for specialized recognition modes such as Medical Mode (`domain="medical-v1"`).
|
||||
1
changelog/4119.added.md
Normal file
1
changelog/4119.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `NovitaLLMService` for using Novita AI's LLM models via their OpenAI-compatible API.
|
||||
1
changelog/4120.added.md
Normal file
1
changelog/4120.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `cleanup()` method to `VADAnalyzer` and `VADController` so VAD analyzer resources are properly released when no longer needed. Custom `VADAnalyzer` subclasses can override `cleanup()` to free any held resources.
|
||||
1
changelog/4125.fixed.md
Normal file
1
changelog/4125.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed Gemini Live pipeline hanging indefinitely when an `EndFrame` was deferred while waiting for the bot to finish responding and `turn_complete` never arrived. As a possible root-cause fix, `turn_complete` messages are now handled even if they lack `usage_metadata`. As a fallback, the deferred `EndFrame` now has a 30-second safety timeout.
|
||||
1
changelog/4126.fixed.md
Normal file
1
changelog/4126.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed ElevenLabs WebSocket disconnections (1008 "Maximum simultaneous contexts exceeded") caused by rapid user interruptions. When interruptions arrived before any TTS text was generated, phantom contexts were created on the ElevenLabs server that were never closed, eventually exceeding the 5-context limit.
|
||||
1
changelog/4127.fixed.md
Normal file
1
changelog/4127.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed the final sentence being dropped from the conversation context when using RTVI text input with non-word-timestamp TTS services. The `LLMFullResponseEndFrame` was racing ahead of the last `TTSTextFrame`, causing the `LLMAssistantAggregator` to finalize the context before the final sentence arrived.
|
||||
1
changelog/4128.added.md
Normal file
1
changelog/4128.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `on_end_of_turn` event handler to `AssemblyAISTTService`. This fires after the final transcript is pushed, providing a reliable hook for end-of-turn logic that doesn't race with `TranscriptionFrame`. Works in both Pipecat and AssemblyAI turn detection modes.
|
||||
1
changelog/4130.changed.md
Normal file
1
changelog/4130.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Realtime services (Gemini Live, OpenAI Realtime, Grok Realtime, Nova Sonic) now prefer `system_instruction` from service settings over an initial system message in the LLM context, matching the behavior of non-realtime services. Previously, context-provided system instructions took precedence. A warning is now logged when both are set.
|
||||
1
changelog/4135.fixed.md
Normal file
1
changelog/4135.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed audio crackling and popping in recordings when both user and bot are speaking. `AudioBufferProcessor` no longer injects silence into a track's buffer while that track is actively producing audio, preventing mid-utterance interruptions in the recorded output.
|
||||
1
changelog/4136.changed.2.md
Normal file
1
changelog/4136.changed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Bumped `nvidia-riva-client` minimum version to `>=2.25.1`.
|
||||
1
changelog/4136.changed.md
Normal file
1
changelog/4136.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Upgraded `protobuf` from 5.x to 6.x (`>=6.31.1,<7`).
|
||||
1
changelog/4137.changed.md
Normal file
1
changelog/4137.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Unrecognized language strings (e.g. Deepgram's `"multi"`) no longer produce a warning at startup. The log message has been downgraded to debug level since these are valid service-specific values that are passed through correctly.
|
||||
1
changelog/4142.changed.md
Normal file
1
changelog/4142.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- `GrokLLMService` and `GrokRealtimeLLMService` now live in the `pipecat.services.xai` module alongside `XAIHttpTTSService`, since all three use the same xAI API. Update imports from `pipecat.services.grok.*` to `pipecat.services.xai.*` (e.g. `from pipecat.services.xai.llm import GrokLLMService`).
|
||||
1
changelog/4142.deprecated.md
Normal file
1
changelog/4142.deprecated.md
Normal file
@@ -0,0 +1 @@
|
||||
- `pipecat.services.grok.llm`, `pipecat.services.grok.realtime.llm`, and `pipecat.services.grok.realtime.events` are deprecated. The old import paths still work but emit a `DeprecationWarning`; use `pipecat.services.xai.llm`, `pipecat.services.xai.realtime.llm`, and `pipecat.services.xai.realtime.events` instead.
|
||||
1
changelog/4143.added.md
Normal file
1
changelog/4143.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `DeepgramFluxSageMakerSTTService` for running Deepgram Flux speech-to-text on AWS SageMaker endpoints. Use with `ExternalUserTurnStrategies` to take advantage of Flux's turn detection.
|
||||
1
changelog/4145.fixed.2.md
Normal file
1
changelog/4145.fixed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed websocket TTS word timestamps so interrupted contexts cannot leak stale words or backward PTS values into later turns.
|
||||
1
changelog/4145.fixed.md
Normal file
1
changelog/4145.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed a race condition in `InterruptibleTTSService` where, if `run_tts` had been invoked but `BotStartedSpeakingFrame` had not yet been received, a user interruption could allow stale audio to leak through.
|
||||
1
changelog/4145.removed.md
Normal file
1
changelog/4145.removed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ `TTSService.add_word_timestamps()` no longer supports the `"Reset"` and `"TTSStoppedFrame"` sentinel strings. If you have a custom TTS service that called `await self.add_word_timestamps([("Reset", 0)])` or `await self.add_word_timestamps([("TTSStoppedFrame", 0), ("Reset", 0)], ctx_id)`, replace them with `await self.append_to_audio_context(ctx_id, TTSStoppedFrame(context_id=ctx_id))` and let `_handle_audio_context` manage the word-timestamp reset automatically.
|
||||
1
changelog/4146.fixed.md
Normal file
1
changelog/4146.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed Gemini Live local VAD mode (`GeminiVADParams(disabled=True)` with external VAD) not working. The bot now correctly detects user speech and signals turn boundaries to the Gemini API.
|
||||
1
changelog/4147.fixed.md
Normal file
1
changelog/4147.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed Gemini Live message handling to process all `server_content` fields independently. Gemini 3.x can bundle multiple fields (e.g. `model_turn` and `output_transcription`) on the same message, but the previous `elif` chain only processed the first match, silently dropping the rest.
|
||||
1
changelog/4149.fixed.md
Normal file
1
changelog/4149.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `ServiceSwitcher` with `ServiceSwitcherStrategyFailover` incorrectly triggering failover when `ErrorFrame`s from other pipeline stages (e.g. TTS) propagated upstream through the switcher. Previously, any non-fatal error passing through would be misattributed to the active service and trigger an unwanted service switch. Now only errors originating from the switcher's own managed services trigger failover.
|
||||
1
changelog/4151.fixed.md
Normal file
1
changelog/4151.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `LiveKitOutputTransport` not clearing the `rtc.AudioSource` internal buffer on interruption, causing the bot to continue speaking for several seconds after being interrupted.
|
||||
1
changelog/4152.fixed.md
Normal file
1
changelog/4152.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed a crash in OpenAI LLM processing when the provider returns `chunk.choices[0].delta.audio = None`, which caused `'NoneType' object has no attribute 'get'` errors during audio transcript handling.
|
||||
1
changelog/4153.fixed.md
Normal file
1
changelog/4153.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed error floods in `DeepgramSTTService` when the WebSocket connection drops. With Deepgram SDK 6.x, `send_media()` raises exceptions on a dead connection instead of silently failing, causing every queued audio frame to log an error. Now `send_media()` failures are caught gracefully — a single warning is logged and audio frames are skipped until the existing reconnection logic restores the connection.
|
||||
1
changelog/4154.removed.md
Normal file
1
changelog/4154.removed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Removed `SambaNovaSTTService`. SambaNova no longer offers speech-to-text audio models. Use another STT provider instead.
|
||||
1
changelog/4156.added.md
Normal file
1
changelog/4156.added.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added `Mem0MemoryService.get_memories()` convenience method for retrieving all stored memories outside the pipeline (e.g. to build a personalized greeting at connection time). This avoids the need to manually handle client type branching, filter construction, and async wrapping.
|
||||
1
changelog/4156.changed.md
Normal file
1
changelog/4156.changed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Bumped `mem0ai` dependency from `~=0.1.94` to `>=1.0.8,<2`. Users of the `mem0` extra will need to update their mem0ai package.
|
||||
1
changelog/4156.fixed.2.md
Normal file
1
changelog/4156.fixed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed `Mem0MemoryService` failing to store messages when the context contained system or developer role messages. The Mem0 API only accepts user and assistant roles, so other roles are now filtered out before storing.
|
||||
1
changelog/4156.fixed.md
Normal file
1
changelog/4156.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- `Mem0MemoryService` no longer blocks the event loop during memory storage and retrieval. All Mem0 API calls now run in a background thread, and message storage is fire-and-forget so it doesn't delay downstream processing.
|
||||
1
changelog/4161.fixed.md
Normal file
1
changelog/4161.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Added missing `on_dtmf_event` callback to `LemonSliceTransportClient.setup()` `DailyCallbacks` construction, fixing a `ValidationError` at pipeline setup time.
|
||||
1
changelog/4167.fixed.2.md
Normal file
1
changelog/4167.fixed.2.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed an issue in `InworldTTSService` where, in cases of fast interruption, we would continue receiving audio from the previous context.
|
||||
1
changelog/4167.fixed.md
Normal file
1
changelog/4167.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed a word timestamp interleaving issue in `InworldTTSService` when processing multiple sentences.
|
||||
1
changelog/4172.fixed.md
Normal file
1
changelog/4172.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed duplicate `TTSStoppedFrame` being pushed in TTS services using `push_stop_frames=True`. When the stop-frame timeout fired, a second `TTSStoppedFrame` could be pushed after the normal one at context completion.
|
||||
1
changelog/4172.performance.md
Normal file
1
changelog/4172.performance.md
Normal file
@@ -0,0 +1 @@
|
||||
- `RimeTTSService` now handles Rime's `done` WebSocket message to complete audio contexts immediately, eliminating the 3-second idle timeout that previously added latency at the end of each utterance.
|
||||
1
changelog/4174.fixed.md
Normal file
1
changelog/4174.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- ⚠️ Fixed `DeepgramSTTService` compatibility with deepgram-sdk 6.1.0. The SDK now requires explicit message objects for `send_keep_alive()`, `send_close_stream()`, and `send_finalize()`. The minimum deepgram-sdk version is now 6.1.0.
|
||||
1
changelog/4176.fixed.md
Normal file
1
changelog/4176.fixed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Fixed RTVI events not being delivered to clients when using WebSocket transports. `ProtobufFrameSerializer` now sets `ignore_rtvi_messages=False` by default.
|
||||
12
env.example
12
env.example
@@ -80,9 +80,6 @@ GOOGLE_TEST_CREDENTIALS=...
|
||||
# Gradium
|
||||
GRAPDIUM_API_KEY=...
|
||||
|
||||
# Grok
|
||||
GROK_API_KEY=...
|
||||
|
||||
# Groq
|
||||
GROQ_API_KEY=...
|
||||
|
||||
@@ -127,6 +124,9 @@ MISTRAL_API_KEY=...
|
||||
# Neuphonic
|
||||
NEUPHONIC_API_KEY=...
|
||||
|
||||
# Novita
|
||||
NOVITA_API_KEY=...
|
||||
|
||||
# NVIDIA
|
||||
NVIDIA_API_KEY=...
|
||||
|
||||
@@ -176,6 +176,9 @@ SENTRY_DSN=...
|
||||
SIMLI_API_KEY=...
|
||||
SIMLI_FACE_ID=...
|
||||
|
||||
# Smallest
|
||||
SMALLEST_API_KEY=...
|
||||
|
||||
# Smart turn
|
||||
LOCAL_SMART_TURN_MODEL_PATH=...
|
||||
FAL_SMART_TURN_API_KEY=...
|
||||
@@ -209,3 +212,6 @@ WHATSAPP_TOKEN=...
|
||||
WHATSAPP_WEBHOOK_VERIFICATION_TOKEN=...
|
||||
WHATSAPP_PHONE_NUMBER_ID=...
|
||||
WHATSAPP_APP_SECRET=...
|
||||
|
||||
# xAI / Grok
|
||||
XAI_API_KEY=...
|
||||
@@ -60,7 +60,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
context = LLMContext()
|
||||
context.add_message({"role": "user", "content": "Say hello to the world."})
|
||||
context.add_message({"role": "developer", "content": "Say hello to the world."})
|
||||
await task.queue_frames([LLMContextFrame(context), EndFrame()])
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
@@ -109,7 +109,9 @@ async def run_example(webrtc_connection: SmallWebRTCConnection):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -92,7 +92,7 @@ async def main():
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "user", "content": "Please introduce yourself to the user."}
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
|
||||
@@ -129,7 +129,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -103,7 +103,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "user", "content": "Please introduce yourself to the user."}
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
|
||||
@@ -98,7 +98,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -148,7 +148,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Say a short hello to the user."})
|
||||
context.add_message({"role": "developer", "content": "Say a short hello to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -128,7 +128,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Say a short hello to the user."})
|
||||
context.add_message({"role": "developer", "content": "Say a short hello to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
|
||||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings
|
||||
from pipecat.services.deepgram.flux.sagemaker.stt import DeepgramFluxSageMakerSTTService
|
||||
from pipecat.services.deepgram.sagemaker.tts import DeepgramSageMakerTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
# Initialize Deepgram Flux SageMaker STT Service
|
||||
# This requires:
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram Flux model
|
||||
stt = DeepgramFluxSageMakerSTTService(
|
||||
endpoint_name=os.getenv("SAGEMAKER_STT_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
settings=DeepgramFluxSageMakerSTTService.Settings(
|
||||
min_confidence=0.3,
|
||||
),
|
||||
)
|
||||
|
||||
# Initialize Deepgram SageMaker TTS Service
|
||||
# This requires:
|
||||
# - AWS credentials configured (via environment variables or AWS CLI)
|
||||
# - A deployed SageMaker endpoint with Deepgram TTS model
|
||||
tts = DeepgramSageMakerTTSService(
|
||||
endpoint_name=os.getenv("SAGEMAKER_TTS_ENDPOINT_NAME"),
|
||||
region=os.getenv("AWS_REGION"),
|
||||
settings=DeepgramSageMakerTTSService.Settings(
|
||||
voice="aura-2-andromeda-en",
|
||||
),
|
||||
)
|
||||
|
||||
llm = AWSBedrockLLMService(
|
||||
aws_region=os.getenv("AWS_REGION"),
|
||||
settings=AWSBedrockLLMSettings(
|
||||
model="us.amazon.nova-pro-v1:0",
|
||||
temperature=0.8,
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
)
|
||||
|
||||
context = LLMContext()
|
||||
# Use ExternalUserTurnStrategies since Flux handles turn detection natively
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(
|
||||
user_turn_strategies=ExternalUserTurnStrategies(),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt, # STT
|
||||
user_aggregator, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
assistant_aggregator, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
@stt.event_handler("on_update")
|
||||
async def on_deepgram_flux_update(stt, transcript):
|
||||
logger.debug(f"On deepgram flux update: {transcript}")
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -109,7 +109,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "user", "content": "Please introduce yourself to the user."}
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
|
||||
@@ -114,7 +114,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -106,7 +106,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -100,7 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -108,7 +108,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "user", "content": "Please introduce yourself to the user."}
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
|
||||
@@ -100,7 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
128
examples/foundational/07e-interruptible-xai.py
Normal file
128
examples/foundational/07e-interruptible-xai.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import LLMRunFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
LLMUserAggregatorParams,
|
||||
)
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.xai.llm import GrokLLMService
|
||||
from pipecat.services.xai.tts import XAIHttpTTSService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = XAIHttpTTSService(
|
||||
api_key=os.getenv("XAI_API_KEY"),
|
||||
aiohttp_session=session,
|
||||
settings=XAIHttpTTSService.Settings(
|
||||
voice="eve",
|
||||
),
|
||||
)
|
||||
|
||||
llm = GrokLLMService(
|
||||
api_key=os.getenv("XAI_API_KEY"),
|
||||
settings=GrokLLMService.Settings(
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
)
|
||||
|
||||
context = LLMContext()
|
||||
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
|
||||
context,
|
||||
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
user_aggregator, # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
assistant_aggregator, # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_connected")
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -102,7 +102,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -102,7 +102,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -106,7 +106,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -108,7 +108,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -103,7 +103,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "user", "content": "Please introduce yourself to the user."}
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
|
||||
@@ -114,7 +114,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -109,7 +109,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -99,7 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -95,7 +95,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -151,7 +151,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
LLMMessagesAppendFrame(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"role": "developer",
|
||||
"content": f"Greet the user and introduce yourself. Don't use emojis.",
|
||||
}
|
||||
],
|
||||
|
||||
@@ -66,7 +66,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
settings=AWSBedrockLLMService.Settings(
|
||||
model="us.anthropic.claude-sonnet-4-6",
|
||||
temperature=0.8,
|
||||
system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
# system_instruction="You are a helpful assistant in a voice conversation. Your responses will be spoken aloud, so avoid emojis, bullet points, or other formatting that can't be spoken. Respond to what the user said in a creative, helpful, and brief way.",
|
||||
),
|
||||
)
|
||||
|
||||
@@ -101,7 +101,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -124,7 +124,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation with a styled introduction
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -128,7 +128,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
# Kick off the conversation
|
||||
context.add_message(
|
||||
{
|
||||
"role": "user",
|
||||
"role": "developer",
|
||||
"content": "You are an AI assistant. You can help with a variety of tasks. Introduce yourself and ask the user what they would like to know.",
|
||||
}
|
||||
)
|
||||
|
||||
@@ -112,7 +112,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -112,7 +112,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -153,7 +153,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -102,7 +102,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -104,7 +104,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
user_turn_strategies=UserTurnStrategies(
|
||||
stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=KrispVivaTurn())]
|
||||
),
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_analyzer=SileroVADAnalyzer(), # or KrispVivaVadAnalyzer
|
||||
),
|
||||
)
|
||||
|
||||
@@ -134,7 +134,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -103,7 +103,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -107,7 +107,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "user", "content": "Please introduce yourself to the user."}
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
|
||||
@@ -99,7 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -95,7 +95,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -269,7 +269,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -100,7 +100,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -105,7 +105,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "user", "content": "Please introduce yourself to the user."}
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
|
||||
@@ -99,7 +99,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
async def on_client_connected(transport, client):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message(
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
||||
@@ -106,7 +106,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Client connected")
|
||||
# Kick off the conversation.
|
||||
context.add_message(
|
||||
{"role": "user", "content": "Please introduce yourself to the user."}
|
||||
{"role": "developer", "content": "Please introduce yourself to the user."}
|
||||
)
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
|
||||
@@ -82,7 +82,7 @@ async def main():
|
||||
),
|
||||
)
|
||||
|
||||
context.add_message({"role": "user", "content": "Please introduce yourself to the user."})
|
||||
context.add_message({"role": "developer", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([LLMRunFrame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user