Merge pull request #3838 from pipecat-ai/mb/remove-playht

Remove PlayHT TTS services
This commit is contained in:
Mark Backman
2026-02-25 14:34:15 -05:00
committed by GitHub
11 changed files with 16 additions and 1115 deletions

View File

@@ -81,19 +81,19 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
## 🧩 Available services
| Category | Services |
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
| Category | Services |
| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local |
| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) |
| Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) |
| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) |
📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services)

View File

@@ -0,0 +1 @@
- ⚠️ Removed `PlayHTTTSService` and `PlayHTHttpTTSService`. PlayHT has been shut down and is no longer available.

View File

@@ -42,7 +42,7 @@ This script:
- Creates a fresh virtual environment
- Installs all dependencies as specified in requirements files
- Handles conflicting dependencies (like grpcio versions for Riva and PlayHT)
- Handles conflicting dependencies (like grpcio versions for Riva)
- Builds the documentation in an isolated environment
- Provides detailed logging of the build process
@@ -74,7 +74,6 @@ start _build/html/index.html
├── index.rst # Main documentation entry point
├── requirements-base.txt # Base documentation dependencies
├── requirements-riva.txt # Riva-specific dependencies
├── requirements-playht.txt # PlayHT-specific dependencies
├── build-docs.sh # Local build script
└── rtd-test.py # ReadTheDocs test build script
```

View File

@@ -147,10 +147,6 @@ KOALA_ACCESS_KEY=...
# Piper
PIPER_BASE_URL=...
# PlayHT
PLAYHT_USER_ID=...
PLAYHT_API_KEY=...
# Plivo
PLIVO_AUTH_ID=...
PLIVO_AUTH_TOKEN=...

View File

@@ -1,125 +0,0 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.playht.tts import PlayHTHttpTTSService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
load_dotenv(override=True)
# We use lambdas to defer transport parameter creation until the transport
# type is selected at runtime.
transport_params = {
"daily": lambda: DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
"twilio": lambda: FastAPIWebsocketParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
"webrtc": lambda: TransportParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = PlayHTHttpTTSService(
user_id=os.getenv("PLAYHT_USER_ID"),
api_key=os.getenv("PLAYHT_API_KEY"),
voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
},
]
context = LLMContext(messages)
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
context,
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
user_aggregator, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
assistant_aggregator, # Assistant spoken responses
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport, runner_args)
if __name__ == "__main__":
from pipecat.runner.run import main
main()

View File

@@ -1,127 +0,0 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.playht.tts import PlayHTTTSService
from pipecat.transcriptions.language import Language
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
load_dotenv(override=True)
# We use lambdas to defer transport parameter creation until the transport
# type is selected at runtime.
transport_params = {
"daily": lambda: DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
"twilio": lambda: FastAPIWebsocketParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
"webrtc": lambda: TransportParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = PlayHTTTSService(
user_id=os.getenv("PLAYHT_USER_ID"),
api_key=os.getenv("PLAYHT_API_KEY"),
voice_url="s3://voice-cloning-zero-shot/e46b4027-b38d-4d24-b292-38fbca2be0ef/original/manifest.json",
params=PlayHTTTSService.InputParams(language=Language.EN),
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
},
]
context = LLMContext(messages)
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
context,
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
)
pipeline = Pipeline(
[
transport.input(), # Transport user input
stt,
user_aggregator, # User responses
llm, # LLM
tts, # TTS
transport.output(), # Transport bot output
assistant_aggregator, # Assistant spoken responses
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMRunFrame()])
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport, runner_args)
if __name__ == "__main__":
from pipecat.runner.run import main
main()

View File

@@ -1,126 +0,0 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import asyncio
import os
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.playht.tts import PlayHTTTSService, PlayHTTTSSettings
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
load_dotenv(override=True)
transport_params = {
"daily": lambda: DailyParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
"twilio": lambda: FastAPIWebsocketParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
"webrtc": lambda: TransportParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = PlayHTTTSService(
api_key=os.getenv("PLAYHT_API_KEY"),
user_id=os.getenv("PLAYHT_USER_ID"),
voice_url=os.getenv("PLAYHT_VOICE_URL", ""),
)
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
messages = [
{
"role": "system",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
},
]
context = LLMContext(messages)
user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
context,
user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
)
pipeline = Pipeline(
[
transport.input(),
stt,
user_aggregator,
llm,
tts,
transport.output(),
assistant_aggregator,
]
)
task = PipelineTask(
pipeline,
params=PipelineParams(
enable_metrics=True,
enable_usage_metrics=True,
),
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
)
@transport.event_handler("on_client_connected")
async def on_client_connected(transport, client):
logger.info(f"Client connected")
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
await task.queue_frames([LLMRunFrame()])
await asyncio.sleep(10)
logger.info("Updating PlayHT TTS settings: speed=1.3")
await task.queue_frame(TTSUpdateSettingsFrame(delta=PlayHTTTSSettings(speed=1.3)))
@transport.event_handler("on_client_disconnected")
async def on_client_disconnected(transport, client):
logger.info(f"Client disconnected")
await task.cancel()
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
await runner.run(task)
async def bot(runner_args: RunnerArguments):
"""Main bot entry point compatible with Pipecat Cloud."""
transport = await create_transport(runner_args, transport_params)
await run_bot(transport, runner_args)
if __name__ == "__main__":
from pipecat.runner.run import main
main()

View File

@@ -100,7 +100,6 @@ openpipe = [ "openpipe>=4.50.0,<6" ]
openrouter = []
perplexity = []
piper = [ "piper-tts>=1.3.0,<2", "requests>=2.32.5,<3" ]
playht = [ "pipecat-ai[websockets-base]" ]
qwen = []
remote-smart-turn = []
resembleai = [ "pipecat-ai[websockets-base]" ]

View File

@@ -1,13 +0,0 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import sys
from pipecat.services import DeprecatedModuleProxy
from .tts import *
sys.modules[__name__] = DeprecatedModuleProxy(globals(), "playht", "playht.tts")

View File

@@ -1,699 +0,0 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
"""PlayHT text-to-speech service implementations.
This module provides integration with PlayHT's text-to-speech API
supporting both WebSocket streaming and HTTP-based synthesis.
"""
import io
import json
import struct
import uuid
import warnings
from dataclasses import dataclass, field
from typing import Any, AsyncGenerator, Optional
import aiohttp
from loguru import logger
from pydantic import BaseModel
from pipecat.frames.frames import (
CancelFrame,
EndFrame,
ErrorFrame,
Frame,
InterruptionFrame,
StartFrame,
TTSAudioRawFrame,
TTSStartedFrame,
TTSStoppedFrame,
)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven
from pipecat.services.tts_service import InterruptibleTTSService, TTSService
from pipecat.transcriptions.language import Language, resolve_language
from pipecat.utils.tracing.service_decorators import traced_tts
try:
from websockets.asyncio.client import connect as websocket_connect
from websockets.protocol import State
except ModuleNotFoundError as e:
logger.error(f"Exception: {e}")
logger.error("In order to use PlayHTTTSService, you need to `pip install pipecat-ai[playht]`.")
raise Exception(f"Missing module: {e}")
def language_to_playht_language(language: Language) -> Optional[str]:
"""Convert a Language enum to PlayHT language code.
Args:
language: The Language enum value to convert.
Returns:
The corresponding PlayHT language code, or None if not supported.
"""
LANGUAGE_MAP = {
Language.AF: "afrikans",
Language.AM: "amharic",
Language.AR: "arabic",
Language.BN: "bengali",
Language.BG: "bulgarian",
Language.CA: "catalan",
Language.CS: "czech",
Language.DA: "danish",
Language.DE: "german",
Language.EL: "greek",
Language.EN: "english",
Language.ES: "spanish",
Language.FR: "french",
Language.GL: "galician",
Language.HE: "hebrew",
Language.HI: "hindi",
Language.HR: "croatian",
Language.HU: "hungarian",
Language.ID: "indonesian",
Language.IT: "italian",
Language.JA: "japanese",
Language.KO: "korean",
Language.MS: "malay",
Language.NL: "dutch",
Language.PL: "polish",
Language.PT: "portuguese",
Language.RU: "russian",
Language.SQ: "albanian",
Language.SR: "serbian",
Language.SV: "swedish",
Language.TH: "thai",
Language.TL: "tagalog",
Language.TR: "turkish",
Language.UK: "ukrainian",
Language.UR: "urdu",
Language.XH: "xhosa",
Language.ZH: "mandarin",
}
return resolve_language(language, LANGUAGE_MAP, use_base_code=False)
@dataclass
class PlayHTTTSSettings(TTSSettings):
"""Settings for PlayHT TTS services.
Parameters:
output_format: Audio output format.
voice_engine: Voice engine to use.
speed: Speech speed multiplier. Defaults to 1.0.
seed: Random seed for voice consistency.
playht_sample_rate: Audio sample rate sent to the API.
"""
output_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
voice_engine: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
seed: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
playht_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
class PlayHTTTSService(InterruptibleTTSService):
"""PlayHT WebSocket-based text-to-speech service.
.. deprecated:: 0.0.88
This class is deprecated and will be removed in a future version.
PlayHT is shutting down their API on December 31st, 2025.
Provides real-time text-to-speech synthesis using PlayHT's WebSocket API.
Supports streaming audio generation with configurable voice engines and
language settings.
"""
_settings: PlayHTTTSSettings
class InputParams(BaseModel):
"""Input parameters for PlayHT TTS configuration.
Parameters:
language: Language for synthesis. Defaults to English.
speed: Speech speed multiplier. Defaults to 1.0.
seed: Random seed for voice consistency.
"""
language: Optional[Language] = Language.EN
speed: Optional[float] = 1.0
seed: Optional[int] = None
def __init__(
self,
*,
api_key: str,
user_id: str,
voice_url: str,
voice_engine: str = "Play3.0-mini",
sample_rate: Optional[int] = None,
output_format: str = "wav",
params: Optional[InputParams] = None,
**kwargs,
):
"""Initialize the PlayHT WebSocket TTS service.
Args:
api_key: PlayHT API key for authentication.
user_id: PlayHT user ID for authentication.
voice_url: URL of the voice to use for synthesis.
voice_engine: Voice engine to use. Defaults to "Play3.0-mini".
sample_rate: Audio sample rate. If None, uses default.
output_format: Audio output format. Defaults to "wav".
params: Additional input parameters for voice customization.
**kwargs: Additional arguments passed to parent InterruptibleTTSService.
"""
super().__init__(
pause_frame_processing=True,
sample_rate=sample_rate,
**kwargs,
)
with warnings.catch_warnings():
warnings.simplefilter("always")
warnings.warn(
"PlayHT is shutting down their API on December 31st, 2025. "
"'PlayHTTTSService' is deprecated and will be removed in a future version.",
DeprecationWarning,
stacklevel=2,
)
params = params or PlayHTTTSService.InputParams()
self._api_key = api_key
self._user_id = user_id
self._websocket_url = None
self._receive_task = None
self._context_id = None
self._settings = PlayHTTTSSettings(
model=voice_engine,
voice=voice_url,
language=self.language_to_service_language(params.language)
if params.language
else "english",
output_format=output_format,
voice_engine=voice_engine,
speed=params.speed,
seed=params.seed,
playht_sample_rate=0,
)
self._sync_model_name_to_metrics()
def can_generate_metrics(self) -> bool:
"""Check if this service can generate processing metrics.
Returns:
True, as PlayHT service supports metrics generation.
"""
return True
async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]:
"""Apply a settings delta.
Settings are stored but not applied to the active connection.
"""
changed = await super()._update_settings(delta)
if not changed:
return changed
# TODO: someday we could reconnect here to apply updated settings.
# Code might look something like the below:
# await self._disconnect()
# await self._connect()
self._warn_unhandled_updated_settings(changed)
return changed
def language_to_service_language(self, language: Language) -> Optional[str]:
"""Convert a Language enum to PlayHT service language format.
Args:
language: The language to convert.
Returns:
The PlayHT-specific language code, or None if not supported.
"""
return language_to_playht_language(language)
async def start(self, frame: StartFrame):
"""Start the PlayHT TTS service.
Args:
frame: The start frame containing initialization parameters.
"""
await super().start(frame)
await self._connect()
async def stop(self, frame: EndFrame):
"""Stop the PlayHT TTS service.
Args:
frame: The end frame.
"""
await super().stop(frame)
await self._disconnect()
async def cancel(self, frame: CancelFrame):
"""Cancel the PlayHT TTS service.
Args:
frame: The cancel frame.
"""
await super().cancel(frame)
await self._disconnect()
async def _connect(self):
"""Connect to PlayHT WebSocket and start receive task."""
await super()._connect()
await self._connect_websocket()
if self._websocket and not self._receive_task:
self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
async def _disconnect(self):
"""Disconnect from PlayHT WebSocket and clean up tasks."""
await super()._disconnect()
if self._receive_task:
await self.cancel_task(self._receive_task)
self._receive_task = None
await self._disconnect_websocket()
async def _connect_websocket(self):
"""Connect to PlayHT websocket."""
try:
if self._websocket and self._websocket.state is State.OPEN:
return
logger.debug("Connecting to PlayHT")
if not self._websocket_url:
await self._get_websocket_url()
if not isinstance(self._websocket_url, str):
raise ValueError("WebSocket URL is not a string")
self._websocket = await websocket_connect(self._websocket_url)
await self._call_event_handler("on_connected")
except ValueError as e:
logger.error(f"{self} initialization error: {e}")
self._websocket = None
await self._call_event_handler("on_connection_error", f"{e}")
except Exception as e:
await self.push_error(error_msg=f"Error connecting: {e}", exception=e)
self._websocket = None
await self._call_event_handler("on_connection_error", f"{e}")
async def _disconnect_websocket(self):
"""Disconnect from PlayHT websocket."""
try:
await self.stop_all_metrics()
if self._websocket:
logger.debug("Disconnecting from PlayHT")
await self._websocket.close()
except Exception as e:
await self.push_error(error_msg=f"Error disconnecting: {e}", exception=e)
finally:
self._context_id = None
self._websocket = None
await self._call_event_handler("on_disconnected")
async def _get_websocket_url(self):
"""Retrieve WebSocket URL from PlayHT API."""
async with aiohttp.ClientSession() as session:
async with session.post(
"https://api.play.ht/api/v4/websocket-auth",
headers={
"Authorization": f"Bearer {self._api_key}",
"X-User-Id": self._user_id,
"Content-Type": "application/json",
},
) as response:
if response.status in (200, 201):
data = await response.json()
# Handle the new response format with multiple URLs
if "websocket_urls" in data:
# Select URL based on voice_engine
if self._settings.voice_engine in data["websocket_urls"]:
self._websocket_url = data["websocket_urls"][
self._settings.voice_engine
]
else:
raise ValueError(
f"Unsupported voice engine: {self._settings.voice_engine}"
)
else:
raise ValueError("Invalid response: missing websocket_urls")
else:
raise Exception(f"Failed to get WebSocket URL: {response.status}")
def _get_websocket(self):
"""Get the WebSocket connection if available."""
if self._websocket:
return self._websocket
raise Exception("Websocket not connected")
def create_context_id(self) -> str:
"""Generate a unique context ID for a TTS request in case we don't have one already in progress.
Returns:
A unique string identifier for the TTS context.
"""
# If a context ID does not exist, create a new one.
# If an ID exists, continue using the current ID.
# When interruptions happen, user speech results in
# an interruption, which resets the context ID.
if not self._context_id:
return str(uuid.uuid4())
return self._context_id
async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
"""Handle interruption by stopping metrics and clearing request ID."""
await super()._handle_interruption(frame, direction)
await self.stop_all_metrics()
self._context_id = None
async def _receive_messages(self):
"""Receive messages from PlayHT websocket."""
async for message in self._get_websocket():
if isinstance(message, bytes):
# Skip the WAV header message
if message.startswith(b"RIFF"):
continue
await self.stop_ttfb_metrics()
frame = TTSAudioRawFrame(message, self.sample_rate, 1, context_id=self._context_id)
await self.push_frame(frame)
else:
logger.debug(f"Received text message: {message}")
try:
msg = json.loads(message)
if msg.get("type") == "start":
# Handle start of stream
logger.debug(f"Started processing request: {msg.get('request_id')}")
elif msg.get("type") == "end":
# Handle end of stream
if "request_id" in msg and msg["request_id"] == self._context_id:
await self.push_frame(TTSStoppedFrame(context_id=self._context_id))
self._context_id = None
elif "error" in msg:
await self.push_error(error_msg=f"Error: {msg['error']}")
except json.JSONDecodeError:
logger.error(f"Invalid JSON message: {message}")
@traced_tts
async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]:
"""Generate TTS audio from text using PlayHT's WebSocket API.
Args:
text: The text to synthesize into speech.
context_id: The context ID for tracking audio frames.
Yields:
Frame: Audio frames containing the synthesized speech.
"""
logger.debug(f"{self}: Generating TTS [{text}]")
try:
# Reconnect if the websocket is closed
if not self._websocket or self._websocket.state is State.CLOSED:
await self._connect()
if not self._context_id:
await self.start_ttfb_metrics()
yield TTSStartedFrame(context_id=context_id)
self._context_id = context_id
tts_command = {
"text": text,
"voice": self._settings.voice,
"voice_engine": self._settings.voice_engine,
"output_format": self._settings.output_format,
"sample_rate": self.sample_rate,
"language": self._settings.language,
"speed": self._settings.speed,
"seed": self._settings.seed,
"request_id": self._context_id,
}
try:
await self._get_websocket().send(json.dumps(tts_command))
await self.start_tts_usage_metrics(text)
except Exception as e:
yield ErrorFrame(error=f"Unknown error occurred: {e}")
yield TTSStoppedFrame(context_id=context_id)
await self._disconnect()
await self._connect()
return
# The actual audio frames will be handled in _receive_task_handler
yield None
except Exception as e:
yield ErrorFrame(error=f"Unknown error occurred: {e}")
class PlayHTHttpTTSService(TTSService):
"""PlayHT HTTP-based text-to-speech service.
.. deprecated:: 0.0.88
This class is deprecated and will be removed in a future version.
PlayHT is shutting down their API on December 31st, 2025.
Provides text-to-speech synthesis using PlayHT's HTTP API for simpler,
non-streaming synthesis. Suitable for use cases where streaming is not
required and simpler integration is preferred.
"""
_settings: PlayHTTTSSettings
class InputParams(BaseModel):
"""Input parameters for PlayHT HTTP TTS configuration.
Parameters:
language: Language for synthesis. Defaults to English.
speed: Speech speed multiplier. Defaults to 1.0.
seed: Random seed for voice consistency.
"""
language: Optional[Language] = Language.EN
speed: Optional[float] = 1.0
seed: Optional[int] = None
def __init__(
self,
*,
api_key: str,
user_id: str,
voice_url: str,
voice_engine: str = "Play3.0-mini",
protocol: Optional[str] = None,
output_format: str = "wav",
sample_rate: Optional[int] = None,
params: Optional[InputParams] = None,
**kwargs,
):
"""Initialize the PlayHT HTTP TTS service.
Args:
api_key: PlayHT API key for authentication.
user_id: PlayHT user ID for authentication.
voice_url: URL of the voice to use for synthesis.
voice_engine: Voice engine to use. Defaults to "Play3.0-mini".
protocol: Protocol to use ("http" or "ws").
.. deprecated:: 0.0.80
This parameter no longer has any effect and will be removed in a future version.
Use PlayHTTTSService for WebSocket or PlayHTHttpTTSService for HTTP.
output_format: Audio output format. Defaults to "wav".
sample_rate: Audio sample rate. If None, uses default.
params: Additional input parameters for voice customization.
**kwargs: Additional arguments passed to parent TTSService.
"""
super().__init__(sample_rate=sample_rate, **kwargs)
# Warn about deprecated protocol parameter if explicitly provided
if protocol:
with warnings.catch_warnings():
warnings.simplefilter("always")
warnings.warn(
"The 'protocol' parameter is deprecated and will be removed in a future version.",
DeprecationWarning,
stacklevel=2,
)
with warnings.catch_warnings():
warnings.simplefilter("always")
warnings.warn(
"PlayHT is shutting down their API on December 31st, 2025. "
"'PlayHTHttpTTSService' is deprecated and will be removed in a future version.",
DeprecationWarning,
stacklevel=2,
)
params = params or PlayHTHttpTTSService.InputParams()
self._user_id = user_id
self._api_key = api_key
# Check if voice_engine contains protocol information (backward compatibility)
if "-http" in voice_engine:
# Extract the base engine name
voice_engine = voice_engine.replace("-http", "")
elif "-ws" in voice_engine:
# Extract the base engine name
voice_engine = voice_engine.replace("-ws", "")
self._settings = PlayHTTTSSettings(
model=voice_engine,
voice=voice_url,
language=self.language_to_service_language(params.language)
if params.language
else "english",
output_format=output_format,
voice_engine=voice_engine,
speed=params.speed,
seed=params.seed,
playht_sample_rate=0,
)
self._sync_model_name_to_metrics()
async def start(self, frame: StartFrame):
"""Start the PlayHT HTTP TTS service.
Args:
frame: The start frame containing initialization parameters.
"""
await super().start(frame)
self._settings.playht_sample_rate = self.sample_rate
def can_generate_metrics(self) -> bool:
"""Check if this service can generate processing metrics.
Returns:
True, as PlayHT HTTP service supports metrics generation.
"""
return True
def language_to_service_language(self, language: Language) -> Optional[str]:
"""Convert a Language enum to PlayHT service language format.
Args:
language: The language to convert.
Returns:
The PlayHT-specific language code, or None if not supported.
"""
return language_to_playht_language(language)
@traced_tts
async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]:
"""Generate TTS audio from text using PlayHT's HTTP API.
Args:
text: The text to synthesize into speech.
context_id: The context ID for tracking audio frames.
Yields:
Frame: Audio frames containing the synthesized speech.
"""
logger.debug(f"{self}: Generating TTS [{text}]")
try:
await self.start_ttfb_metrics()
# Prepare the request payload
payload = {
"text": text,
"voice": self._settings.voice,
"voice_engine": self._settings.voice_engine,
"output_format": self._settings.output_format,
"sample_rate": self.sample_rate,
"language": self._settings.language,
}
# Add optional parameters if they exist
if self._settings.speed is not None:
payload["speed"] = self._settings.speed
if self._settings.seed is not None:
payload["seed"] = self._settings.seed
headers = {
"Authorization": f"Bearer {self._api_key}",
"X-User-Id": self._user_id,
"Content-Type": "application/json",
"Accept": "*/*",
}
await self.start_tts_usage_metrics(text)
yield TTSStartedFrame(context_id=context_id)
async with aiohttp.ClientSession() as session:
async with session.post(
"https://api.play.ht/api/v2/tts/stream",
headers=headers,
json=payload,
) as response:
if response.status not in (200, 201):
error_text = await response.text()
raise Exception(f"PlayHT API error {response.status}: {error_text}")
in_header = True
buffer = b""
CHUNK_SIZE = self.chunk_size
async for chunk in response.content.iter_chunked(CHUNK_SIZE):
if len(chunk) == 0:
continue
# Skip the RIFF header
if in_header:
buffer += chunk
if len(buffer) <= 36:
continue
else:
fh = io.BytesIO(buffer)
fh.seek(36)
(data, size) = struct.unpack("<4sI", fh.read(8))
while data != b"data":
fh.read(size)
(data, size) = struct.unpack("<4sI", fh.read(8))
# Extract audio data after header
audio_data = buffer[fh.tell() :]
if len(audio_data) > 0:
await self.stop_ttfb_metrics()
frame = TTSAudioRawFrame(
audio_data, self.sample_rate, 1, context_id=context_id
)
yield frame
in_header = False
elif len(chunk) > 0:
await self.stop_ttfb_metrics()
frame = TTSAudioRawFrame(
chunk, self.sample_rate, 1, context_id=context_id
)
yield frame
except Exception as e:
yield ErrorFrame(error=f"Unknown error occurred: {e}")
finally:
await self.stop_ttfb_metrics()
yield TTSStoppedFrame(context_id=context_id)

6
uv.lock generated
View File

@@ -4550,9 +4550,6 @@ piper = [
{ name = "piper-tts" },
{ name = "requests" },
]
playht = [
{ name = "websockets" },
]
resembleai = [
{ name = "websockets" },
]
@@ -4722,7 +4719,6 @@ requires-dist = [
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'lmnt'" },
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'neuphonic'" },
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'openai'" },
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'playht'" },
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'resembleai'" },
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'rime'" },
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'sarvam'" },
@@ -4763,7 +4759,7 @@ requires-dist = [
{ name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1" },
{ name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" },
]
provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "piper", "playht", "qwen", "remote-smart-turn", "resembleai", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"]
provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "piper", "qwen", "remote-smart-turn", "resembleai", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"]
[package.metadata.requires-dev]
dev = [