Merge pull request #280 from pipecat-ai/aleix/library-updates-070224
library updates 070224 and pipecat 0.0.36
This commit is contained in:
@@ -5,7 +5,7 @@ All notable changes to **pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
## [0.0.36] - 2024-07-02
|
||||
|
||||
### Added
|
||||
|
||||
@@ -61,6 +61,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Other
|
||||
|
||||
- Added Fly.io deployment example in `examples/deployment/flyio-example`.
|
||||
|
||||
- Added new `17-detect-user-idle.py` example that shows how to use the new
|
||||
`UserIdleProcessor`.
|
||||
|
||||
|
||||
@@ -67,11 +67,12 @@ async def main(room_url: str, token):
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
camera_out_height=1024,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer()
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -116,7 +117,7 @@ async def main(room_url: str, token):
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
participant_name = participant["info"]["userName"] or ''
|
||||
transport.capture_participant_transcription(participant["id"])
|
||||
await task.queue_frames([TextFrame(f"Hi, this is {participant_name}.")])
|
||||
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ aiosignal==1.3.1
|
||||
# via aiohttp
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anthropic==0.25.9
|
||||
anthropic==0.28.1
|
||||
# via
|
||||
# openpipe
|
||||
# pipecat-ai (pyproject.toml)
|
||||
@@ -38,7 +38,7 @@ attrs==23.2.0
|
||||
# openpipe
|
||||
av==12.2.0
|
||||
# via faster-whisper
|
||||
azure-cognitiveservices-speech==1.37.0
|
||||
azure-cognitiveservices-speech==1.38.0
|
||||
# via pipecat-ai (pyproject.toml)
|
||||
blinker==1.8.2
|
||||
# via flask
|
||||
@@ -117,7 +117,7 @@ fsspec==2024.6.1
|
||||
# torch
|
||||
future==1.0.0
|
||||
# via pyloudnorm
|
||||
google-ai-generativelanguage==0.6.4
|
||||
google-ai-generativelanguage==0.6.6
|
||||
# via google-generativeai
|
||||
google-api-core[grpc]==2.19.1
|
||||
# via
|
||||
@@ -135,7 +135,7 @@ google-auth==2.31.0
|
||||
# google-generativeai
|
||||
google-auth-httplib2==0.2.0
|
||||
# via google-api-python-client
|
||||
google-generativeai==0.5.4
|
||||
google-generativeai==0.7.1
|
||||
# via pipecat-ai (pyproject.toml)
|
||||
googleapis-common-protos==1.63.2
|
||||
# via
|
||||
@@ -197,6 +197,8 @@ jinja2==3.1.4
|
||||
# fastapi
|
||||
# flask
|
||||
# torch
|
||||
jiter==0.5.0
|
||||
# via anthropic
|
||||
jsonpatch==1.33
|
||||
# via langchain-core
|
||||
jsonpointer==3.0.0
|
||||
@@ -217,7 +219,7 @@ langchain-openai==0.1.10
|
||||
# via pipecat-ai (pyproject.toml)
|
||||
langchain-text-splitters==0.2.2
|
||||
# via langchain
|
||||
langsmith==0.1.82
|
||||
langsmith==0.1.83
|
||||
# via
|
||||
# langchain
|
||||
# langchain-community
|
||||
@@ -294,12 +296,12 @@ nvidia-nvtx-cu12==12.1.105
|
||||
# via torch
|
||||
onnxruntime==1.18.1
|
||||
# via faster-whisper
|
||||
openai==1.26.0
|
||||
openai==1.27.0
|
||||
# via
|
||||
# langchain-openai
|
||||
# openpipe
|
||||
# pipecat-ai (pyproject.toml)
|
||||
openpipe==4.14.0
|
||||
openpipe==4.16.0
|
||||
# via pipecat-ai (pyproject.toml)
|
||||
orjson==3.10.5
|
||||
# via
|
||||
|
||||
@@ -17,7 +17,7 @@ aiosignal==1.3.1
|
||||
# via aiohttp
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anthropic==0.25.9
|
||||
anthropic==0.28.1
|
||||
# via
|
||||
# openpipe
|
||||
# pipecat-ai (pyproject.toml)
|
||||
@@ -38,7 +38,7 @@ attrs==23.2.0
|
||||
# openpipe
|
||||
av==12.2.0
|
||||
# via faster-whisper
|
||||
azure-cognitiveservices-speech==1.37.0
|
||||
azure-cognitiveservices-speech==1.38.0
|
||||
# via pipecat-ai (pyproject.toml)
|
||||
blinker==1.8.2
|
||||
# via flask
|
||||
@@ -116,7 +116,7 @@ fsspec==2024.6.1
|
||||
# torch
|
||||
future==1.0.0
|
||||
# via pyloudnorm
|
||||
google-ai-generativelanguage==0.6.4
|
||||
google-ai-generativelanguage==0.6.6
|
||||
# via google-generativeai
|
||||
google-api-core[grpc]==2.19.1
|
||||
# via
|
||||
@@ -134,7 +134,7 @@ google-auth==2.31.0
|
||||
# google-generativeai
|
||||
google-auth-httplib2==0.2.0
|
||||
# via google-api-python-client
|
||||
google-generativeai==0.5.4
|
||||
google-generativeai==0.7.1
|
||||
# via pipecat-ai (pyproject.toml)
|
||||
googleapis-common-protos==1.63.2
|
||||
# via
|
||||
@@ -194,6 +194,8 @@ jinja2==3.1.4
|
||||
# fastapi
|
||||
# flask
|
||||
# torch
|
||||
jiter==0.5.0
|
||||
# via anthropic
|
||||
jsonpatch==1.33
|
||||
# via langchain-core
|
||||
jsonpointer==3.0.0
|
||||
@@ -214,7 +216,7 @@ langchain-openai==0.1.10
|
||||
# via pipecat-ai (pyproject.toml)
|
||||
langchain-text-splitters==0.2.2
|
||||
# via langchain
|
||||
langsmith==0.1.82
|
||||
langsmith==0.1.83
|
||||
# via
|
||||
# langchain
|
||||
# langchain-community
|
||||
@@ -260,12 +262,12 @@ numpy==1.26.4
|
||||
# transformers
|
||||
onnxruntime==1.18.1
|
||||
# via faster-whisper
|
||||
openai==1.26.0
|
||||
openai==1.27.0
|
||||
# via
|
||||
# langchain-openai
|
||||
# openpipe
|
||||
# pipecat-ai (pyproject.toml)
|
||||
openpipe==4.14.0
|
||||
openpipe==4.16.0
|
||||
# via pipecat-ai (pyproject.toml)
|
||||
orjson==3.10.5
|
||||
# via
|
||||
|
||||
@@ -34,25 +34,25 @@ Source = "https://github.com/pipecat-ai/pipecat"
|
||||
Website = "https://pipecat.ai"
|
||||
|
||||
[project.optional-dependencies]
|
||||
anthropic = [ "anthropic~=0.25.7" ]
|
||||
azure = [ "azure-cognitiveservices-speech~=1.37.0" ]
|
||||
cartesia = [ "cartesia~=1.0.0" ]
|
||||
anthropic = [ "anthropic~=0.28.1" ]
|
||||
azure = [ "azure-cognitiveservices-speech~=1.38.0" ]
|
||||
cartesia = [ "cartesia~=1.0.3" ]
|
||||
daily = [ "daily-python~=0.10.1" ]
|
||||
deepgram = [ "deepgram-sdk~=3.2.7" ]
|
||||
examples = [ "python-dotenv~=1.0.0", "flask~=3.0.3", "flask_cors~=4.0.1" ]
|
||||
fal = [ "fal-client~=0.4.0" ]
|
||||
fal = [ "fal-client~=0.4.1" ]
|
||||
gladia = [ "websockets~=12.0" ]
|
||||
google = [ "google-generativeai~=0.5.3" ]
|
||||
fireworks = [ "openai~=1.26.0" ]
|
||||
langchain = [ "langchain~=0.2.1", "langchain-community~=0.2.1", "langchain-openai~=0.1.8" ]
|
||||
google = [ "google-generativeai~=0.7.1" ]
|
||||
fireworks = [ "openai~=1.27.0" ]
|
||||
langchain = [ "langchain~=0.2.6", "langchain-community~=0.2.6", "langchain-openai~=0.1.10" ]
|
||||
local = [ "pyaudio~=0.2.0" ]
|
||||
moondream = [ "einops~=0.8.0", "timm~=0.9.16", "transformers~=4.40.2" ]
|
||||
openai = [ "openai~=1.26.0" ]
|
||||
openpipe = [ "openpipe~=4.14.0" ]
|
||||
openai = [ "openai~=1.27.0" ]
|
||||
openpipe = [ "openpipe~=4.16.0" ]
|
||||
playht = [ "pyht~=0.0.28" ]
|
||||
silero = [ "torch~=2.3.0", "torchaudio~=2.3.0" ]
|
||||
silero = [ "torch~=2.3.1", "torchaudio~=2.3.1" ]
|
||||
websocket = [ "websockets~=12.0", "fastapi~=0.111.0" ]
|
||||
whisper = [ "faster-whisper~=1.0.2" ]
|
||||
whisper = [ "faster-whisper~=1.0.3" ]
|
||||
xtts = [ "resampy~=0.4.3" ]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
|
||||
@@ -19,12 +19,11 @@ from pipecat.frames.frames import (
|
||||
ErrorFrame,
|
||||
Frame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
SystemFrame,
|
||||
TranscriptionFrame,
|
||||
URLImageRawFrame)
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.services.ai_services import AIService, AsyncAIService, TTSService, ImageGenService
|
||||
from pipecat.services.ai_services import AsyncAIService, TTSService, ImageGenService
|
||||
from pipecat.services.openai import BaseOpenAILLMService
|
||||
|
||||
from loguru import logger
|
||||
@@ -83,7 +82,7 @@ class AzureTTSService(TTSService):
|
||||
return True
|
||||
|
||||
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
|
||||
logger.debug(f"Generating TTS: {text}")
|
||||
logger.debug(f"Generating TTS: [{text}]")
|
||||
|
||||
await self.start_ttfb_metrics()
|
||||
|
||||
@@ -148,9 +147,11 @@ class AzureSTTService(AsyncAIService):
|
||||
|
||||
async def stop(self, frame: EndFrame):
|
||||
self._speech_recognizer.stop_continuous_recognition_async()
|
||||
self._audio_stream.close()
|
||||
|
||||
async def cancel(self, frame: CancelFrame):
|
||||
self._speech_recognizer.stop_continuous_recognition_async()
|
||||
self._audio_stream.close()
|
||||
|
||||
def _on_handle_recognized(self, event):
|
||||
if event.result.reason == ResultReason.RecognizedSpeech and len(event.result.text) > 0:
|
||||
|
||||
@@ -8,7 +8,7 @@ from cartesia import AsyncCartesia
|
||||
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from pipecat.frames.frames import AudioRawFrame, Frame
|
||||
from pipecat.frames.frames import AudioRawFrame, CancelFrame, EndFrame, Frame, StartFrame
|
||||
from pipecat.services.ai_services import TTSService
|
||||
|
||||
from loguru import logger
|
||||
@@ -28,22 +28,33 @@ class CartesiaTTSService(TTSService):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
self._api_key = api_key
|
||||
self._voice_id = voice_id
|
||||
self._model_id = model_id
|
||||
self._output_format = {
|
||||
"container": "raw",
|
||||
"encoding": encoding,
|
||||
"sample_rate": sample_rate,
|
||||
}
|
||||
|
||||
try:
|
||||
self._client = AsyncCartesia(api_key=self._api_key)
|
||||
self._voice = self._client.voices.get(id=voice_id)
|
||||
except Exception as e:
|
||||
logger.exception(f"{self} initialization error: {e}")
|
||||
self._client = None
|
||||
|
||||
def can_generate_metrics(self) -> bool:
|
||||
return True
|
||||
|
||||
async def start(self, frame: StartFrame):
|
||||
try:
|
||||
self._client = AsyncCartesia(api_key=self._api_key)
|
||||
self._voice = self._client.voices.get(id=self._voice_id)
|
||||
except Exception as e:
|
||||
logger.exception(f"{self} initialization error: {e}")
|
||||
|
||||
async def stop(self, frame: EndFrame):
|
||||
if self._client:
|
||||
await self._client.close()
|
||||
|
||||
async def cancel(self, frame: CancelFrame):
|
||||
if self._client:
|
||||
await self._client.close()
|
||||
|
||||
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
|
||||
logger.debug(f"Generating TTS: [{text}]")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user