Merge pull request #280 from pipecat-ai/aleix/library-updates-070224

Library (dependency version) updates for 2024-07-02 and pipecat 0.0.36 release
This commit is contained in:
Aleix Conchillo Flaqué
2024-07-02 10:14:03 -07:00
committed by GitHub
7 changed files with 57 additions and 38 deletions

View File

@@ -5,7 +5,7 @@ All notable changes to **pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [0.0.36] - 2024-07-02
### Added
@@ -61,6 +61,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Other
- Added Fly.io deployment example in `examples/deployment/flyio-example`.
- Added new `17-detect-user-idle.py` example that shows how to use the new
`UserIdleProcessor`.

View File

@@ -67,11 +67,12 @@ async def main(room_url: str, token):
"Respond bot",
DailyParams(
audio_out_enabled=True,
camera_out_enabled=True,
camera_out_width=1024,
camera_out_height=1024,
transcription_enabled=True,
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer()
vad_analyzer=SileroVADAnalyzer(),
)
)
@@ -116,7 +117,7 @@ async def main(room_url: str, token):
async def on_first_participant_joined(transport, participant):
participant_name = participant["info"]["userName"] or ''
transport.capture_participant_transcription(participant["id"])
await task.queue_frames([TextFrame(f"Hi, this is {participant_name}.")])
await task.queue_frames([TextFrame(f"Hi there {participant_name}!")])
runner = PipelineRunner()

View File

@@ -17,7 +17,7 @@ aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
# via pydantic
anthropic==0.25.9
anthropic==0.28.1
# via
# openpipe
# pipecat-ai (pyproject.toml)
@@ -38,7 +38,7 @@ attrs==23.2.0
# openpipe
av==12.2.0
# via faster-whisper
azure-cognitiveservices-speech==1.37.0
azure-cognitiveservices-speech==1.38.0
# via pipecat-ai (pyproject.toml)
blinker==1.8.2
# via flask
@@ -117,7 +117,7 @@ fsspec==2024.6.1
# torch
future==1.0.0
# via pyloudnorm
google-ai-generativelanguage==0.6.4
google-ai-generativelanguage==0.6.6
# via google-generativeai
google-api-core[grpc]==2.19.1
# via
@@ -135,7 +135,7 @@ google-auth==2.31.0
# google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.4
google-generativeai==0.7.1
# via pipecat-ai (pyproject.toml)
googleapis-common-protos==1.63.2
# via
@@ -197,6 +197,8 @@ jinja2==3.1.4
# fastapi
# flask
# torch
jiter==0.5.0
# via anthropic
jsonpatch==1.33
# via langchain-core
jsonpointer==3.0.0
@@ -217,7 +219,7 @@ langchain-openai==0.1.10
# via pipecat-ai (pyproject.toml)
langchain-text-splitters==0.2.2
# via langchain
langsmith==0.1.82
langsmith==0.1.83
# via
# langchain
# langchain-community
@@ -294,12 +296,12 @@ nvidia-nvtx-cu12==12.1.105
# via torch
onnxruntime==1.18.1
# via faster-whisper
openai==1.26.0
openai==1.27.0
# via
# langchain-openai
# openpipe
# pipecat-ai (pyproject.toml)
openpipe==4.14.0
openpipe==4.16.0
# via pipecat-ai (pyproject.toml)
orjson==3.10.5
# via

View File

@@ -17,7 +17,7 @@ aiosignal==1.3.1
# via aiohttp
annotated-types==0.7.0
# via pydantic
anthropic==0.25.9
anthropic==0.28.1
# via
# openpipe
# pipecat-ai (pyproject.toml)
@@ -38,7 +38,7 @@ attrs==23.2.0
# openpipe
av==12.2.0
# via faster-whisper
azure-cognitiveservices-speech==1.37.0
azure-cognitiveservices-speech==1.38.0
# via pipecat-ai (pyproject.toml)
blinker==1.8.2
# via flask
@@ -116,7 +116,7 @@ fsspec==2024.6.1
# torch
future==1.0.0
# via pyloudnorm
google-ai-generativelanguage==0.6.4
google-ai-generativelanguage==0.6.6
# via google-generativeai
google-api-core[grpc]==2.19.1
# via
@@ -134,7 +134,7 @@ google-auth==2.31.0
# google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.4
google-generativeai==0.7.1
# via pipecat-ai (pyproject.toml)
googleapis-common-protos==1.63.2
# via
@@ -194,6 +194,8 @@ jinja2==3.1.4
# fastapi
# flask
# torch
jiter==0.5.0
# via anthropic
jsonpatch==1.33
# via langchain-core
jsonpointer==3.0.0
@@ -214,7 +216,7 @@ langchain-openai==0.1.10
# via pipecat-ai (pyproject.toml)
langchain-text-splitters==0.2.2
# via langchain
langsmith==0.1.82
langsmith==0.1.83
# via
# langchain
# langchain-community
@@ -260,12 +262,12 @@ numpy==1.26.4
# transformers
onnxruntime==1.18.1
# via faster-whisper
openai==1.26.0
openai==1.27.0
# via
# langchain-openai
# openpipe
# pipecat-ai (pyproject.toml)
openpipe==4.14.0
openpipe==4.16.0
# via pipecat-ai (pyproject.toml)
orjson==3.10.5
# via

View File

@@ -34,25 +34,25 @@ Source = "https://github.com/pipecat-ai/pipecat"
Website = "https://pipecat.ai"
[project.optional-dependencies]
anthropic = [ "anthropic~=0.25.7" ]
azure = [ "azure-cognitiveservices-speech~=1.37.0" ]
cartesia = [ "cartesia~=1.0.0" ]
anthropic = [ "anthropic~=0.28.1" ]
azure = [ "azure-cognitiveservices-speech~=1.38.0" ]
cartesia = [ "cartesia~=1.0.3" ]
daily = [ "daily-python~=0.10.1" ]
deepgram = [ "deepgram-sdk~=3.2.7" ]
examples = [ "python-dotenv~=1.0.0", "flask~=3.0.3", "flask_cors~=4.0.1" ]
fal = [ "fal-client~=0.4.0" ]
fal = [ "fal-client~=0.4.1" ]
gladia = [ "websockets~=12.0" ]
google = [ "google-generativeai~=0.5.3" ]
fireworks = [ "openai~=1.26.0" ]
langchain = [ "langchain~=0.2.1", "langchain-community~=0.2.1", "langchain-openai~=0.1.8" ]
google = [ "google-generativeai~=0.7.1" ]
fireworks = [ "openai~=1.27.0" ]
langchain = [ "langchain~=0.2.6", "langchain-community~=0.2.6", "langchain-openai~=0.1.10" ]
local = [ "pyaudio~=0.2.0" ]
moondream = [ "einops~=0.8.0", "timm~=0.9.16", "transformers~=4.40.2" ]
openai = [ "openai~=1.26.0" ]
openpipe = [ "openpipe~=4.14.0" ]
openai = [ "openai~=1.27.0" ]
openpipe = [ "openpipe~=4.16.0" ]
playht = [ "pyht~=0.0.28" ]
silero = [ "torch~=2.3.0", "torchaudio~=2.3.0" ]
silero = [ "torch~=2.3.1", "torchaudio~=2.3.1" ]
websocket = [ "websockets~=12.0", "fastapi~=0.111.0" ]
whisper = [ "faster-whisper~=1.0.2" ]
whisper = [ "faster-whisper~=1.0.3" ]
xtts = [ "resampy~=0.4.3" ]
[tool.setuptools.packages.find]

View File

@@ -19,12 +19,11 @@ from pipecat.frames.frames import (
ErrorFrame,
Frame,
StartFrame,
StartInterruptionFrame,
SystemFrame,
TranscriptionFrame,
URLImageRawFrame)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.ai_services import AIService, AsyncAIService, TTSService, ImageGenService
from pipecat.services.ai_services import AsyncAIService, TTSService, ImageGenService
from pipecat.services.openai import BaseOpenAILLMService
from loguru import logger
@@ -83,7 +82,7 @@ class AzureTTSService(TTSService):
return True
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
logger.debug(f"Generating TTS: {text}")
logger.debug(f"Generating TTS: [{text}]")
await self.start_ttfb_metrics()
@@ -148,9 +147,11 @@ class AzureSTTService(AsyncAIService):
async def stop(self, frame: EndFrame):
self._speech_recognizer.stop_continuous_recognition_async()
self._audio_stream.close()
async def cancel(self, frame: CancelFrame):
self._speech_recognizer.stop_continuous_recognition_async()
self._audio_stream.close()
def _on_handle_recognized(self, event):
if event.result.reason == ResultReason.RecognizedSpeech and len(event.result.text) > 0:

View File

@@ -8,7 +8,7 @@ from cartesia import AsyncCartesia
from typing import AsyncGenerator
from pipecat.frames.frames import AudioRawFrame, Frame
from pipecat.frames.frames import AudioRawFrame, CancelFrame, EndFrame, Frame, StartFrame
from pipecat.services.ai_services import TTSService
from loguru import logger
@@ -28,22 +28,33 @@ class CartesiaTTSService(TTSService):
super().__init__(**kwargs)
self._api_key = api_key
self._voice_id = voice_id
self._model_id = model_id
self._output_format = {
"container": "raw",
"encoding": encoding,
"sample_rate": sample_rate,
}
try:
self._client = AsyncCartesia(api_key=self._api_key)
self._voice = self._client.voices.get(id=voice_id)
except Exception as e:
logger.exception(f"{self} initialization error: {e}")
self._client = None
def can_generate_metrics(self) -> bool:
return True
async def start(self, frame: StartFrame):
try:
self._client = AsyncCartesia(api_key=self._api_key)
self._voice = self._client.voices.get(id=self._voice_id)
except Exception as e:
logger.exception(f"{self} initialization error: {e}")
async def stop(self, frame: EndFrame):
if self._client:
await self._client.close()
async def cancel(self, frame: CancelFrame):
if self._client:
await self._client.close()
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
logger.debug(f"Generating TTS: [{text}]")