From d2f5ee2915bc6fa623e1bae7bbea8fa58bc20536 Mon Sep 17 00:00:00 2001 From: Kwindla Hultman Kramer Date: Sat, 29 Mar 2025 13:16:15 -0700 Subject: [PATCH 1/3] Changelog entry for mem0 service --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b4aae8ba..9a0758c75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added `Mem0MemoryService`. Mem0 is a self-improving memory layer for LLM applications. (see https://mem0.ai/) + - Added `SmallWebRTCTransport`, a new P2P WebRTC transport. - Created two examples in `p2p-webrtc`: - **video-transform**: Demonstrates sending and receiving audio/video with `SmallWebRTCTransport` using `TypeScript`. From b9ea3f0fd9a250da8c804da3759f4d92893d58ff Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Sat, 29 Mar 2025 17:54:59 -0400 Subject: [PATCH 2/3] Update README, organize pyproject.toml --- CHANGELOG.md | 17 +++++++++++------ README.md | 1 + pyproject.toml | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a0758c75..cde663bc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,15 +9,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- Added `Mem0MemoryService`. Mem0 is a self-improving memory layer for LLM applications. (see https://mem0.ai/) +- Added `Mem0MemoryService`. Mem0 is a self-improving memory layer for LLM + applications. Learn more at: https://mem0.ai/. - Added `SmallWebRTCTransport`, a new P2P WebRTC transport. - - Created two examples in `p2p-webrtc`: - - **video-transform**: Demonstrates sending and receiving audio/video with `SmallWebRTCTransport` using `TypeScript`. - Includes video frame processing with OpenCV. - - **voice-agent**: A minimal example of creating a voice agent with `SmallWebRTCTransport`. -- Added support to `ProtobufFrameSerializer` to send the messages from `TransportMessageFrame` and `TransportMessageUrgentFrame`. + - Created two examples in `p2p-webrtc`: + - **video-transform**: Demonstrates sending and receiving audio/video with + `SmallWebRTCTransport` using `TypeScript`. Includes video frame + processing with OpenCV. + - **voice-agent**: A minimal example of creating a voice agent with + `SmallWebRTCTransport`. + +- Added support to `ProtobufFrameSerializer` to send the messages from + `TransportMessageFrame` and `TransportMessageUrgentFrame`. - Added support for a new TTS service, `PiperTTSService`. (see https://github.com/rhasspy/piper/) diff --git a/README.md b/README.md index 2670ecfb6..dda565987 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ pip install "pipecat-ai[option,...]" | Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` | | Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` | | Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` | +| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | `pip install "pipecat-ai[mem0]"` | | Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/fal), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | `pip install "pipecat-ai[moondream]"` | | Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` | | Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` | diff --git a/pyproject.toml b/pyproject.toml index 801a87d5d..938a39eca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ langchain = [ "langchain~=0.3.20", "langchain-community~=0.3.20", "langchain-ope livekit = [ "livekit~=0.22.0", "livekit-api~=0.8.2", "tenacity~=9.0.0" ] lmnt = [ "websockets~=13.1" ] local = [ "pyaudio~=0.2.14" ] +mem0 = [ "mem0ai~=0.1.76" ] mlx-whisper = [ "mlx-whisper~=0.4.2" ] moondream = [ "einops~=0.8.0", "timm~=1.0.13", "transformers~=4.48.0" ] nim = [] @@ -86,7 +87,6 @@ ultravox = [ "transformers~=4.48.0", "vllm~=0.7.3" ] webrtc = [ "aiortc~=1.10.1", "opencv-python~=4.11.0.86" ] websocket = [ "websockets~=13.1", "fastapi~=0.115.6" ] whisper = [ "faster-whisper~=1.1.1" ] -mem0 = [ "mem0ai~=0.1.76" ] [tool.setuptools.packages.find] # All the following settings are optional: From a978a5cd4a22f84193954141e65b8dc9afad3bd3 Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Sat, 29 Mar 2025 17:57:50 -0400 Subject: [PATCH 3/3] Fix Whisper formatting --- src/pipecat/services/whisper.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/pipecat/services/whisper.py b/src/pipecat/services/whisper.py index 4741ea99b..473a53406 100644 --- a/src/pipecat/services/whisper.py +++ b/src/pipecat/services/whisper.py @@ -9,10 +9,10 @@ import asyncio from enum import Enum from typing import AsyncGenerator, Optional -from typing_extensions import TYPE_CHECKING, override import numpy as np from loguru import logger +from typing_extensions import TYPE_CHECKING, override from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame from pipecat.services.ai_services import SegmentedSTTService @@ -26,7 +26,7 @@ if TYPE_CHECKING: logger.error(f"Exception: {e}") logger.error("In order to use Whisper, you need to `pip install pipecat-ai[whisper]`.") raise Exception(f"Missing module: {e}") - + try: import mlx_whisper except ModuleNotFoundError as e: @@ -332,6 +332,7 @@ class WhisperSTTService(SegmentedSTTService): """ try: from faster_whisper import WhisperModel + logger.debug("Loading Whisper model...") self._model = WhisperModel( self.model_name, device=self._device, compute_type=self._compute_type @@ -414,7 +415,7 @@ class WhisperSTTServiceMLX(WhisperSTTService): ): # Skip WhisperSTTService.__init__ and call its parent directly SegmentedSTTService.__init__(self, **kwargs) - + self.set_model_name(model if isinstance(model, str) else model.value) self._no_speech_prob = no_speech_prob self._temperature = temperature @@ -422,14 +423,14 @@ class WhisperSTTServiceMLX(WhisperSTTService): self._settings = { "language": language, } - + # No need to call _load() as MLX Whisper loads models on demand @override def _load(self): """MLX Whisper loads models on demand, so this is a no-op.""" pass - + @override async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]: """Transcribes given audio using MLX Whisper. @@ -447,7 +448,7 @@ class WhisperSTTServiceMLX(WhisperSTTService): """ try: import mlx_whisper - + await self.start_processing_metrics() await self.start_ttfb_metrics() @@ -456,10 +457,11 @@ class WhisperSTTServiceMLX(WhisperSTTService): whisper_lang = self.language_to_service_language(self._settings["language"]) chunk = await asyncio.to_thread( - mlx_whisper.transcribe, audio_float, + mlx_whisper.transcribe, + audio_float, path_or_hf_repo=self.model_name, temperature=self._temperature, - language=whisper_lang + language=whisper_lang, ) text: str = "" for segment in chunk.get("segments", []): @@ -475,11 +477,11 @@ class WhisperSTTServiceMLX(WhisperSTTService): await self.stop_ttfb_metrics() await self.stop_processing_metrics() - + if text: logger.debug(f"Transcription: [{text}]") yield TranscriptionFrame(text, "", time_now_iso8601(), self._settings["language"]) - + except Exception as e: logger.exception(f"MLX Whisper transcription error: {e}") yield ErrorFrame(f"MLX Whisper transcription error: {str(e)}")