Merge pull request #1474 from pipecat-ai/khk/mem0-changelog

Changelog entry for mem0 service
This commit is contained in:
Mark Backman
2025-03-29 18:02:32 -04:00
committed by GitHub
4 changed files with 27 additions and 17 deletions

View File

@@ -9,13 +9,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Added `SmallWebRTCTransport`, a new P2P WebRTC transport.
- Created two examples in `p2p-webrtc`:
- **video-transform**: Demonstrates sending and receiving audio/video with `SmallWebRTCTransport` using `TypeScript`.
Includes video frame processing with OpenCV.
- **voice-agent**: A minimal example of creating a voice agent with `SmallWebRTCTransport`.
- Added `Mem0MemoryService`. Mem0 is a self-improving memory layer for LLM
applications. Learn more at: https://mem0.ai/.
- Added support to `ProtobufFrameSerializer` to send the messages from `TransportMessageFrame` and `TransportMessageUrgentFrame`.
- Added `SmallWebRTCTransport`, a new P2P WebRTC transport.
- Created two examples in `p2p-webrtc`:
- **video-transform**: Demonstrates sending and receiving audio/video with
`SmallWebRTCTransport` using `TypeScript`. Includes video frame
processing with OpenCV.
- **voice-agent**: A minimal example of creating a voice agent with
`SmallWebRTCTransport`.
- Added support to `ProtobufFrameSerializer` to send the messages from
`TransportMessageFrame` and `TransportMessageUrgentFrame`.
- Added support for a new TTS service, `PiperTTSService`.
(see https://github.com/rhasspy/piper/)

View File

@@ -63,6 +63,7 @@ pip install "pipecat-ai[option,...]"
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | `pip install "pipecat-ai[mem0]"` |
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | `pip install "pipecat-ai[moondream]"` |
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |

View File

@@ -64,6 +64,7 @@ langchain = [ "langchain~=0.3.20", "langchain-community~=0.3.20", "langchain-ope
livekit = [ "livekit~=0.22.0", "livekit-api~=0.8.2", "tenacity~=9.0.0" ]
lmnt = [ "websockets~=13.1" ]
local = [ "pyaudio~=0.2.14" ]
mem0 = [ "mem0ai~=0.1.76" ]
mlx-whisper = [ "mlx-whisper~=0.4.2" ]
moondream = [ "einops~=0.8.0", "timm~=1.0.13", "transformers~=4.48.0" ]
nim = []
@@ -86,7 +87,6 @@ ultravox = [ "transformers~=4.48.0", "vllm~=0.7.3" ]
webrtc = [ "aiortc~=1.10.1", "opencv-python~=4.11.0.86" ]
websocket = [ "websockets~=13.1", "fastapi~=0.115.6" ]
whisper = [ "faster-whisper~=1.1.1" ]
mem0 = [ "mem0ai~=0.1.76" ]
[tool.setuptools.packages.find]
# All the following settings are optional:

View File

@@ -9,10 +9,10 @@
import asyncio
from enum import Enum
from typing import AsyncGenerator, Optional
from typing_extensions import TYPE_CHECKING, override
import numpy as np
from loguru import logger
from typing_extensions import TYPE_CHECKING, override
from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
from pipecat.services.ai_services import SegmentedSTTService
@@ -26,7 +26,7 @@ if TYPE_CHECKING:
logger.error(f"Exception: {e}")
logger.error("In order to use Whisper, you need to `pip install pipecat-ai[whisper]`.")
raise Exception(f"Missing module: {e}")
try:
import mlx_whisper
except ModuleNotFoundError as e:
@@ -332,6 +332,7 @@ class WhisperSTTService(SegmentedSTTService):
"""
try:
from faster_whisper import WhisperModel
logger.debug("Loading Whisper model...")
self._model = WhisperModel(
self.model_name, device=self._device, compute_type=self._compute_type
@@ -414,7 +415,7 @@ class WhisperSTTServiceMLX(WhisperSTTService):
):
# Skip WhisperSTTService.__init__ and call its parent directly
SegmentedSTTService.__init__(self, **kwargs)
self.set_model_name(model if isinstance(model, str) else model.value)
self._no_speech_prob = no_speech_prob
self._temperature = temperature
@@ -422,14 +423,14 @@ class WhisperSTTServiceMLX(WhisperSTTService):
self._settings = {
"language": language,
}
# No need to call _load() as MLX Whisper loads models on demand
@override
def _load(self):
"""MLX Whisper loads models on demand, so this is a no-op."""
pass
@override
async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
"""Transcribes given audio using MLX Whisper.
@@ -447,7 +448,7 @@ class WhisperSTTServiceMLX(WhisperSTTService):
"""
try:
import mlx_whisper
await self.start_processing_metrics()
await self.start_ttfb_metrics()
@@ -456,10 +457,11 @@ class WhisperSTTServiceMLX(WhisperSTTService):
whisper_lang = self.language_to_service_language(self._settings["language"])
chunk = await asyncio.to_thread(
mlx_whisper.transcribe, audio_float,
mlx_whisper.transcribe,
audio_float,
path_or_hf_repo=self.model_name,
temperature=self._temperature,
language=whisper_lang
language=whisper_lang,
)
text: str = ""
for segment in chunk.get("segments", []):
@@ -475,11 +477,11 @@ class WhisperSTTServiceMLX(WhisperSTTService):
await self.stop_ttfb_metrics()
await self.stop_processing_metrics()
if text:
logger.debug(f"Transcription: [{text}]")
yield TranscriptionFrame(text, "", time_now_iso8601(), self._settings["language"])
except Exception as e:
logger.exception(f"MLX Whisper transcription error: {e}")
yield ErrorFrame(f"MLX Whisper transcription error: {str(e)}")