Merge pull request #1474 from pipecat-ai/khk/mem0-changelog
Changelog entry for mem0 service
This commit is contained in:
19
CHANGELOG.md
19
CHANGELOG.md
@@ -9,13 +9,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- Added `SmallWebRTCTransport`, a new P2P WebRTC transport.
|
||||
- Created two examples in `p2p-webrtc`:
|
||||
- **video-transform**: Demonstrates sending and receiving audio/video with `SmallWebRTCTransport` using `TypeScript`.
|
||||
Includes video frame processing with OpenCV.
|
||||
- **voice-agent**: A minimal example of creating a voice agent with `SmallWebRTCTransport`.
|
||||
- Added `Mem0MemoryService`. Mem0 is a self-improving memory layer for LLM
|
||||
applications. Learn more at: https://mem0.ai/.
|
||||
|
||||
- Added support to `ProtobufFrameSerializer` to send the messages from `TransportMessageFrame` and `TransportMessageUrgentFrame`.
|
||||
- Added `SmallWebRTCTransport`, a new P2P WebRTC transport.
|
||||
|
||||
- Created two examples in `p2p-webrtc`:
|
||||
- **video-transform**: Demonstrates sending and receiving audio/video with
|
||||
`SmallWebRTCTransport` using `TypeScript`. Includes video frame
|
||||
processing with OpenCV.
|
||||
- **voice-agent**: A minimal example of creating a voice agent with
|
||||
`SmallWebRTCTransport`.
|
||||
|
||||
- Added support to `ProtobufFrameSerializer` to send the messages from
|
||||
`TransportMessageFrame` and `TransportMessageUrgentFrame`.
|
||||
|
||||
- Added support for a new TTS service, `PiperTTSService`.
|
||||
(see https://github.com/rhasspy/piper/)
|
||||
|
||||
@@ -63,6 +63,7 @@ pip install "pipecat-ai[option,...]"
|
||||
| Speech-to-Speech | [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai) | `pip install "pipecat-ai[google]"` |
|
||||
| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | `pip install "pipecat-ai[daily]"` |
|
||||
| Video | [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | `pip install "pipecat-ai[tavus,simli]"` |
|
||||
| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | `pip install "pipecat-ai[mem0]"` |
|
||||
| Vision & Image | [fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | `pip install "pipecat-ai[moondream]"` |
|
||||
| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [Noisereduce](https://docs.pipecat.ai/server/utilities/audio/noisereduce-filter) | `pip install "pipecat-ai[silero]"` |
|
||||
| Analytics & Metrics | [Canonical AI](https://docs.pipecat.ai/server/services/analytics/canonical), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | `pip install "pipecat-ai[canonical]"` |
|
||||
|
||||
@@ -64,6 +64,7 @@ langchain = [ "langchain~=0.3.20", "langchain-community~=0.3.20", "langchain-ope
|
||||
livekit = [ "livekit~=0.22.0", "livekit-api~=0.8.2", "tenacity~=9.0.0" ]
|
||||
lmnt = [ "websockets~=13.1" ]
|
||||
local = [ "pyaudio~=0.2.14" ]
|
||||
mem0 = [ "mem0ai~=0.1.76" ]
|
||||
mlx-whisper = [ "mlx-whisper~=0.4.2" ]
|
||||
moondream = [ "einops~=0.8.0", "timm~=1.0.13", "transformers~=4.48.0" ]
|
||||
nim = []
|
||||
@@ -86,7 +87,6 @@ ultravox = [ "transformers~=4.48.0", "vllm~=0.7.3" ]
|
||||
webrtc = [ "aiortc~=1.10.1", "opencv-python~=4.11.0.86" ]
|
||||
websocket = [ "websockets~=13.1", "fastapi~=0.115.6" ]
|
||||
whisper = [ "faster-whisper~=1.1.1" ]
|
||||
mem0 = [ "mem0ai~=0.1.76" ]
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
# All the following settings are optional:
|
||||
|
||||
@@ -9,10 +9,10 @@
|
||||
import asyncio
|
||||
from enum import Enum
|
||||
from typing import AsyncGenerator, Optional
|
||||
from typing_extensions import TYPE_CHECKING, override
|
||||
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
from typing_extensions import TYPE_CHECKING, override
|
||||
|
||||
from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
|
||||
from pipecat.services.ai_services import SegmentedSTTService
|
||||
@@ -26,7 +26,7 @@ if TYPE_CHECKING:
|
||||
logger.error(f"Exception: {e}")
|
||||
logger.error("In order to use Whisper, you need to `pip install pipecat-ai[whisper]`.")
|
||||
raise Exception(f"Missing module: {e}")
|
||||
|
||||
|
||||
try:
|
||||
import mlx_whisper
|
||||
except ModuleNotFoundError as e:
|
||||
@@ -332,6 +332,7 @@ class WhisperSTTService(SegmentedSTTService):
|
||||
"""
|
||||
try:
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
logger.debug("Loading Whisper model...")
|
||||
self._model = WhisperModel(
|
||||
self.model_name, device=self._device, compute_type=self._compute_type
|
||||
@@ -414,7 +415,7 @@ class WhisperSTTServiceMLX(WhisperSTTService):
|
||||
):
|
||||
# Skip WhisperSTTService.__init__ and call its parent directly
|
||||
SegmentedSTTService.__init__(self, **kwargs)
|
||||
|
||||
|
||||
self.set_model_name(model if isinstance(model, str) else model.value)
|
||||
self._no_speech_prob = no_speech_prob
|
||||
self._temperature = temperature
|
||||
@@ -422,14 +423,14 @@ class WhisperSTTServiceMLX(WhisperSTTService):
|
||||
self._settings = {
|
||||
"language": language,
|
||||
}
|
||||
|
||||
|
||||
# No need to call _load() as MLX Whisper loads models on demand
|
||||
|
||||
@override
|
||||
def _load(self):
|
||||
"""MLX Whisper loads models on demand, so this is a no-op."""
|
||||
pass
|
||||
|
||||
|
||||
@override
|
||||
async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
|
||||
"""Transcribes given audio using MLX Whisper.
|
||||
@@ -447,7 +448,7 @@ class WhisperSTTServiceMLX(WhisperSTTService):
|
||||
"""
|
||||
try:
|
||||
import mlx_whisper
|
||||
|
||||
|
||||
await self.start_processing_metrics()
|
||||
await self.start_ttfb_metrics()
|
||||
|
||||
@@ -456,10 +457,11 @@ class WhisperSTTServiceMLX(WhisperSTTService):
|
||||
|
||||
whisper_lang = self.language_to_service_language(self._settings["language"])
|
||||
chunk = await asyncio.to_thread(
|
||||
mlx_whisper.transcribe, audio_float,
|
||||
mlx_whisper.transcribe,
|
||||
audio_float,
|
||||
path_or_hf_repo=self.model_name,
|
||||
temperature=self._temperature,
|
||||
language=whisper_lang
|
||||
language=whisper_lang,
|
||||
)
|
||||
text: str = ""
|
||||
for segment in chunk.get("segments", []):
|
||||
@@ -475,11 +477,11 @@ class WhisperSTTServiceMLX(WhisperSTTService):
|
||||
|
||||
await self.stop_ttfb_metrics()
|
||||
await self.stop_processing_metrics()
|
||||
|
||||
|
||||
if text:
|
||||
logger.debug(f"Transcription: [{text}]")
|
||||
yield TranscriptionFrame(text, "", time_now_iso8601(), self._settings["language"])
|
||||
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"MLX Whisper transcription error: {e}")
|
||||
yield ErrorFrame(f"MLX Whisper transcription error: {str(e)}")
|
||||
|
||||
Reference in New Issue
Block a user