transport(livekit): force specifying a vad analyzer

Don't default to SileroVADAnalyzer(). Also, resample incoming audio to the configured input sample rate (`audio_in_sample_rate`).
This commit is contained in:
Aleix Conchillo Flaqué
2024-10-17 10:41:58 -07:00
parent d67e08be4d
commit 8108423742
4 changed files with 16 additions and 30 deletions

View File

@@ -33,6 +33,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Fixed an issue that would cause an error if no VAD analyzer was passed to
`LiveKitTransport` params.
- Fixed `SileroVAD` processor to support interruptions properly.
### Other

View File

@@ -4,9 +4,6 @@ import os
import sys
import aiohttp
from dotenv import load_dotenv
from livekit import api # pip install livekit-api
from loguru import logger
from pipecat.frames.frames import TextFrame
from pipecat.pipeline.pipeline import Pipeline
@@ -15,6 +12,12 @@ from pipecat.pipeline.task import PipelineTask
from pipecat.services.cartesia import CartesiaTTSService
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
from livekit import api
from loguru import logger
from dotenv import load_dotenv
load_dotenv(override=True)
logger.remove(0)

View File

@@ -51,7 +51,7 @@ google = [ "google-generativeai~=0.7.2", "google-cloud-texttospeech~=2.17.2" ]
gstreamer = [ "pygobject~=3.48.2" ]
fireworks = [ "openai~=1.37.2" ]
langchain = [ "langchain~=0.2.14", "langchain-community~=0.2.12", "langchain-openai~=0.1.20" ]
livekit = [ "livekit~=0.13.1", "tenacity~=9.0.0" ]
livekit = [ "livekit~=0.17.5", "livekit-api~=0.7.1", "tenacity~=8.5.0" ]
lmnt = [ "lmnt~=1.1.4" ]
local = [ "pyaudio~=0.2.14" ]
moondream = [ "einops~=0.8.0", "timm~=1.0.8", "transformers~=4.44.0" ]

View File

@@ -11,7 +11,6 @@ from typing import Any, Awaitable, Callable, List
from pydantic import BaseModel
from pipecat.audio.utils import resample_audio
from pipecat.audio.vad.vad_analyzer import VADAnalyzer
from pipecat.frames.frames import (
AudioRawFrame,
CancelFrame,
@@ -50,11 +49,7 @@ class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame):
class LiveKitParams(TransportParams):
audio_out_sample_rate: int = 48000
audio_out_channels: int = 1
vad_enabled: bool = True
vad_analyzer: VADAnalyzer | None = None
audio_in_sample_rate: int = 16000
pass
class LiveKitCallbacks(BaseModel):
@@ -310,11 +305,6 @@ class LiveKitInputTransport(BaseInputTransport):
self._client = client
self._audio_in_task = None
self._vad_analyzer: VADAnalyzer | None = params.vad_analyzer
self._current_sample_rate: int = params.audio_in_sample_rate
if params.vad_enabled and not params.vad_analyzer:
self._vad_analyzer = VADAnalyzer(
sample_rate=self._current_sample_rate, num_channels=self._params.audio_in_channels
)
async def start(self, frame: StartFrame):
await super().start(frame)
@@ -384,24 +374,14 @@ class LiveKitInputTransport(BaseInputTransport):
audio_data = audio_frame.data
original_sample_rate = audio_frame.sample_rate
# Allow 8kHz and 16kHz, convert anything else to 16kHz
if original_sample_rate not in [8000, 16000]:
audio_data = resample_audio(audio_data, original_sample_rate, 16000)
sample_rate = 16000
else:
sample_rate = original_sample_rate
if sample_rate != self._current_sample_rate:
self._current_sample_rate = sample_rate
if self._params.vad_enabled:
self._vad_analyzer = VADAnalyzer(
sample_rate=self._current_sample_rate,
num_channels=self._params.audio_in_channels,
)
if original_sample_rate != self._params.audio_in_sample_rate:
audio_data = resample_audio(
audio_data, original_sample_rate, self._params.audio_in_sample_rate
)
return AudioRawFrame(
audio=audio_data,
sample_rate=sample_rate,
sample_rate=self._params.audio_in_sample_rate,
num_channels=audio_frame.num_channels,
)