transport(livekit): force specifying a vad analyzer
Don't default to SileroVADAnalyzer(). Also, resample incoming audio to the configured input sample rate (`audio_in_sample_rate`).
This commit is contained in:
@@ -33,6 +33,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue that would cause an error if no VAD analyzer was passed to
|
||||
`LiveKitTransport` params.
|
||||
|
||||
- Fixed `SileroVAD` processor to support interruptions properly.
|
||||
|
||||
### Other
|
||||
|
||||
@@ -4,9 +4,6 @@ import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from livekit import api # pip install livekit-api
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import TextFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -15,6 +12,12 @@ from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.livekit import LiveKitParams, LiveKitTransport
|
||||
|
||||
from livekit import api
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
|
||||
@@ -51,7 +51,7 @@ google = [ "google-generativeai~=0.7.2", "google-cloud-texttospeech~=2.17.2" ]
|
||||
gstreamer = [ "pygobject~=3.48.2" ]
|
||||
fireworks = [ "openai~=1.37.2" ]
|
||||
langchain = [ "langchain~=0.2.14", "langchain-community~=0.2.12", "langchain-openai~=0.1.20" ]
|
||||
livekit = [ "livekit~=0.13.1", "tenacity~=9.0.0" ]
|
||||
livekit = [ "livekit~=0.17.5", "livekit-api~=0.7.1", "tenacity~=8.5.0" ]
|
||||
lmnt = [ "lmnt~=1.1.4" ]
|
||||
local = [ "pyaudio~=0.2.14" ]
|
||||
moondream = [ "einops~=0.8.0", "timm~=1.0.8", "transformers~=4.44.0" ]
|
||||
|
||||
@@ -11,7 +11,6 @@ from typing import Any, Awaitable, Callable, List
|
||||
from pydantic import BaseModel
|
||||
|
||||
from pipecat.audio.utils import resample_audio
|
||||
from pipecat.audio.vad.vad_analyzer import VADAnalyzer
|
||||
from pipecat.frames.frames import (
|
||||
AudioRawFrame,
|
||||
CancelFrame,
|
||||
@@ -50,11 +49,7 @@ class LiveKitTransportMessageUrgentFrame(TransportMessageUrgentFrame):
|
||||
|
||||
|
||||
class LiveKitParams(TransportParams):
|
||||
audio_out_sample_rate: int = 48000
|
||||
audio_out_channels: int = 1
|
||||
vad_enabled: bool = True
|
||||
vad_analyzer: VADAnalyzer | None = None
|
||||
audio_in_sample_rate: int = 16000
|
||||
pass
|
||||
|
||||
|
||||
class LiveKitCallbacks(BaseModel):
|
||||
@@ -310,11 +305,6 @@ class LiveKitInputTransport(BaseInputTransport):
|
||||
self._client = client
|
||||
self._audio_in_task = None
|
||||
self._vad_analyzer: VADAnalyzer | None = params.vad_analyzer
|
||||
self._current_sample_rate: int = params.audio_in_sample_rate
|
||||
if params.vad_enabled and not params.vad_analyzer:
|
||||
self._vad_analyzer = VADAnalyzer(
|
||||
sample_rate=self._current_sample_rate, num_channels=self._params.audio_in_channels
|
||||
)
|
||||
|
||||
async def start(self, frame: StartFrame):
|
||||
await super().start(frame)
|
||||
@@ -384,24 +374,14 @@ class LiveKitInputTransport(BaseInputTransport):
|
||||
audio_data = audio_frame.data
|
||||
original_sample_rate = audio_frame.sample_rate
|
||||
|
||||
# Allow 8kHz and 16kHz, convert anything else to 16kHz
|
||||
if original_sample_rate not in [8000, 16000]:
|
||||
audio_data = resample_audio(audio_data, original_sample_rate, 16000)
|
||||
sample_rate = 16000
|
||||
else:
|
||||
sample_rate = original_sample_rate
|
||||
|
||||
if sample_rate != self._current_sample_rate:
|
||||
self._current_sample_rate = sample_rate
|
||||
if self._params.vad_enabled:
|
||||
self._vad_analyzer = VADAnalyzer(
|
||||
sample_rate=self._current_sample_rate,
|
||||
num_channels=self._params.audio_in_channels,
|
||||
)
|
||||
if original_sample_rate != self._params.audio_in_sample_rate:
|
||||
audio_data = resample_audio(
|
||||
audio_data, original_sample_rate, self._params.audio_in_sample_rate
|
||||
)
|
||||
|
||||
return AudioRawFrame(
|
||||
audio=audio_data,
|
||||
sample_rate=sample_rate,
|
||||
sample_rate=self._params.audio_in_sample_rate,
|
||||
num_channels=audio_frame.num_channels,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user