Drop aic-sdk v1 support from the AIC filter and VAD analyzer; require aic-sdk >= 2.0.0.

2026-01-16 12:31:41 +01:00
parent d3bdd2d246
commit a90c15362c
4 changed files with 12 additions and 489 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ Issues = "https://github.com/pipecat-ai/pipecat/issues"
 Changelog = "https://github.com/pipecat-ai/pipecat/blob/main/CHANGELOG.md"

 [project.optional-dependencies]
-aic = [ "aic-sdk>=1.2.0" ]
+aic = [ "aic-sdk>=2.0.0" ]
 anthropic = [ "anthropic~=0.49.0" ]
 assemblyai = [ "pipecat-ai[websockets-base]" ]
 asyncai = [ "pipecat-ai[websockets-base]" ]
--- a/src/pipecat/audio/filters/aic_filter.py
+++ b/src/pipecat/audio/filters/aic_filter.py
@@ -11,8 +11,7 @@ enhance audio streams in real time. It mirrors the structure of other filters li
 the Koala filter and integrates with Pipecat's input transport pipeline.

 Classes:
-    AICFilter: For aic-sdk < 2.0.0 (uses 'aic' module)
-    AICFilterV2: For aic-sdk >= 2.0.0 (uses 'aic_sdk' module)
+    AICFilter: For aic-sdk >= 2.0.0 (uses 'aic_sdk' module)
 """

 import os
@@ -31,258 +30,8 @@ class AICFilter(BaseAudioFilter):
    Buffers incoming audio to the model's preferred block size and processes
    planar frames in-place using float32 samples in the linear -1..+1 range.

-    .. note::
-        This class requires aic-sdk < 2.0.0 (uses 'aic' module).
-        For aic-sdk >= 2.0.0, use :class:`AICFilterV2` instead.
-    """
-
-    def __init__(
-        self,
-        *,
-        license_key: str = "",
-        model_type: Optional["AICModelType"] = None,
-        enhancement_level: Optional[float] = 1.0,
-        voice_gain: Optional[float] = 1.0,
-        noise_gate_enable: Optional[bool] = True,
-    ) -> None:
-        """Initialize the AIC filter.
-
-        Args:
-            license_key: ai-coustics license key for authentication.
-            model_type: Model variant to load. If None, defaults to AICModelType.QUAIL_STT.
-            enhancement_level: Optional overall enhancement strength (0.0..1.0).
-            voice_gain: Optional linear gain applied to detected speech (0.0..4.0).
-            noise_gate_enable: Optional enable/disable noise gate (default: True).
-
-                .. deprecated:: 1.3.0
-                    The `noise_gate_enable` parameter is deprecated and no longer has any effect.
-                    It will be removed in a future version.
-        """
-        from pipecat.audio.utils import check_aic_sdk_version
-
-        check_aic_sdk_version("v1")
-
-        # Import AIC SDK v1 types
-        from aic import AICModelType
-
-        self._license_key = license_key
-        self._model_type = model_type if model_type is not None else AICModelType.QUAIL_STT
-
-        self._enhancement_level = enhancement_level
-        self._voice_gain = voice_gain
-        if noise_gate_enable is not None:
-            import warnings
-
-            with warnings.catch_warnings():
-                warnings.simplefilter("always")
-                warnings.warn(
-                    "Parameter `noise_gate_enable` is deprecated and no longer has any effect. "
-                    "It will be removed in a future version. Use AIC VAD instead (create_vad_analyzer()).",
-                    DeprecationWarning,
-                )
-
-        self._noise_gate_enable = noise_gate_enable
-
-        self._enabled = True
-        self._sample_rate = 0
-        self._aic_ready = False
-        self._frames_per_block = 0
-        self._audio_buffer = bytearray()
-        # Model will be created in start() since the API now requires sample_rate
-        self._aic = None
-
-    def get_vad_factory(self):
-        """Return a zero-arg factory that will create the VAD once the model exists.
-
-        Returns:
-            A zero-argument callable that, when invoked, returns an initialized
-            VoiceActivityDetector bound to the underlying AIC model. Raises a
-            RuntimeError if the model has not been initialized (i.e. start()
-            has not been called successfully).
-        """
-
-        def _factory():
-            if self._aic is None:
-                raise RuntimeError("AIC model not initialized yet. Call start(sample_rate) first.")
-            return self._aic.create_vad()
-
-        return _factory
-
-    def create_vad_analyzer(
-        self,
-        *,
-        lookback_buffer_size: Optional[float] = None,
-        sensitivity: Optional[float] = None,
-    ):
-        """Return an analyzer that will lazily instantiate the AIC VAD when ready.
-
-        AIC VAD parameters:
-          - lookback_buffer_size:
-              Number of window-length audio buffers used as a lookback buffer.
-              Higher values increase prediction stability but add latency.
-              Range: 1.0 .. 20.0, Default (SDK): 6.0
-          - sensitivity:
-              Energy threshold sensitivity. Energy threshold = 10 ** (-sensitivity).
-              Range: 1.0 .. 15.0, Default (SDK): 6.0
-
-        Args:
-            lookback_buffer_size: Optional lookback buffer size to configure on the VAD.
-                Range: 1.0 .. 20.0. If None, SDK default is used.
-            sensitivity: Optional sensitivity (energy threshold) to configure on the VAD.
-                Range: 1.0 .. 15.0. If None, SDK default is used.
-
-        Returns:
-            A lazily-initialized AICVADAnalyzer that will bind to the VAD backend
-            once the filter's model has been created (after start(sample_rate)).
-        """
-        from pipecat.audio.vad.aic_vad import AICVADAnalyzer
-
-        return AICVADAnalyzer(
-            vad_factory=self.get_vad_factory(),
-            lookback_buffer_size=lookback_buffer_size,
-            sensitivity=sensitivity,
-        )
-
-    async def start(self, sample_rate: int):
-        """Initialize the filter with the transport's sample rate.
-
-        Args:
-            sample_rate: The sample rate of the input transport in Hz.
-
-        Returns:
-            None
-        """
-        from aic import AICParameter, Model
-
-        self._sample_rate = sample_rate
-
-        try:
-            # Create model with required runtime parameters
-            self._aic = Model(
-                model_type=self._model_type,
-                license_key=self._license_key or None,
-                sample_rate=self._sample_rate,
-                channels=1,
-            )
-            self._frames_per_block = self._aic.optimal_num_frames()
-
-            # Optional parameter configuration
-            if self._enhancement_level is not None:
-                self._aic.set_parameter(
-                    AICParameter.ENHANCEMENT_LEVEL,
-                    float(self._enhancement_level if self._enabled else 0.0),
-                )
-            if self._voice_gain is not None:
-                self._aic.set_parameter(AICParameter.VOICE_GAIN, float(self._voice_gain))
-
-            self._aic_ready = True
-
-            # Log processor information
-            logger.debug(f"ai-coustics filter started:")
-            logger.debug(f"  Sample rate: {self._sample_rate} Hz")
-            logger.debug(f"  Frames per chunk: {self._frames_per_block}")
-            logger.debug(f"  Enhancement strength: {int(self._enhancement_level * 100)}%")
-            logger.debug(f"  Optimal input buffer size: {self._aic.optimal_num_frames()} samples")
-            logger.debug(f"  Optimal sample rate: {self._aic.optimal_sample_rate()} Hz")
-            logger.debug(
-                f"  Current algorithmic latency: {self._aic.processing_latency() / self._sample_rate * 1000:.2f}ms"
-            )
-        except Exception as e:  # noqa: BLE001 - surfacing SDK initialization errors
-            logger.error(f"AIC model initialization failed: {e}")
-            self._aic_ready = False
-
-    async def stop(self):
-        """Clean up the AIC model when stopping.
-
-        Returns:
-            None
-        """
-        try:
-            if self._aic is not None:
-                self._aic.close()
-        finally:
-            self._aic = None
-            self._aic_ready = False
-            self._audio_buffer.clear()
-
-    async def process_frame(self, frame: FilterControlFrame):
-        """Process control frames to enable/disable filtering.
-
-        Args:
-            frame: The control frame containing filter commands.
-
-        Returns:
-            None
-        """
-        if isinstance(frame, FilterEnableFrame):
-            from aic import AICParameter
-
-            self._enabled = frame.enable
-            if self._aic is not None:
-                try:
-                    level = float(self._enhancement_level if self._enabled else 0.0)
-                    self._aic.set_parameter(AICParameter.ENHANCEMENT_LEVEL, level)
-                except Exception as e:  # noqa: BLE001
-                    logger.error(f"AIC set_parameter failed: {e}")
-
-    async def filter(self, audio: bytes) -> bytes:
-        """Apply AIC enhancement to audio data.
-
-        Buffers incoming audio and processes it in chunks that match the AIC
-        model's required block length. Returns enhanced audio data.
-
-        Args:
-            audio: Raw audio data as bytes to be filtered (int16 PCM, planar).
-
-        Returns:
-            Enhanced audio data as bytes (int16 PCM, planar).
-        """
-        if not self._aic_ready or self._aic is None:
-            return audio
-
-        self._audio_buffer.extend(audio)
-
-        filtered_chunks: List[bytes] = []
-
-        # Number of int16 samples currently buffered
-        available_frames = len(self._audio_buffer) // 2
-
-        while available_frames >= self._frames_per_block:
-            # Consume exactly one block worth of frames
-            samples_to_consume = self._frames_per_block * 1
-            bytes_to_consume = samples_to_consume * 2
-            block_bytes = bytes(self._audio_buffer[:bytes_to_consume])
-
-            # Convert to float32 in -1..+1 range and reshape to planar (channels, frames)
-            block_i16 = np.frombuffer(block_bytes, dtype=np.int16)
-            block_f32 = (block_i16.astype(np.float32) / 32768.0).reshape(
-                (1, self._frames_per_block)
-            )
-
-            # Process planar in-place; returns ndarray (same shape)
-            out_f32 = await self._aic.process_async(block_f32)
-
-            # Convert back to int16 bytes, planar layout
-            out_i16 = np.clip(out_f32 * 32768.0, -32768, 32767).astype(np.int16)
-            filtered_chunks.append(out_i16.reshape(-1).tobytes())
-
-            # Slide buffer
-            self._audio_buffer = self._audio_buffer[bytes_to_consume:]
-            available_frames = len(self._audio_buffer) // 2
-
-        # Do not flush incomplete frames; keep them buffered for the next call
-        return b"".join(filtered_chunks)
-
-
-class AICFilterV2(BaseAudioFilter):
-    """Audio filter using ai-coustics' AIC SDK v2 for real-time enhancement.
-
-    Buffers incoming audio to the model's preferred block size and processes
-    planar frames in-place using float32 samples in the linear -1..+1 range.
-
    .. note::
        This class requires aic-sdk >= 2.0.0 (uses 'aic_sdk' module).
-        For aic-sdk < 2.0.0, use :class:`AICFilter` instead.
    """

    def __init__(
@@ -311,10 +60,6 @@ class AICFilterV2(BaseAudioFilter):
        Raises:
            ValueError: If neither model_id nor model_path is provided.
        """
-        from pipecat.audio.utils import check_aic_sdk_version
-
-        check_aic_sdk_version("v2")
-
        if model_id is None and model_path is None:
            raise ValueError(
                "Either 'model_id' or 'model_path' must be provided. "
@@ -337,7 +82,7 @@ class AICFilterV2(BaseAudioFilter):
        self._frames_per_block = 0
        self._audio_buffer = bytearray()

-        # v2 API objects
+        # AIC SDK objects
        self._model = None
        self._processor = None
        self._processor_ctx = None
@@ -362,7 +107,7 @@ class AICFilterV2(BaseAudioFilter):
    ):
        """Return an analyzer that will lazily instantiate the AIC VAD when ready.

-        AIC VAD parameters (v2):
+        AIC VAD parameters:
          - speech_hold_duration:
              How long VAD continues detecting after speech ends (in seconds).
              Range: 0.0 .. 20x model window length, Default (SDK): 0.05s
@@ -377,12 +122,12 @@ class AICFilterV2(BaseAudioFilter):
                Range: 1.0 .. 15.0. If None, SDK default (6.0) is used.

        Returns:
-            A lazily-initialized AICVADAnalyzerV2 that will bind to the VAD context
+            A lazily-initialized AICVADAnalyzer that will bind to the VAD context
            once the filter's processor has been created (after start(sample_rate)).
        """
-        from pipecat.audio.vad.aic_vad import AICVADAnalyzerV2
+        from pipecat.audio.vad.aic_vad import AICVADAnalyzer

-        return AICVADAnalyzerV2(
+        return AICVADAnalyzer(
            vad_context_factory=lambda: self.get_vad_context(),
            speech_hold_duration=speech_hold_duration,
            sensitivity=sensitivity,
@@ -446,7 +191,7 @@ class AICFilterV2(BaseAudioFilter):
            self._aic_ready = True

            # Log processor information
-            logger.debug(f"ai-coustics filter (v2) started:")
+            logger.debug(f"ai-coustics filter started:")
            logger.debug(f"  Model ID: {self._model.get_id()}")
            logger.debug(f"  Sample rate: {self._sample_rate} Hz")
            logger.debug(f"  Frames per chunk: {self._frames_per_block}")
--- a/src/pipecat/audio/utils.py
+++ b/src/pipecat/audio/utils.py
@@ -14,10 +14,9 @@ various audio formats used in Pipecat pipelines.
 import audioop
 from typing import Literal

-from loguru import logger
-
 import numpy as np
 import pyloudnorm as pyln
+from loguru import logger

 from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
 from pipecat.audio.resamplers.soxr_resampler import SOXRAudioResampler
@@ -314,69 +313,3 @@ def is_silence(pcm_bytes: bytes) -> bool:

    # If max value is lower than SPEAKING_THRESHOLD, consider it as silence
    return max_value <= SPEAKING_THRESHOLD
-
-
-def is_aic_sdk_v2() -> bool:
-    """Detect if aic-sdk v2 is installed by checking the module name.
-
-    In v2, the module was renamed from 'aic' to 'aic_sdk'.
-
-    Returns:
-        True if aic-sdk v2 (aic_sdk module) is installed, False if v1 (aic module).
-
-    Raises:
-        ImportError: If neither aic nor aic_sdk module is installed.
-    """
-    try:
-        import aic_sdk  # noqa: F401
-
-        return True
-    except ModuleNotFoundError:
-        pass
-
-    try:
-        import aic  # noqa: F401
-
-        return False
-    except ModuleNotFoundError:
-        logger.error("In order to use the AIC filter, you need to `pip install pipecat-ai[aic]`.")
-        raise ImportError(
-            "aic-sdk is not installed. Install with 'pip install pipecat-ai[aic]'."
-        )
-
-
-def check_aic_sdk_version(required_version: Literal["v1", "v2"]) -> None:
-    """Check if the aic-sdk is installed and compatible with the module.
-
-    This function checks both that the aic-sdk is installed and that its version
-    is compatible with the module requirements. Version detection is based on
-    the module name: v2 uses 'aic_sdk', v1 uses 'aic'.
-
-    Args:
-        required_version: Either "v1" (for aic-sdk < 2.0.0) or "v2" (for aic-sdk >= 2.0.0).
-
-    Raises:
-        ImportError: If aic-sdk is not installed or version is incompatible.
-    """
-    is_v2 = is_aic_sdk_v2()
-
-    if required_version == "v1" and is_v2:
-        error_msg = (
-            "aic-sdk v2 (aic_sdk module) detected, but v1 (aic module) is required. "
-            "Please use the v2 classes instead: "
-            "'from pipecat.audio.filters.aic_filter import AICFilterV2' or "
-            "'from pipecat.audio.vad.aic_vad import AICVADAnalyzerV2'."
-        )
-        logger.error(error_msg)
-        raise ImportError(error_msg)
-
-    if required_version == "v2" and not is_v2:
-        error_msg = (
-            "aic-sdk v1 (aic module) detected, but v2 (aic_sdk module) is required. "
-            "Please update with 'pip install --upgrade aic-sdk>=2.0.0' "
-            "or use the v1 classes: "
-            "'from pipecat.audio.filters.aic_filter import AICFilter' or "
-            "'from pipecat.audio.vad.aic_vad import AICVADAnalyzer'."
-        )
-        logger.error(error_msg)
-        raise ImportError(error_msg)
--- a/src/pipecat/audio/vad/aic_vad.py
+++ b/src/pipecat/audio/vad/aic_vad.py
@@ -5,8 +5,7 @@ is_speech_detected() and map it to a float confidence (1.0/0.0). They use
 10 ms windows based on the sample rate and apply optional AIC VAD parameters.

 Classes:
-    AICVADAnalyzer: For aic-sdk < 2.0.0 (uses 'aic' module)
-    AICVADAnalyzerV2: For aic-sdk >= 2.0.0 (uses 'aic_sdk' module)
+    AICVADAnalyzer: For aic-sdk >= 2.0.0 (uses 'aic_sdk' module)
 """

 from typing import Any, Callable, Optional
@@ -17,155 +16,6 @@ from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams


 class AICVADAnalyzer(VADAnalyzer):
-    """VAD analyzer that lazily instantiates the AIC VoiceActivityDetector via a factory.
-
-    The analyzer can be constructed before the AIC Model exists. Once the filter has
-    started and the Model is available, the provided factory will succeed and the
-    backend VAD will be created. We then switch to single-sample updates where
-    num_frames_required() returns 1 and confidence is derived from the backend's
-    boolean is_speech_detected() state.
-
-    AIC VAD runtime parameters:
-      - lookback_buffer_size:
-          Controls the lookback buffer size used by the VAD, i.e. the number of
-          window-length audio buffers used as a lookback buffer. Larger values improve
-          stability but increase latency.
-          Range: 1.0 .. 20.0
-          Default (SDK): 6.0
-      - sensitivity:
-          Controls the energy threshold sensitivity. Higher values make the detector
-          less sensitive (require more energy to count as speech).
-          Range: 1.0 .. 15.0
-          Formula: Energy threshold = 10 ** (-sensitivity)
-          Default (SDK): 6.0
-
-    .. note::
-        This class requires aic-sdk < 2.0.0 (uses 'aic' module).
-        For aic-sdk >= 2.0.0, use :class:`AICVADAnalyzerV2` instead.
-    """
-
-    def __init__(
-        self,
-        *,
-        vad_factory: Optional[Callable[[], Any]] = None,
-        lookback_buffer_size: Optional[float] = None,
-        sensitivity: Optional[float] = None,
-    ):
-        """Create an AIC VAD analyzer.
-
-        Args:
-            vad_factory:
-                Zero-arg callable that returns an initialized AIC VoiceActivityDetector.
-                This may raise until the filter's Model has been created; the analyzer
-                will retry on set_sample_rate/first use.
-            lookback_buffer_size:
-                Optional override for AIC VAD lookback buffer size.
-                Range: 1.0 .. 20.0. Larger values increase stability at the cost of latency.
-                If None, the SDK default (6.0) is used.
-            sensitivity:
-                Optional override for AIC VAD sensitivity (energy threshold).
-                Range: 1.0 .. 15.0. Energy threshold = 10 ** (-sensitivity).
-                If None, the SDK default (6.0) is used.
-        """
-        from pipecat.audio.utils import check_aic_sdk_version
-
-        check_aic_sdk_version("v1")
-
-        # Use fixed VAD parameters for AIC: no user override
-        fixed_params = VADParams(confidence=0.5, start_secs=0.0, stop_secs=0.0, min_volume=0.0)
-        super().__init__(sample_rate=None, params=fixed_params)
-        self._vad_factory = vad_factory
-        self._backend_vad: Optional[Any] = None
-        self._pending_lookback: Optional[float] = lookback_buffer_size
-        self._pending_sensitivity: Optional[float] = sensitivity
-
-    def bind_vad_factory(self, vad_factory: Callable[[], Any]):
-        """Attach or replace the factory post-construction."""
-        self._vad_factory = vad_factory
-        self._ensure_backend_initialized()
-
-    def _apply_backend_params(self):
-        """Apply optional AIC VAD parameters if available."""
-        from aic import AICVadParameter
-
-        if self._backend_vad is None or AICVadParameter is None:
-            return
-        try:
-            if self._pending_lookback is not None:
-                self._backend_vad.set_parameter(
-                    AICVadParameter.LOOKBACK_BUFFER_SIZE, float(self._pending_lookback)
-                )
-            if self._pending_sensitivity is not None:
-                self._backend_vad.set_parameter(
-                    AICVadParameter.SENSITIVITY, float(self._pending_sensitivity)
-                )
-        except Exception as e:  # noqa: BLE001
-            logger.debug(f"AIC VAD parameter application deferred/failed: {e}")
-
-    def _ensure_backend_initialized(self):
-        if self._backend_vad is not None:
-            return
-        if not self._vad_factory:
-            return
-        try:
-            self._backend_vad = self._vad_factory()
-            self._apply_backend_params()
-            # With backend ready, recompute internal frame sizing
-            super().set_params(self._params)
-            logger.debug("AIC VAD backend initialized in analyzer.")
-        except Exception as e:  # noqa: BLE001
-            # Filter may not be started yet; try again later
-            logger.debug(f"Deferring AIC VAD backend initialization: {e}")
-
-    def set_sample_rate(self, sample_rate: int):
-        """Set the sample rate for audio processing.
-
-        Args:
-            sample_rate: Audio sample rate in Hz.
-        """
-        # Set rate and attempt backend initialization once we know SR
-        self._sample_rate = self._init_sample_rate or sample_rate
-        self._ensure_backend_initialized()
-        # Ensure params are initialized even if backend not ready yet
-        try:
-            super().set_params(self._params)
-        except Exception:
-            pass
-
-    def num_frames_required(self) -> int:
-        """Get the number of audio frames required for analysis.
-
-        Returns:
-            Number of frames needed for VAD processing.
-        """
-        # Use 10 ms windows based on sample rate
-        return int(self.sample_rate * 0.01) if self.sample_rate > 0 else 160
-
-    def voice_confidence(self, buffer: bytes) -> float:
-        """Calculate voice activity confidence for the given audio buffer.
-
-        Args:
-            buffer: Audio buffer to analyze.
-
-        Returns:
-            Voice confidence score is 0.0 or 1.0.
-        """
-        # Ensure backend exists (filter might have started since last call)
-        self._ensure_backend_initialized()
-        if self._backend_vad is None:
-            return 0.0
-
-        # We do not need to analyze 'buffer' here since the model's VAD is updated
-        # as part of the enhancement pipeline. Simply query the boolean and map it.
-        try:
-            is_speech = self._backend_vad.is_speech_detected()
-            return 1.0 if is_speech else 0.0
-        except Exception as e:  # noqa: BLE001
-            logger.error(f"AIC VAD inference error: {e}")
-            return 0.0
-
-
-class AICVADAnalyzerV2(VADAnalyzer):
    """VAD analyzer that lazily binds to the AIC VadContext via a factory.

    The analyzer can be constructed before the AIC Processor exists. Once the filter has
@@ -173,7 +23,7 @@ class AICVADAnalyzerV2(VADAnalyzer):
    VadContext will be obtained. We then use the context's is_speech_detected() state
    to derive confidence values.

-    AIC VAD runtime parameters (v2):
+    AIC VAD runtime parameters:
      - speech_hold_duration:
          Controls for how long the VAD continues to detect speech after the audio signal
          no longer contains speech (in seconds).
@@ -188,7 +38,6 @@ class AICVADAnalyzerV2(VADAnalyzer):

    .. note::
        This class requires aic-sdk >= 2.0.0 (uses 'aic_sdk' module).
-        For aic-sdk < 2.0.0, use :class:`AICVADAnalyzer` instead.
    """

    def __init__(
@@ -214,10 +63,6 @@ class AICVADAnalyzerV2(VADAnalyzer):
                Range: 1.0 .. 15.0. Energy threshold = 10 ** (-sensitivity).
                If None, the SDK default (6.0) is used.
        """
-        from pipecat.audio.utils import check_aic_sdk_version
-
-        check_aic_sdk_version("v2")
-
        # Use fixed VAD parameters for AIC: no user override
        fixed_params = VADParams(confidence=0.5, start_secs=0.0, stop_secs=0.0, min_volume=0.0)
        super().__init__(sample_rate=None, params=fixed_params)
@@ -259,7 +104,7 @@ class AICVADAnalyzerV2(VADAnalyzer):
            self._apply_vad_params()
            # With VAD context ready, recompute internal frame sizing
            super().set_params(self._params)
-            logger.debug("AIC VAD context (v2) initialized in analyzer.")
+            logger.debug("AIC VAD context initialized in analyzer.")
        except Exception as e:  # noqa: BLE001
            # Filter may not be started yet; try again later
            logger.debug(f"Deferring AIC VAD context initialization: {e}")