def _write_audio_to_wav(
    self, audio_array: np.ndarray, sample_rate: int = 16000, suffix: str = ""
) -> None:
    """Write audio data to a WAV file in a background thread.

    The file is written as mono 16-bit PCM to ``./smart_turn_audio_log`` and is
    named after the current local time (millisecond precision) followed by
    ``suffix``. All filesystem work, including creating the log directory,
    happens on a daemon thread inside a ``try`` block, so a logging failure is
    reported through the logger and never propagates to the caller.

    Args:
        audio_array: The audio data as a numpy array (float32, normalized to [-1, 1]).
        sample_rate: The sample rate of the audio data.
        suffix: Optional suffix to append to the filename (e.g., "_raw", "_padded").
    """
    import os
    import threading
    import wave
    from datetime import datetime

    # Millisecond-precision timestamp. The time fields are separated with "-"
    # rather than ":" because ":" is not a valid filename character on Windows.
    timestamp = datetime.now().strftime("%Y-%m-%d__%H-%M-%S.%f")[:-3]
    log_dir = "./smart_turn_audio_log"
    filename = os.path.join(log_dir, f"{timestamp}{suffix}.wav")

    # Snapshot the samples now so later in-place changes by the caller cannot
    # affect what the background thread writes.
    audio_copy = audio_array.copy()

    def write_wav():
        try:
            # Create the directory here (not on the caller's thread) so that a
            # filesystem error is logged instead of breaking turn prediction.
            os.makedirs(log_dir, exist_ok=True)

            # Clip before scaling: samples outside [-1, 1] would otherwise
            # overflow the int16 cast and corrupt the written audio.
            audio_int16 = (np.clip(audio_copy, -1.0, 1.0) * 32767).astype(np.int16)

            with wave.open(filename, "wb") as wav_file:
                wav_file.setnchannels(1)  # Mono
                wav_file.setsampwidth(2)  # 2 bytes for int16
                wav_file.setframerate(sample_rate)
                wav_file.writeframes(audio_int16.tobytes())

            logger.debug(f"Wrote audio to {filename}")
        except Exception as e:
            # Best-effort debug logging: report the failure and carry on.
            logger.error(f"Failed to write audio to {filename}: {e}")

    # Daemon thread so a pending write never blocks interpreter shutdown.
    threading.Thread(target=write_wav, daemon=True).start()
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Environment variable helpers.

This module provides small, centralized parsing helpers for environment variables.
"""

import os

# Accepted spellings, compared after stripping whitespace and lower-casing.
_TRUTHY_VALUES = frozenset({"1", "true", "yes", "y", "on"})
_FALSY_VALUES = frozenset({"0", "false", "no", "n", "off", ""})


class InvalidEnvVarValueError(ValueError):
    """Raised when an environment variable value cannot be parsed.

    Attributes:
        name: Name of the offending environment variable.
        value: The raw value that failed to parse.
        expected: Human-readable description of the accepted values.
    """

    def __init__(self, name: str, value: str, expected: str):
        super().__init__(f"Invalid value for env var {name!r}: {value!r}. Expected {expected}.")
        self.name = name
        self.value = value
        self.expected = expected

    def __reduce__(self):
        """Return constructor arguments so copy/pickle can rebuild the error."""
        # The default exception reduce re-creates the object from ``self.args``
        # (only the formatted message), which does not match this
        # three-argument constructor and fails with TypeError.
        return (type(self), (self.name, self.value, self.expected))


def env_truthy(name: str, default: bool = False) -> bool:
    """Interpret an environment variable as a boolean.

    - If the variable is **not set**, returns `default`.
    - If the variable is set to a recognized boolean string, returns the parsed value.
    - Otherwise, raises `InvalidEnvVarValueError`.

    Recognized values (case-insensitive, whitespace ignored):
    - Truthy: "1", "true", "yes", "y", "on"
    - Falsy: "0", "false", "no", "n", "off", ""

    Args:
        name: Name of the environment variable to read.
        default: Value returned when the variable is not set at all. A variable
            that is set but empty is treated as falsy, not as unset.

    Returns:
        The parsed boolean, or `default` when the variable is not set.

    Raises:
        InvalidEnvVarValueError: If the variable is set to an unrecognized value.
    """
    raw = os.getenv(name)
    if raw is None:
        return default

    val = raw.strip().lower()
    if val in _TRUTHY_VALUES:
        return True
    if val in _FALSY_VALUES:
        return False

    raise InvalidEnvVarValueError(
        name=name,
        value=raw,
        expected="true or false",
    )