diff --git a/pyproject.toml b/pyproject.toml index 4f1c7af13..1172d347a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -159,7 +159,7 @@ where = ["src"] "src/pipecat/audio/dtmf/dtmf-star.wav", ] "pipecat.services.aws_nova_sonic" = ["src/pipecat/services/aws_nova_sonic/ready.wav"] -"pipecat.audio.turn.smart_turn.data" = ["src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx"] +"pipecat.audio.turn.smart_turn.data" = ["src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.1-cpu.onnx"] [tool.pytest.ini_options] addopts = "--verbose" diff --git a/src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx b/src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.1-cpu.onnx similarity index 56% rename from src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx rename to src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.1-cpu.onnx index e2ff710a0..9a2360f19 100644 Binary files a/src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx and b/src/pipecat/audio/turn/smart_turn/data/smart-turn-v3.1-cpu.onnx differ diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py index 08b9f3cd1..406abe165 100644 --- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py @@ -42,17 +42,15 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): Args: smart_turn_model_path: Path to the ONNX model file. If this is not - set, the bundled smart-turn-v3.0 model will be used. + set, the bundled smart-turn-v3.1-cpu model will be used. cpu_count: The number of CPUs to use for inference. Defaults to 1. **kwargs: Additional arguments passed to BaseSmartTurn. """ super().__init__(**kwargs) - logger.debug("Loading Local Smart Turn v3 model...") - if not smart_turn_model_path: # Load bundled model - model_name = "smart-turn-v3.0.onnx" + model_name = "smart-turn-v3.1-cpu.onnx" package_path = "pipecat.audio.turn.smart_turn.data" try: @@ -70,6 +68,8 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): impresources.files(package_path).joinpath(model_name) ) + logger.debug(f"Loading Local Smart Turn v3.x model from {smart_turn_model_path}...") + so = ort.SessionOptions() so.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL so.inter_op_num_threads = 1 @@ -79,7 +79,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): self._feature_extractor = WhisperFeatureExtractor(chunk_length=8) self._session = ort.InferenceSession(smart_turn_model_path, sess_options=so) - logger.debug("Loaded Local Smart Turn v3") + logger.debug("Loaded Local Smart Turn v3.x") def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: """Predict end-of-turn using local ONNX model."""