Compare commits

...

2 Commits

Author SHA1 Message Date
Mark Backman
8780cb1fa2 Remove uv.lock 2025-09-16 12:13:39 -04:00
marcus-daily
dfe7815dc5 Smart Turn v3: removing torch and torchaudio deps 2025-09-16 16:02:41 +01:00
5 changed files with 11 additions and 7848 deletions

3
.gitignore vendored
View File

@@ -51,4 +51,5 @@ docs/api/_build/
docs/api/api
# uv
.python-version
.python-version
uv.lock

View File

@@ -18,6 +18,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
async def on_pipeline_finished(task: PipelineTask, frame: Frame):
...
```
### Changed
- `torch` and `torchaudio` are no longer required for running Smart Turn v3
locally (the `local-smart-turn-v3` extra). This avoids installing gigabytes of
dependencies.
### Deprecated

View File

@@ -95,7 +95,7 @@ sambanova = []
sarvam = [ "websockets>=13.1,<15.0" ]
sentry = [ "sentry-sdk~=2.23.1" ]
local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "torchaudio>=2.5.0,<3" ]
local-smart-turn-v3 = [ "transformers", "torch>=2.5.0,<3", "torchaudio>=2.5.0,<3", "onnxruntime>=1.20.1, <2" ]
local-smart-turn-v3 = [ "transformers", "onnxruntime>=1.20.1, <2" ]
remote-smart-turn = []
silero = [ "onnxruntime>=1.20.1, <2" ]
simli = [ "simli-ai~=0.1.10"]

View File

@@ -98,15 +98,15 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):
inputs = self._feature_extractor(
audio_array,
sampling_rate=16000,
return_tensors="pt",
return_tensors="np",
padding="max_length",
max_length=8 * 16000,
truncation=True,
do_normalize=True,
)
# Convert to numpy and ensure correct shape for ONNX
input_features = inputs.input_features.squeeze(0).numpy().astype(np.float32)
# Extract features and ensure correct shape for ONNX
input_features = inputs.input_features.squeeze(0).astype(np.float32)
input_features = np.expand_dims(input_features, axis=0) # Add batch dimension
# Run ONNX inference

7843
uv.lock generated

File diff suppressed because it is too large Load Diff