Smart Turn v3: removing torch and torchaudio deps
This commit is contained in:
@@ -18,6 +18,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
async def on_pipeline_finished(task: PipelineTask, frame: Frame):
|
||||
...
|
||||
```
|
||||
|
||||
### Changed
|
||||
|
||||
- `torch` and `torchaudio` are no longer required for running Smart Turn v3
|
||||
locally. This avoids installing gigabytes of dependencies.
|
||||
|
||||
### Deprecated
|
||||
|
||||
|
||||
@@ -95,7 +95,7 @@ sambanova = []
|
||||
sarvam = [ "websockets>=13.1,<15.0" ]
|
||||
sentry = [ "sentry-sdk~=2.23.1" ]
|
||||
local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "torchaudio>=2.5.0,<3" ]
|
||||
local-smart-turn-v3 = [ "transformers", "torch>=2.5.0,<3", "torchaudio>=2.5.0,<3", "onnxruntime>=1.20.1, <2" ]
|
||||
local-smart-turn-v3 = [ "transformers", "onnxruntime>=1.20.1, <2" ]
|
||||
remote-smart-turn = []
|
||||
silero = [ "onnxruntime>=1.20.1, <2" ]
|
||||
simli = [ "simli-ai~=0.1.10"]
|
||||
|
||||
@@ -98,15 +98,15 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):
|
||||
inputs = self._feature_extractor(
|
||||
audio_array,
|
||||
sampling_rate=16000,
|
||||
return_tensors="pt",
|
||||
return_tensors="np",
|
||||
padding="max_length",
|
||||
max_length=8 * 16000,
|
||||
truncation=True,
|
||||
do_normalize=True,
|
||||
)
|
||||
|
||||
# Convert to numpy and ensure correct shape for ONNX
|
||||
input_features = inputs.input_features.squeeze(0).numpy().astype(np.float32)
|
||||
# Extract features and ensure correct shape for ONNX
|
||||
input_features = inputs.input_features.squeeze(0).astype(np.float32)
|
||||
input_features = np.expand_dims(input_features, axis=0) # Add batch dimension
|
||||
|
||||
# Run ONNX inference
|
||||
|
||||
4
uv.lock
generated
4
uv.lock
generated
@@ -4309,8 +4309,6 @@ local-smart-turn = [
|
||||
]
|
||||
local-smart-turn-v3 = [
|
||||
{ name = "onnxruntime" },
|
||||
{ name = "torch" },
|
||||
{ name = "torchaudio" },
|
||||
{ name = "transformers" },
|
||||
]
|
||||
mcp = [
|
||||
@@ -4495,9 +4493,7 @@ requires-dist = [
|
||||
{ name = "tenacity", marker = "extra == 'livekit'", specifier = ">=8.2.3,<10.0.0" },
|
||||
{ name = "timm", marker = "extra == 'moondream'", specifier = "~=1.0.13" },
|
||||
{ name = "torch", marker = "extra == 'local-smart-turn'", specifier = ">=2.5.0,<3" },
|
||||
{ name = "torch", marker = "extra == 'local-smart-turn-v3'", specifier = ">=2.5.0,<3" },
|
||||
{ name = "torchaudio", marker = "extra == 'local-smart-turn'", specifier = ">=2.5.0,<3" },
|
||||
{ name = "torchaudio", marker = "extra == 'local-smart-turn-v3'", specifier = ">=2.5.0,<3" },
|
||||
{ name = "transformers", marker = "extra == 'local-smart-turn'" },
|
||||
{ name = "transformers", marker = "extra == 'local-smart-turn-v3'" },
|
||||
{ name = "transformers", marker = "extra == 'moondream'", specifier = ">=4.48.0" },
|
||||
|
||||
Reference in New Issue
Block a user