Smart Turn analyzer now uses the full context of the turn rather than just the audio since VAD last triggered (fixes #3094)
This commit is contained in:
@@ -40,6 +40,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
- Updated Smart Turn model weights to v3.1.
|
||||
|
||||
- Smart Turn analyzer now uses the full context of the turn rather than just the
|
||||
audio since VAD last triggered.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- Package `pipecat.sync` is deprecated, use `pipecat.utils.sync` instead.
|
||||
|
||||
@@ -28,7 +28,6 @@ from pipecat.metrics.metrics import MetricsData, SmartTurnMetricsData
|
||||
STOP_SECS = 3
|
||||
PRE_SPEECH_MS = 0
|
||||
MAX_DURATION_SECONDS = 8 # Max allowed segment duration
|
||||
USE_ONLY_LAST_VAD_SEGMENT = True
|
||||
|
||||
|
||||
class SmartTurnParams(BaseTurnParams):
|
||||
@@ -43,8 +42,6 @@ class SmartTurnParams(BaseTurnParams):
|
||||
stop_secs: float = STOP_SECS
|
||||
pre_speech_ms: float = PRE_SPEECH_MS
|
||||
max_duration_secs: float = MAX_DURATION_SECONDS
|
||||
# not exposing this for now yet until the model can handle it.
|
||||
# use_only_last_vad_segment: bool = USE_ONLY_LAST_VAD_SEGMENT
|
||||
|
||||
|
||||
class SmartTurnTimeoutException(Exception):
|
||||
@@ -160,7 +157,7 @@ class BaseSmartTurn(BaseTurnAnalyzer):
|
||||
state, result = await loop.run_in_executor(
|
||||
self._executor, self._process_speech_segment, self._audio_buffer
|
||||
)
|
||||
if state == EndOfTurnState.COMPLETE or USE_ONLY_LAST_VAD_SEGMENT:
|
||||
if state == EndOfTurnState.COMPLETE:
|
||||
self._clear(state)
|
||||
logger.debug(f"End of Turn result: {state}")
|
||||
return state, result
|
||||
|
||||
Reference in New Issue
Block a user