Add ASR interim results support in Assistant model and API
- Introduced `asr_interim_enabled` field in the Assistant model to control interim ASR results.
- Updated AssistantBase and AssistantUpdate schemas to include the new field.
- Modified the database schema to add the `asr_interim_enabled` column.
- Enhanced runtime metadata to reflect interim ASR settings.
- Updated API endpoints and tests to validate the new functionality.
- Adjusted documentation to include details about interim ASR results configuration.
This commit is contained in:
@@ -599,6 +599,7 @@ class DuplexPipeline:
|
||||
"provider": asr_provider,
|
||||
"mode": self._resolve_asr_mode(asr_provider, self._runtime_asr.get("mode")),
|
||||
"model": str(self._runtime_asr.get("model") or settings.asr_model or ""),
|
||||
"enableInterim": self._asr_interim_enabled(),
|
||||
"interimIntervalMs": int(self._runtime_asr.get("interimIntervalMs") or settings.asr_interim_interval_ms),
|
||||
"minAudioMs": int(self._runtime_asr.get("minAudioMs") or settings.asr_min_audio_ms),
|
||||
},
|
||||
@@ -865,6 +866,20 @@ class DuplexPipeline:
|
||||
return self._runtime_barge_in_min_duration_ms
|
||||
return self._barge_in_min_duration_ms
|
||||
|
||||
def _asr_interim_enabled(self) -> bool:
    """Return whether interim (non-final) ASR transcripts are enabled.

    Any resolved mode other than ``"offline"`` reports interim results as
    enabled. In offline mode, the runtime ``enableInterim`` value is used
    when ``_coerce_bool`` yields a non-``None`` result; otherwise the
    answer falls back to ``settings.asr_enable_interim``.
    """
    mode = self._asr_mode
    if not self.asr_service:
        # No ASR service is set, so resolve the mode from the runtime
        # config (falling back to the configured provider) instead of
        # using the cached attribute.
        mode = self._resolve_asr_mode(
            self._runtime_asr.get("provider") or settings.asr_provider,
            self._runtime_asr.get("mode"),
        )

    if mode != "offline":
        return True

    runtime_flag = self._coerce_bool(self._runtime_asr.get("enableInterim"))
    if runtime_flag is None:
        return bool(settings.asr_enable_interim)
    return runtime_flag
|
||||
|
||||
def _barge_in_silence_tolerance_frames(self) -> int:
|
||||
"""Convert silence tolerance from ms to frame count using current chunk size."""
|
||||
chunk_ms = max(1, settings.chunk_size_ms)
|
||||
@@ -991,6 +1006,9 @@ class DuplexPipeline:
|
||||
asr_api_key = self._runtime_asr.get("apiKey")
|
||||
asr_api_url = self._runtime_asr.get("baseUrl") or settings.asr_api_url
|
||||
asr_model = self._runtime_asr.get("model") or settings.asr_model
|
||||
asr_enable_interim = self._coerce_bool(self._runtime_asr.get("enableInterim"))
|
||||
if asr_enable_interim is None:
|
||||
asr_enable_interim = bool(settings.asr_enable_interim)
|
||||
asr_interim_interval = int(self._runtime_asr.get("interimIntervalMs") or settings.asr_interim_interval_ms)
|
||||
asr_min_audio_ms = int(self._runtime_asr.get("minAudioMs") or settings.asr_min_audio_ms)
|
||||
asr_mode = self._resolve_asr_mode(asr_provider, self._runtime_asr.get("mode"))
|
||||
@@ -1004,6 +1022,7 @@ class DuplexPipeline:
|
||||
api_key=str(asr_api_key).strip() if asr_api_key else None,
|
||||
api_url=str(asr_api_url).strip() if asr_api_url else None,
|
||||
model=str(asr_model).strip() if asr_model else None,
|
||||
enable_interim=asr_enable_interim,
|
||||
interim_interval_ms=asr_interim_interval,
|
||||
min_audio_for_interim_ms=asr_min_audio_ms,
|
||||
on_transcript=self._on_transcript_callback,
|
||||
@@ -1481,6 +1500,9 @@ class DuplexPipeline:
|
||||
text: Transcribed text
|
||||
is_final: Whether this is the final transcription
|
||||
"""
|
||||
if not is_final and not self._asr_interim_enabled():
|
||||
return
|
||||
|
||||
# Avoid sending duplicate transcripts
|
||||
if text == self._last_sent_transcript and not is_final:
|
||||
return
|
||||
@@ -1550,7 +1572,8 @@ class DuplexPipeline:
|
||||
if self._asr_mode == "streaming":
|
||||
await self._streaming_asr().begin_utterance()
|
||||
else:
|
||||
await self._offline_asr().start_interim_transcription()
|
||||
if self._asr_interim_enabled():
|
||||
await self._offline_asr().start_interim_transcription()
|
||||
|
||||
# Prime ASR with a short pre-speech context window so the utterance
|
||||
# start isn't lost while waiting for VAD to transition to Speech.
|
||||
|
||||
@@ -22,6 +22,7 @@ class ASRServiceSpec:
|
||||
api_key: Optional[str] = None
|
||||
api_url: Optional[str] = None
|
||||
model: Optional[str] = None
|
||||
enable_interim: bool = False
|
||||
interim_interval_ms: int = 500
|
||||
min_audio_for_interim_ms: int = 300
|
||||
on_transcript: Optional[TranscriptCallback] = None
|
||||
|
||||
Reference in New Issue
Block a user