Add ASR interim results support to the Assistant model and API

- Introduced `asr_interim_enabled` field in the Assistant model to control interim ASR results.
- Updated AssistantBase and AssistantUpdate schemas to include the new field.
- Modified the database schema to add the `asr_interim_enabled` column.
- Enhanced runtime metadata to reflect interim ASR settings.
- Updated API endpoints and tests to validate the new functionality.
- Adjusted documentation to include details about interim ASR results configuration.
This commit is contained in:
Xin Wang
2026-03-06 12:58:54 +08:00
parent e11c3abb9e
commit da38157638
19 changed files with 183 additions and 5 deletions

View File

@@ -53,6 +53,7 @@ class OpenAICompatibleASRService(BaseASRService):
model: str = "FunAudioLLM/SenseVoiceSmall",
sample_rate: int = 16000,
language: str = "auto",
enable_interim: bool = False,
interim_interval_ms: int = 500, # How often to send interim results
min_audio_for_interim_ms: int = 300, # Min audio before first interim
on_transcript: Optional[Callable[[str, bool], Awaitable[None]]] = None
@@ -66,6 +67,7 @@ class OpenAICompatibleASRService(BaseASRService):
model: ASR model name or alias
sample_rate: Audio sample rate (16000 recommended)
language: Language code (auto for automatic detection)
enable_interim: Whether to generate interim transcriptions in offline mode
interim_interval_ms: How often to generate interim transcriptions
min_audio_for_interim_ms: Minimum audio duration before first interim
on_transcript: Callback for transcription results (text, is_final)
@@ -80,6 +82,7 @@ class OpenAICompatibleASRService(BaseASRService):
raw_api_url = api_url or os.getenv("ASR_API_URL") or self.API_URL
self.api_url = self._resolve_transcriptions_endpoint(raw_api_url)
self.model = self.MODELS.get(model.lower(), model)
self.enable_interim = bool(enable_interim)
self.interim_interval_ms = interim_interval_ms
self.min_audio_for_interim_ms = min_audio_for_interim_ms
self.on_transcript = on_transcript
@@ -181,6 +184,9 @@ class OpenAICompatibleASRService(BaseASRService):
if not self._session:
logger.warning("ASR session not connected")
return None
if not is_final and not self.enable_interim:
return None
# Check minimum audio duration
audio_duration_ms = len(self._audio_buffer) / (self.sample_rate * 2) * 1000
@@ -310,6 +316,9 @@ class OpenAICompatibleASRService(BaseASRService):
This periodically transcribes buffered audio for
real-time feedback to the user.
"""
if not self.enable_interim:
return
if self._interim_task and not self._interim_task.done():
return

View File

@@ -117,6 +117,7 @@ class DefaultRealtimeServiceFactory(RealtimeServiceFactory):
model=spec.model or self._DEFAULT_OPENAI_COMPATIBLE_ASR_MODEL,
sample_rate=spec.sample_rate,
language=spec.language,
enable_interim=spec.enable_interim,
interim_interval_ms=spec.interim_interval_ms,
min_audio_for_interim_ms=spec.min_audio_for_interim_ms,
on_transcript=spec.on_transcript,