Add ASR interim results support in Assistant model and API
- Introduced `asr_interim_enabled` field in the Assistant model to control interim ASR results.
- Updated AssistantBase and AssistantUpdate schemas to include the new field.
- Modified the database schema to add the `asr_interim_enabled` column.
- Enhanced runtime metadata to reflect interim ASR settings.
- Updated API endpoints and tests to validate the new functionality.
- Adjusted documentation to include details about interim ASR results configuration.
This commit is contained in:
@@ -53,6 +53,7 @@ class OpenAICompatibleASRService(BaseASRService):
        model: str = "FunAudioLLM/SenseVoiceSmall",
        sample_rate: int = 16000,
        language: str = "auto",
        enable_interim: bool = False,
        interim_interval_ms: int = 500,  # How often to send interim results
        min_audio_for_interim_ms: int = 300,  # Min audio before first interim
        on_transcript: Optional[Callable[[str, bool], Awaitable[None]]] = None
@@ -66,6 +67,7 @@ class OpenAICompatibleASRService(BaseASRService):
            model: ASR model name or alias
            sample_rate: Audio sample rate (16000 recommended)
            language: Language code (auto for automatic detection)
            enable_interim: Whether to generate interim transcriptions in offline mode
            interim_interval_ms: How often to generate interim transcriptions
            min_audio_for_interim_ms: Minimum audio duration before first interim
            on_transcript: Callback for transcription results (text, is_final)
@@ -80,6 +82,7 @@ class OpenAICompatibleASRService(BaseASRService):
        raw_api_url = api_url or os.getenv("ASR_API_URL") or self.API_URL
        self.api_url = self._resolve_transcriptions_endpoint(raw_api_url)
        self.model = self.MODELS.get(model.lower(), model)
        self.enable_interim = bool(enable_interim)
        self.interim_interval_ms = interim_interval_ms
        self.min_audio_for_interim_ms = min_audio_for_interim_ms
        self.on_transcript = on_transcript
@@ -181,6 +184,9 @@ class OpenAICompatibleASRService(BaseASRService):
        if not self._session:
            logger.warning("ASR session not connected")
            return None

        if not is_final and not self.enable_interim:
            return None

        # Check minimum audio duration
        audio_duration_ms = len(self._audio_buffer) / (self.sample_rate * 2) * 1000
@@ -310,6 +316,9 @@ class OpenAICompatibleASRService(BaseASRService):
        This periodically transcribes buffered audio for
        real-time feedback to the user.
        """
        if not self.enable_interim:
            return

        if self._interim_task and not self._interim_task.done():
            return
@@ -117,6 +117,7 @@ class DefaultRealtimeServiceFactory(RealtimeServiceFactory):
            model=spec.model or self._DEFAULT_OPENAI_COMPATIBLE_ASR_MODEL,
            sample_rate=spec.sample_rate,
            language=spec.language,
            enable_interim=spec.enable_interim,
            interim_interval_ms=spec.interim_interval_ms,
            min_audio_for_interim_ms=spec.min_audio_for_interim_ms,
            on_transcript=spec.on_transcript,
Reference in New Issue
Block a user