Files
AI-VideoAssistant/engine/core/ports/asr.py
Xin Wang 4e2450e800 Refactor backend integration and service architecture
- Removed the backend client compatibility wrapper and associated methods to streamline backend integration.
- Updated session management to utilize control plane gateways and runtime configuration providers.
- Adjusted TTS service implementations to remove the EdgeTTS service and simplify service dependencies.
- Enhanced documentation to reflect changes in backend integration and service architecture.
- Updated configuration files to remove deprecated TTS provider options and clarify available settings.
2026-03-06 09:00:43 +08:00

65 lines
1.9 KiB
Python

"""ASR extension port contracts."""
from __future__ import annotations

from dataclasses import dataclass, field
from typing import AsyncIterator, Awaitable, Callable, Optional, Protocol

from services.base import ASRResult
# Signature of an async transcript callback: invoked with a ``str`` and a
# ``bool`` and awaited for completion (it resolves to ``None``).
# NOTE(review): the arguments are presumably (text, is_final) -- confirm
# against the ASR service implementations that invoke it.
TranscriptCallback = Callable[[str, bool], Awaitable[None]]
@dataclass(frozen=True)
class ASRServiceSpec:
    """Resolved runtime configuration for ASR service creation.

    Instances are immutable (``frozen=True``). Only ``provider`` and
    ``sample_rate`` are required; every other field has a default.
    """

    # Identifier of the ASR provider to instantiate.
    provider: str
    # Audio sample rate (presumably in Hz -- confirm with the provider code).
    sample_rate: int
    # Recognition language; defaults to "auto".
    language: str = "auto"
    # Provider credential. Kept out of the generated ``__repr__`` so that
    # logging or printing a spec cannot leak the secret; it still takes part
    # in ``__init__``, equality and hashing exactly as before.
    api_key: Optional[str] = field(default=None, repr=False)
    # Optional endpoint override for the provider.
    api_url: Optional[str] = None
    # Optional provider-specific model name.
    model: Optional[str] = None
    # Interim-transcription tuning knobs (the ``_ms`` suffix suggests
    # milliseconds -- confirm with the consuming service).
    interim_interval_ms: int = 500
    min_audio_for_interim_ms: int = 300
    # Optional async callback for transcript delivery.
    on_transcript: Optional[TranscriptCallback] = None
class ASRPort(Protocol):
    """Port for speech recognition providers.

    This is a structural ``typing.Protocol``: any object exposing these
    methods with compatible signatures satisfies the port without having to
    inherit from it.
    """

    async def connect(self) -> None:
        """Establish connection to ASR provider."""
        ...

    async def disconnect(self) -> None:
        """Release ASR resources."""
        ...

    async def send_audio(self, audio: bytes) -> None:
        """Push one PCM audio chunk for recognition."""
        ...

    # Declared with plain ``def`` on purpose. An ``async def`` stub without a
    # ``yield`` is typed as a coroutine that *returns* an iterator, so
    # async-generator implementations (``async def`` + ``yield``) would not
    # match and ``async for r in port.receive_transcripts()`` would be
    # rejected by type checkers. A ``def`` returning ``AsyncIterator`` is the
    # form that async generators conform to. Protocol members are never
    # executed for structural implementers, so runtime behaviour is unchanged.
    def receive_transcripts(self) -> AsyncIterator[ASRResult]:
        """Stream partial/final recognition results.

        Returns:
            An async iterator of ``ASRResult`` items, to be consumed with
            ``async for``.
        """
        ...
class ASRInterimControl(Protocol):
    """Opt-in protocol for providers that expose explicit interim control.

    Providers implement this in addition to the base ASR port when the
    interim transcription loop can be started and stopped explicitly.
    """

    async def start_interim_transcription(self) -> None:
        """Begin the interim transcription loop, where supported."""
        ...

    async def stop_interim_transcription(self) -> None:
        """Halt the interim transcription loop, where supported."""
        ...
class ASRBufferControl(Protocol):
    """Opt-in protocol for providers with an explicitly managed ASR buffer.

    Two of the three operations are synchronous; only
    ``get_final_transcription`` is a coroutine.
    """

    def clear_buffer(self) -> None:
        """Discard the contents of the provider-side ASR buffer."""
        ...

    async def get_final_transcription(self) -> str:
        """Produce the final transcription of the current utterance."""
        ...

    def get_and_clear_text(self) -> str:
        """Hand back the buffered text and reset internal state."""
        ...