Implement DashScope ASR provider and enhance ASR service architecture

- Added DashScope ASR service implementation for real-time streaming.
- Updated ASR provider logic to support DashScope alongside existing providers.
- Enhanced runtime metadata resolution to include DashScope as a valid ASR provider.
- Modified configuration files and documentation to reflect the addition of DashScope.
- Introduced tests to validate DashScope integration and ASR service behavior.
- Refactored ASR service factory to accommodate new provider options and modes.
This commit is contained in:
Xin Wang
2026-03-06 11:44:39 +08:00
parent 7e0b777923
commit e11c3abb9e
19 changed files with 940 additions and 44 deletions

View File

@@ -3,11 +3,12 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import AsyncIterator, Awaitable, Callable, Optional, Protocol
from typing import AsyncIterator, Awaitable, Callable, Literal, Optional, Protocol
from providers.common.base import ASRResult
TranscriptCallback = Callable[[str, bool], Awaitable[None]]
ASRMode = Literal["offline", "streaming"]
@dataclass(frozen=True)
@@ -16,6 +17,7 @@ class ASRServiceSpec:
provider: str
sample_rate: int
mode: Optional[ASRMode] = None
language: str = "auto"
api_key: Optional[str] = None
api_url: Optional[str] = None
@@ -28,6 +30,8 @@ class ASRServiceSpec:
class ASRPort(Protocol):
"""Port for speech recognition providers."""
mode: ASRMode
async def connect(self) -> None:
"""Establish connection to ASR provider."""
@@ -41,18 +45,16 @@ class ASRPort(Protocol):
"""Stream partial/final recognition results."""
class ASRInterimControl(Protocol):
"""Optional extension for explicit interim transcription control."""
class OfflineASRPort(ASRPort, Protocol):
"""Port for offline/buffered ASR providers."""
mode: Literal["offline"]
async def start_interim_transcription(self) -> None:
"""Start interim transcription loop if supported."""
"""Start interim transcription loop."""
async def stop_interim_transcription(self) -> None:
"""Stop interim transcription loop if supported."""
class ASRBufferControl(Protocol):
"""Optional extension for explicit ASR buffer lifecycle control."""
"""Stop interim transcription loop."""
def clear_buffer(self) -> None:
"""Clear provider-side ASR buffer."""
@@ -62,3 +64,21 @@ class ASRBufferControl(Protocol):
def get_and_clear_text(self) -> str:
"""Return buffered text and clear internal state."""
class StreamingASRPort(ASRPort, Protocol):
    """Structural interface for ASR providers that operate in streaming mode.

    Extends ``ASRPort`` with utterance-scoped lifecycle hooks. Being a
    ``Protocol``, it only describes the shape a provider must expose; the
    method bodies here are intentionally empty.
    """

    # Discriminator fixed to the "streaming" literal for this port variant.
    mode: Literal["streaming"]

    async def begin_utterance(self) -> None:
        """Open the stream for a new utterance."""
        ...

    async def end_utterance(self) -> None:
        """Mark the current utterance stream as finished."""
        ...

    async def wait_for_final_transcription(self, timeout_ms: int = 800) -> str:
        """Await the final transcript once the utterance has ended.

        Args:
            timeout_ms: Wait budget, defaulting to 800.
                NOTE(review): presumably milliseconds, and what happens on
                expiry is left to the implementation -- confirm.

        Returns:
            The final transcript text.
        """
        ...

    def clear_utterance(self) -> None:
        """Discard state that belongs only to the current utterance."""
        ...