Implement DashScope ASR provider and enhance ASR service architecture
- Added DashScope ASR service implementation for real-time streaming.
- Updated ASR provider logic to support DashScope alongside existing providers.
- Enhanced runtime metadata resolution to include DashScope as a valid ASR provider.
- Modified configuration files and documentation to reflect the addition of DashScope.
- Introduced tests to validate DashScope integration and ASR service behavior.
- Refactored ASR service factory to accommodate new provider options and modes.
This commit is contained in:
@@ -3,11 +3,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import AsyncIterator, Awaitable, Callable, Optional, Protocol
|
||||
from typing import AsyncIterator, Awaitable, Callable, Literal, Optional, Protocol
|
||||
|
||||
from providers.common.base import ASRResult
|
||||
|
||||
TranscriptCallback = Callable[[str, bool], Awaitable[None]]
|
||||
ASRMode = Literal["offline", "streaming"]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -16,6 +17,7 @@ class ASRServiceSpec:
|
||||
|
||||
provider: str
|
||||
sample_rate: int
|
||||
mode: Optional[ASRMode] = None
|
||||
language: str = "auto"
|
||||
api_key: Optional[str] = None
|
||||
api_url: Optional[str] = None
|
||||
@@ -28,6 +30,8 @@ class ASRServiceSpec:
|
||||
class ASRPort(Protocol):
|
||||
"""Port for speech recognition providers."""
|
||||
|
||||
mode: ASRMode
|
||||
|
||||
async def connect(self) -> None:
|
||||
"""Establish connection to ASR provider."""
|
||||
|
||||
@@ -41,18 +45,16 @@ class ASRPort(Protocol):
|
||||
"""Stream partial/final recognition results."""
|
||||
|
||||
|
||||
class ASRInterimControl(Protocol):
|
||||
"""Optional extension for explicit interim transcription control."""
|
||||
class OfflineASRPort(ASRPort, Protocol):
|
||||
"""Port for offline/buffered ASR providers."""
|
||||
|
||||
mode: Literal["offline"]
|
||||
|
||||
async def start_interim_transcription(self) -> None:
|
||||
"""Start interim transcription loop if supported."""
|
||||
"""Start interim transcription loop."""
|
||||
|
||||
async def stop_interim_transcription(self) -> None:
|
||||
"""Stop interim transcription loop if supported."""
|
||||
|
||||
|
||||
class ASRBufferControl(Protocol):
|
||||
"""Optional extension for explicit ASR buffer lifecycle control."""
|
||||
"""Stop interim transcription loop."""
|
||||
|
||||
def clear_buffer(self) -> None:
|
||||
"""Clear provider-side ASR buffer."""
|
||||
@@ -62,3 +64,21 @@ class ASRBufferControl(Protocol):
|
||||
|
||||
def get_and_clear_text(self) -> str:
|
||||
"""Return buffered text and clear internal state."""
|
||||
|
||||
|
||||
class StreamingASRPort(ASRPort, Protocol):
    """Port for streaming ASR providers.

    Extends ``ASRPort`` with an utterance-scoped lifecycle and narrows
    ``mode`` to the literal ``"streaming"``.
    """

    mode: Literal["streaming"]

    async def begin_utterance(self) -> None:
        """Start a new utterance stream."""

    async def end_utterance(self) -> None:
        """Signal end of current utterance stream."""

    async def wait_for_final_transcription(self, timeout_ms: int = 800) -> str:
        """Wait for final transcript after utterance end.

        Args:
            timeout_ms: Wait budget, presumably in milliseconds judging by
                the name. What happens when it expires is left to the
                implementation and is not specified by this protocol.

        Returns:
            The final transcript text.
        """

    def clear_utterance(self) -> None:
        """Reset utterance-local state."""
|
||||
|
||||
Reference in New Issue
Block a user