Add backend api and engine

2026-02-06 14:01:34 +08:00
parent 590014e821
commit d5c1ab34b3
61 changed files with 10351 additions and 1 deletions
--- a/engine/services/asr.py
+++ b/engine/services/asr.py
@@ -0,0 +1,147 @@
+"""ASR (Automatic Speech Recognition) Service implementations.
+
+Provides speech-to-text capabilities with streaming support.
+"""
+
+import os
+import asyncio
+import json
+from typing import AsyncIterator, Optional
+from loguru import logger
+
+from services.base import BaseASRService, ASRResult, ServiceState
+
+# Try to import websockets for streaming ASR
+try:
+    import websockets
+    WEBSOCKETS_AVAILABLE = True
+except ImportError:
+    WEBSOCKETS_AVAILABLE = False
+
+
+class BufferedASRService(BaseASRService):
+    """
+    Buffered ASR service that accumulates audio and provides
+    a simple text accumulator for use with EOU detection.
+    
+    This is a lightweight implementation that works with the
+    existing VAD + EOU pattern without requiring external ASR.
+    """
+    
+    def __init__(
+        self,
+        sample_rate: int = 16000,
+        language: str = "en"
+    ):
+        super().__init__(sample_rate=sample_rate, language=language)
+        
+        self._audio_buffer: bytes = b""
+        self._current_text: str = ""
+        self._transcript_queue: asyncio.Queue[ASRResult] = asyncio.Queue()
+    
+    async def connect(self) -> None:
+        """No connection needed for buffered ASR."""
+        self.state = ServiceState.CONNECTED
+        logger.info("Buffered ASR service connected")
+    
+    async def disconnect(self) -> None:
+        """Clear buffers on disconnect."""
+        self._audio_buffer = b""
+        self._current_text = ""
+        self.state = ServiceState.DISCONNECTED
+        logger.info("Buffered ASR service disconnected")
+    
+    async def send_audio(self, audio: bytes) -> None:
+        """Buffer audio for later processing."""
+        self._audio_buffer += audio
+    
+    async def receive_transcripts(self) -> AsyncIterator[ASRResult]:
+        """Yield transcription results."""
+        while True:
+            try:
+                result = await asyncio.wait_for(
+                    self._transcript_queue.get(),
+                    timeout=0.1
+                )
+                yield result
+            except asyncio.TimeoutError:
+                continue
+            except asyncio.CancelledError:
+                break
+    
+    def set_text(self, text: str) -> None:
+        """
+        Set the current transcript text directly.
+        
+        This allows external integration (e.g., Whisper, other ASR)
+        to provide transcripts.
+        """
+        self._current_text = text
+        result = ASRResult(text=text, is_final=False)
+        asyncio.create_task(self._transcript_queue.put(result))
+    
+    def get_and_clear_text(self) -> str:
+        """Get accumulated text and clear buffer."""
+        text = self._current_text
+        self._current_text = ""
+        self._audio_buffer = b""
+        return text
+    
+    def get_audio_buffer(self) -> bytes:
+        """Get accumulated audio buffer."""
+        return self._audio_buffer
+    
+    def clear_audio_buffer(self) -> None:
+        """Clear audio buffer."""
+        self._audio_buffer = b""
+
+
+class MockASRService(BaseASRService):
+    """
+    Mock ASR service for testing without actual recognition.
+    """
+    
+    def __init__(self, sample_rate: int = 16000, language: str = "en"):
+        super().__init__(sample_rate=sample_rate, language=language)
+        self._transcript_queue: asyncio.Queue[ASRResult] = asyncio.Queue()
+        self._mock_texts = [
+            "Hello, how are you?",
+            "That's interesting.",
+            "Tell me more about that.",
+            "I understand.",
+        ]
+        self._text_index = 0
+    
+    async def connect(self) -> None:
+        self.state = ServiceState.CONNECTED
+        logger.info("Mock ASR service connected")
+    
+    async def disconnect(self) -> None:
+        self.state = ServiceState.DISCONNECTED
+        logger.info("Mock ASR service disconnected")
+    
+    async def send_audio(self, audio: bytes) -> None:
+        """Mock audio processing - generates fake transcripts periodically."""
+        pass
+    
+    def trigger_transcript(self) -> None:
+        """Manually trigger a transcript (for testing)."""
+        text = self._mock_texts[self._text_index % len(self._mock_texts)]
+        self._text_index += 1
+        
+        result = ASRResult(text=text, is_final=True, confidence=0.95)
+        asyncio.create_task(self._transcript_queue.put(result))
+    
+    async def receive_transcripts(self) -> AsyncIterator[ASRResult]:
+        """Yield transcription results."""
+        while True:
+            try:
+                result = await asyncio.wait_for(
+                    self._transcript_queue.get(),
+                    timeout=0.1
+                )
+                yield result
+            except asyncio.TimeoutError:
+                continue
+            except asyncio.CancelledError:
+                break