AI-VideoAssistant/engine/services/tts.py

"""TTS service implementations used by the engine runtime."""

import asyncio
from typing import AsyncIterator

from loguru import logger

from services.base import BaseTTSService, TTSChunk, ServiceState


class MockTTSService(BaseTTSService):
    """Mock TTS service for tests and no-provider fallback.

    Instead of real speech it produces zero-filled byte buffers whose length
    is derived from the number of words in the input text, so callers can
    exercise the synthesis pipeline without an external provider.
    """

    # The mock allocates two zero bytes per sample (silence when the buffer
    # is interpreted as 16-bit PCM).
    _BYTES_PER_SAMPLE = 2
    # Length of generated audio per whitespace-separated word.
    _MS_PER_WORD = 100
    # Streaming emits roughly 100 ms of audio per chunk.
    _CHUNKS_PER_SECOND = 10
    # Pause between consecutive chunks to emulate provider latency.
    _CHUNK_PACING_SECONDS = 0.05

    def __init__(
        self,
        voice: str = "mock",
        sample_rate: int = 16000,
        speed: float = 1.0,
    ):
        """Forward voice, sample rate and speed to the base service.

        Args:
            voice: Voice identifier passed through to the base class.
            sample_rate: Samples per second used to size generated audio.
            speed: Speed value passed through to the base class; the mock
                itself does not use it when sizing audio.
        """
        super().__init__(voice=voice, sample_rate=sample_rate, speed=speed)

    async def connect(self) -> None:
        """Mark the service as connected; no real connection is opened."""
        self.state = ServiceState.CONNECTED
        logger.info("Mock TTS service connected")

    async def disconnect(self) -> None:
        """Mark the service as disconnected."""
        self.state = ServiceState.DISCONNECTED
        logger.info("Mock TTS service disconnected")

    async def synthesize(self, text: str) -> bytes:
        """Generate silence based on text length.

        Args:
            text: Input text; only its whitespace-separated word count
                matters.

        Returns:
            A zero-filled buffer covering 100 ms per word at
            ``self.sample_rate``, two bytes per sample. Empty or
            whitespace-only text yields ``b""``.
        """
        word_count = len(text.split())
        duration_ms = word_count * self._MS_PER_WORD
        samples = int(self.sample_rate * duration_ms / 1000)
        return bytes(samples * self._BYTES_PER_SAMPLE)

    async def synthesize_stream(self, text: str) -> AsyncIterator[TTSChunk]:
        """Generate silence chunks to emulate streaming synthesis.

        The full buffer from :meth:`synthesize` is sliced into chunks of
        about 100 ms each. The last chunk carries ``is_final=True``. When the
        text produces no audio (e.g. empty text), no chunks are yielded.

        Args:
            text: Input text to "synthesize".

        Yields:
            ``TTSChunk`` objects holding consecutive slices of the buffer.
        """
        audio = await self.synthesize(text)
        total_bytes = len(audio)

        # Size the chunk in whole samples so a chunk boundary never splits a
        # two-byte sample (the old ``sample_rate * 2 // 10`` could be odd),
        # and keep it at least one sample so ``range`` never gets a zero step
        # for very small sample rates.
        samples_per_chunk = max(1, self.sample_rate // self._CHUNKS_PER_SECOND)
        chunk_size = samples_per_chunk * self._BYTES_PER_SAMPLE

        for start in range(0, total_bytes, chunk_size):
            end = start + chunk_size
            is_final = end >= total_bytes
            yield TTSChunk(
                audio=audio[start:end],
                sample_rate=self.sample_rate,
                is_final=is_final,
            )
            # Pace only between chunks; sleeping after the final chunk would
            # just delay completion of the stream.
            if not is_final:
                await asyncio.sleep(self._CHUNK_PACING_SECONDS)