Add edge fade for TTS
This commit is contained in:
@@ -14,6 +14,7 @@ event-driven design.
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Optional, Callable, Awaitable, Dict, Any
|
||||
import numpy as np
|
||||
from loguru import logger
|
||||
|
||||
from core.transports import BaseTransport
|
||||
@@ -608,6 +609,7 @@ class DuplexPipeline:
|
||||
return
|
||||
|
||||
try:
|
||||
is_first_chunk = True
|
||||
async for chunk in self.tts_service.synthesize_stream(text):
|
||||
# Check interrupt at the start of each iteration
|
||||
if self._interrupt_event.is_set():
|
||||
@@ -632,12 +634,51 @@ class DuplexPipeline:
|
||||
# Double-check interrupt right before sending audio
|
||||
if self._interrupt_event.is_set():
|
||||
break
|
||||
|
||||
await self.transport.send_audio(chunk.audio)
|
||||
|
||||
smoothed_audio = self._apply_edge_fade(
|
||||
pcm_bytes=chunk.audio,
|
||||
sample_rate=chunk.sample_rate,
|
||||
fade_in=is_first_chunk,
|
||||
fade_out=bool(chunk.is_final),
|
||||
fade_ms=8,
|
||||
)
|
||||
is_first_chunk = False
|
||||
|
||||
await self.transport.send_audio(smoothed_audio)
|
||||
except asyncio.CancelledError:
|
||||
logger.debug("TTS sentence cancelled")
|
||||
except Exception as e:
|
||||
logger.error(f"TTS sentence error: {e}")
|
||||
|
||||
def _apply_edge_fade(
    self,
    pcm_bytes: bytes,
    sample_rate: int,
    fade_in: bool = False,
    fade_out: bool = False,
    fade_ms: int = 8,
) -> bytes:
    """Apply short linear edge fades to reduce click/pop at sentence boundaries.

    The buffer is decoded as a flat sequence of little-endian signed 16-bit
    samples, the requested ramps are multiplied in, and the result is
    re-encoded in the same format.

    Args:
        pcm_bytes: Raw audio, decoded here as little-endian int16 samples.
        sample_rate: Samples per second; used to turn ``fade_ms`` into a
            sample count.
        fade_in: Ramp the leading samples from 0.0 up to 1.0.
        fade_out: Ramp the trailing samples from 1.0 down to 0.0.
        fade_ms: Ramp duration in milliseconds. The ramp is clamped to the
            chunk length, so a short chunk is faded across all its samples.

    Returns:
        The faded audio as bytes. The input is returned unchanged when it is
        empty, when neither fade is requested, when the computed fade length
        is not positive, or when processing fails for any reason.
    """
    # NOTE(review): the buffer is treated as one flat run of samples; if the
    # audio can be interleaved multi-channel, the ramp length would be counted
    # in samples rather than frames -- confirm against the TTS output format.
    if not pcm_bytes or (not fade_in and not fade_out):
        return pcm_bytes

    try:
        # Work in float32 so the gain ramp can be multiplied in place.
        samples = np.frombuffer(pcm_bytes, dtype="<i2").astype(np.float32)
        if samples.size == 0:
            return pcm_bytes

        fade_samples = int(sample_rate * (fade_ms / 1000.0))
        if fade_samples <= 0:
            # A zero/negative fade length means "no fade". Forcing a minimum
            # of one sample here would silently zero the first sample on
            # fade-in even though no fade was requested.
            return pcm_bytes
        fade_samples = min(fade_samples, samples.size)

        if fade_in:
            samples[:fade_samples] *= np.linspace(0.0, 1.0, fade_samples, endpoint=True)
        if fade_out:
            samples[-fade_samples:] *= np.linspace(1.0, 0.0, fade_samples, endpoint=True)

        # Clip before the cast so out-of-range values cannot wrap around.
        return np.clip(samples, -32768, 32767).astype("<i2").tobytes()
    except Exception:
        # Fallback: never block audio delivery on smoothing failure
        # (e.g. a buffer whose length is not a multiple of 2 bytes).
        return pcm_bytes
|
||||
|
||||
async def _speak(self, text: str) -> None:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user