Merge pull request #857 from pipecat-ai/aleix/fix-riva-tts-audio-stuttering

riva: fix FastPitchTTSService audio stuttering
This commit is contained in:
Aleix Conchillo Flaqué
2024-12-12 22:20:00 -08:00
committed by GitHub
2 changed files with 17 additions and 6 deletions

View File

@@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Fixed an audio stuttering issue in `FastPitchTTSService`.
- Fixed a `BaseOutputTransport` issue that was causing non-audio frames to be
processed before the previous audio frames were played. This will allow, for
example, sending a frame `A` after a `TTSSpeakFrame` and the frame `A` will

View File

@@ -76,7 +76,10 @@ class FastPitchTTSService(TTSService):
)
async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
def read_audio_responses():
def read_audio_responses(queue: asyncio.Queue):
def add_response(r):
asyncio.run_coroutine_threadsafe(queue.put(r), self.get_event_loop())
try:
responses = self._service.synthesize_online(
text,
@@ -87,26 +90,32 @@ class FastPitchTTSService(TTSService):
quality=self._quality,
custom_dictionary={},
)
return responses
for r in responses:
add_response(r)
add_response(None)
except Exception as e:
logger.error(f"{self} exception: {e}")
return []
add_response(None)
await self.start_ttfb_metrics()
yield TTSStartedFrame()
logger.debug(f"Generating TTS: [{text}]")
responses = await asyncio.to_thread(read_audio_responses)
for resp in responses:
queue = asyncio.Queue()
await asyncio.to_thread(read_audio_responses, queue)
# Wait for the thread to start.
resp = await queue.get()
while resp:
await self.stop_ttfb_metrics()
frame = TTSAudioRawFrame(
audio=resp.audio,
sample_rate=self._sample_rate,
num_channels=1,
)
yield frame
resp = await queue.get()
await self.start_tts_usage_metrics(text)
yield TTSStoppedFrame()