From eb1bf1e44680d7ce40d4f5096d4a4613c5d290ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 28 Jan 2026 23:27:32 -0800 Subject: [PATCH 1/5] tts: rename PiperTTSService to PiperHttpTTSService --- examples/foundational/01-say-one-thing-piper.py | 4 ++-- src/pipecat/services/piper/tts.py | 7 ++++--- tests/test_piper_tts.py | 12 ++++++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/examples/foundational/01-say-one-thing-piper.py b/examples/foundational/01-say-one-thing-piper.py index 5aa975b31..f6700f886 100644 --- a/examples/foundational/01-say-one-thing-piper.py +++ b/examples/foundational/01-say-one-thing-piper.py @@ -16,7 +16,7 @@ from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineTask from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport -from pipecat.services.piper.tts import PiperTTSService +from pipecat.services.piper.tts import PiperHttpTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -39,7 +39,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): # Create an HTTP session async with aiohttp.ClientSession() as session: - tts = PiperTTSService( + tts = PiperHttpTTSService( base_url=os.getenv("PIPER_BASE_URL"), aiohttp_session=session, sample_rate=24000 ) diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index ce47c885e..19d9720f0 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -21,9 +21,10 @@ from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts -# This assumes a running TTS service running: https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/API_HTTP.md -class 
PiperTTSService(TTSService): - """Piper TTS service implementation. +# This assumes a running TTS service running: +# https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/API_HTTP.md +class PiperHttpTTSService(TTSService): + """Piper HTTP TTS service implementation. Provides integration with Piper's HTTP TTS server for text-to-speech synthesis. Supports streaming audio generation with configurable sample diff --git a/tests/test_piper_tts.py b/tests/test_piper_tts.py index e52be2d91..8d877099c 100644 --- a/tests/test_piper_tts.py +++ b/tests/test_piper_tts.py @@ -21,7 +21,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, TTSTextFrame, ) -from pipecat.services.piper.tts import PiperTTSService +from pipecat.services.piper.tts import PiperHttpTTSService from pipecat.tests.utils import run_test @@ -67,8 +67,10 @@ async def test_run_piper_tts_success(aiohttp_client): base_url = str(client.make_url("")).rstrip("/") async with aiohttp.ClientSession() as session: - # Instantiate PiperTTSService with our mock server - tts_service = PiperTTSService(base_url=base_url, aiohttp_session=session, sample_rate=24000) + # Instantiate PiperHttpTTSService with our mock server + tts_service = PiperHttpTTSService( + base_url=base_url, aiohttp_session=session, sample_rate=24000 + ) frames_to_send = [ TTSSpeakFrame(text="Hello world."), @@ -117,7 +119,9 @@ async def test_run_piper_tts_error(aiohttp_client): base_url = str(client.make_url("")).rstrip("/") async with aiohttp.ClientSession() as session: - tts_service = PiperTTSService(base_url=base_url, aiohttp_session=session, sample_rate=24000) + tts_service = PiperHttpTTSService( + base_url=base_url, aiohttp_session=session, sample_rate=24000 + ) frames_to_send = [ TTSSpeakFrame(text="Error case."), From 875614ff7a43cf7820c8cec3c385683126f313e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 28 Jan 2026 23:29:21 -0800 Subject: [PATCH 2/5] tts: add support for local PiperTTSService --- 
.github/workflows/coverage.yaml | 9 +- .github/workflows/tests.yaml | 9 +- .../foundational/07zi-interruptible-piper.py | 132 ++++++++++++++++++ pyproject.toml | 1 + scripts/evals/run-release-evals.py | 1 + src/pipecat/services/piper/tts.py | 122 +++++++++++++++- uv.lock | 27 +++- 7 files changed, 291 insertions(+), 10 deletions(-) create mode 100644 examples/foundational/07zi-interruptible-piper.py diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index faca8c03f..ae0cb9c57 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -33,7 +33,14 @@ jobs: - name: Install dependencies run: | - uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain --extra livekit --extra websocket + uv sync --group dev \ + --extra anthropic \ + --extra aws \ + --extra google \ + --extra langchain \ + --extra livekit \ + --extra piper \ + --extra websocket - name: Run tests with coverage run: | diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 459725da7..cb35a169c 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -37,7 +37,14 @@ jobs: - name: Install dependencies run: | - uv sync --group dev --extra anthropic --extra aws --extra google --extra langchain --extra livekit --extra websocket + uv sync --group dev \ + --extra anthropic \ + --extra aws \ + --extra google \ + --extra langchain \ + --extra livekit \ + --extra piper \ + --extra websocket - name: Test with pytest run: | diff --git a/examples/foundational/07zi-interruptible-piper.py b/examples/foundational/07zi-interruptible-piper.py new file mode 100644 index 000000000..67286741d --- /dev/null +++ b/examples/foundational/07zi-interruptible-piper.py @@ -0,0 +1,132 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.turn.smart_turn.local_smart_turn_v3 
import LocalSmartTurnAnalyzerV3 +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.frames.frames import LLMRunFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.piper.tts import PiperTTSService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy +from pipecat.turns.user_turn_strategies import UserTurnStrategies + +load_dotenv(override=True) + +# We store functions so objects (e.g. SileroVADAnalyzer) don't get +# instantiated. The function will be called when the desired transport gets +# selected. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = PiperTTSService(voice_id="en_US-ryan-high") + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + user_turn_strategies=UserTurnStrategies( + stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] + ), + ), + ) + + pipeline = Pipeline( + [ + transport.input(), # Transport user input + stt, + user_aggregator, # User responses + llm, # LLM + tts, # TTS + transport.output(), # Transport bot output + assistant_aggregator, # Assistant spoken responses + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + # Kick off the conversation. + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/pyproject.toml b/pyproject.toml index 339b498f6..098ad917b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,7 @@ rnnoise = [ "pyrnnoise~=0.4.1" ] openpipe = [ "openpipe>=4.50.0,<6" ] openrouter = [] perplexity = [] +piper = [ "piper-tts>=1.3.0,<2" ] playht = [ "pipecat-ai[websockets-base]" ] qwen = [] 
remote-smart-turn = [] diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index d4290be00..239012c36 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -138,6 +138,7 @@ TESTS_07 = [ ("07zf-interruptible-gradium.py", EVAL_SIMPLE_MATH), ("07zg-interruptible-camb.py", EVAL_SIMPLE_MATH), ("07zh-interruptible-hathora.py", EVAL_SIMPLE_MATH), + ("07zi-interruptible-piper.py", EVAL_SIMPLE_MATH), # Needs a local XTTS docker instance running. # ("07i-interruptible-xtts.py", EVAL_SIMPLE_MATH), # Needs a Krisp license. diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index 19d9720f0..ca809f469 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -6,11 +6,14 @@ """Piper TTS service implementation.""" -from typing import AsyncGenerator, Optional +import asyncio +from pathlib import Path +from typing import AsyncGenerator, AsyncIterator, Optional import aiohttp from loguru import logger +from pipecat.audio.utils import create_stream_resampler from pipecat.frames.frames import ( ErrorFrame, Frame, @@ -20,6 +23,123 @@ from pipecat.frames.frames import ( from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts +try: + from piper import PiperVoice + from piper.download_voices import download_voice +except ModuleNotFoundError as e: + logger.error(f"Exception: {e}") + logger.error("In order to use Piper, you need to `pip install pipecat-ai[piper]`.") + raise Exception(f"Missing module: {e}") + + +class PiperTTSService(TTSService): + """Piper TTS service implementation. + + Provides local text-to-speech synthesis using Piper voice models. Automatically + downloads voice models if not already present and resamples audio output to + match the configured sample rate. 
+ """ + + def __init__( + self, + *, + voice_id: str, + download_dir: Optional[Path] = None, + force_redownload: bool = False, + use_cuda: bool = False, + **kwargs, + ): + """Initialize the Piper TTS service. + + Args: + voice_id: Piper voice model identifier (e.g. `en_US-ryan-high`). + download_dir: Directory for storing voice model files. Defaults to + the current working directory. + force_redownload: Re-download the voice model even if it already exists. + use_cuda: Use CUDA for GPU-accelerated inference. + **kwargs: Additional arguments passed to the parent `TTSService`. + """ + super().__init__(**kwargs) + + self._voice_id = voice_id + + self._resampler = create_stream_resampler() + + download_dir = download_dir or Path.cwd() + + model_file = f"{voice_id}.onnx" + model_path = Path(download_dir) / model_file + + if not model_path.exists(): + logger.debug(f"Downloading Piper '{voice_id}' model") + download_voice(voice_id, download_dir, force_redownload=force_redownload) + + logger.debug(f"Loading Piper '{voice_id}' model from {model_path}") + + self._voice = PiperVoice.load(model_path, use_cuda=use_cuda) + + logger.debug(f"Loaded Piper '{voice_id}' model") + + def can_generate_metrics(self) -> bool: + """Check if this service can generate processing metrics. + + Returns: + True, as Piper service supports metrics generation. + """ + return True + + @traced_tts + async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]: + """Generate speech from text using Piper. + + Args: + text: The text to convert to speech. + + Yields: + Frame: Audio frames containing the synthesized speech and status frames. 
+ """ + + def async_next(it): + try: + return next(it) + except StopIteration: + return None + + async def async_iterator(iterator, sample_rate: int) -> AsyncIterator[bytes]: + while True: + item = await asyncio.to_thread(async_next, iterator) + if item is None: + return + + audio_data = await self._resampler.resample( + item.audio_int16_bytes, sample_rate, self.sample_rate + ) + + yield audio_data + + logger.debug(f"{self}: Generating TTS [{text}]") + + try: + await self.start_ttfb_metrics() + + await self.start_tts_usage_metrics(text) + + yield TTSStartedFrame() + + async for frame in self._stream_audio_frames_from_iterator( + async_iterator(self._voice.synthesize(text), self._voice.config.sample_rate), + strip_wav_header=False, + ): + await self.stop_ttfb_metrics() + yield frame + except Exception as e: + logger.error(f"{self} exception: {e}") + yield ErrorFrame(error=f"Unknown error occurred: {e}") + finally: + logger.debug(f"{self}: Finished TTS [{text}]") + await self.stop_ttfb_metrics() + yield TTSStoppedFrame() + # This assumes a running TTS service running: # https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/API_HTTP.md diff --git a/uv.lock b/uv.lock index b35c9f25d..26f0cc202 100644 --- a/uv.lock +++ b/uv.lock @@ -2045,7 +2045,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/6a/33d1702184d94106d3cdd7bfb788e19723206fce152e303473ca3b946c7b/greenlet-3.3.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:6f8496d434d5cb2dce025773ba5597f71f5410ae499d5dd9533e0653258cdb3d", size = 273658, upload-time = "2025-12-04T14:23:37.494Z" }, { url = "https://files.pythonhosted.org/packages/d6/b7/2b5805bbf1907c26e434f4e448cd8b696a0b71725204fa21a211ff0c04a7/greenlet-3.3.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b96dc7eef78fd404e022e165ec55327f935b9b52ff355b067eb4a0267fc1cffb", size = 574810, upload-time = "2025-12-04T14:50:04.154Z" }, { url = 
"https://files.pythonhosted.org/packages/94/38/343242ec12eddf3d8458c73f555c084359883d4ddc674240d9e61ec51fd6/greenlet-3.3.0-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:73631cd5cccbcfe63e3f9492aaa664d278fda0ce5c3d43aeda8e77317e38efbd", size = 586248, upload-time = "2025-12-04T14:57:39.35Z" }, - { url = "https://files.pythonhosted.org/packages/f0/d0/0ae86792fb212e4384041e0ef8e7bc66f59a54912ce407d26a966ed2914d/greenlet-3.3.0-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b299a0cb979f5d7197442dccc3aee67fce53500cd88951b7e6c35575701c980b", size = 597403, upload-time = "2025-12-04T15:07:10.831Z" }, { url = "https://files.pythonhosted.org/packages/b6/a8/15d0aa26c0036a15d2659175af00954aaaa5d0d66ba538345bd88013b4d7/greenlet-3.3.0-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7dee147740789a4632cace364816046e43310b59ff8fb79833ab043aefa72fd5", size = 586910, upload-time = "2025-12-04T14:25:59.705Z" }, { url = "https://files.pythonhosted.org/packages/e1/9b/68d5e3b7ccaba3907e5532cf8b9bf16f9ef5056a008f195a367db0ff32db/greenlet-3.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:39b28e339fc3c348427560494e28d8a6f3561c8d2bcf7d706e1c624ed8d822b9", size = 1547206, upload-time = "2025-12-04T15:04:21.027Z" }, { url = "https://files.pythonhosted.org/packages/66/bd/e3086ccedc61e49f91e2cfb5ffad9d8d62e5dc85e512a6200f096875b60c/greenlet-3.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b3c374782c2935cc63b2a27ba8708471de4ad1abaa862ffdb1ef45a643ddbb7d", size = 1613359, upload-time = "2025-12-04T14:27:26.548Z" }, @@ -2053,7 +2052,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/cb/48e964c452ca2b92175a9b2dca037a553036cb053ba69e284650ce755f13/greenlet-3.3.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e29f3018580e8412d6aaf5641bb7745d38c85228dacf51a73bd4e26ddf2a6a8e", size = 274908, upload-time = "2025-12-04T14:23:26.435Z" }, { url = 
"https://files.pythonhosted.org/packages/28/da/38d7bff4d0277b594ec557f479d65272a893f1f2a716cad91efeb8680953/greenlet-3.3.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a687205fb22794e838f947e2194c0566d3812966b41c78709554aa883183fb62", size = 577113, upload-time = "2025-12-04T14:50:05.493Z" }, { url = "https://files.pythonhosted.org/packages/3c/f2/89c5eb0faddc3ff014f1c04467d67dee0d1d334ab81fadbf3744847f8a8a/greenlet-3.3.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4243050a88ba61842186cb9e63c7dfa677ec146160b0efd73b855a3d9c7fcf32", size = 590338, upload-time = "2025-12-04T14:57:41.136Z" }, - { url = "https://files.pythonhosted.org/packages/80/d7/db0a5085035d05134f8c089643da2b44cc9b80647c39e93129c5ef170d8f/greenlet-3.3.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:670d0f94cd302d81796e37299bcd04b95d62403883b24225c6b5271466612f45", size = 601098, upload-time = "2025-12-04T15:07:11.898Z" }, { url = "https://files.pythonhosted.org/packages/dc/a6/e959a127b630a58e23529972dbc868c107f9d583b5a9f878fb858c46bc1a/greenlet-3.3.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cb3a8ec3db4a3b0eb8a3c25436c2d49e3505821802074969db017b87bc6a948", size = 590206, upload-time = "2025-12-04T14:26:01.254Z" }, { url = "https://files.pythonhosted.org/packages/48/60/29035719feb91798693023608447283b266b12efc576ed013dd9442364bb/greenlet-3.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2de5a0b09eab81fc6a382791b995b1ccf2b172a9fec934747a7a23d2ff291794", size = 1550668, upload-time = "2025-12-04T15:04:22.439Z" }, { url = "https://files.pythonhosted.org/packages/0a/5f/783a23754b691bfa86bd72c3033aa107490deac9b2ef190837b860996c9f/greenlet-3.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4449a736606bd30f27f8e1ff4678ee193bc47f6ca810d705981cfffd6ce0d8c5", size = 1615483, upload-time = "2025-12-04T14:27:28.083Z" }, @@ -2061,7 +2059,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/f8/0a/a3871375c7b9727edaeeea994bfff7c63ff7804c9829c19309ba2e058807/greenlet-3.3.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:b01548f6e0b9e9784a2c99c5651e5dc89ffcbe870bc5fb2e5ef864e9cc6b5dcb", size = 276379, upload-time = "2025-12-04T14:23:30.498Z" }, { url = "https://files.pythonhosted.org/packages/43/ab/7ebfe34dce8b87be0d11dae91acbf76f7b8246bf9d6b319c741f99fa59c6/greenlet-3.3.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:349345b770dc88f81506c6861d22a6ccd422207829d2c854ae2af8025af303e3", size = 597294, upload-time = "2025-12-04T14:50:06.847Z" }, { url = "https://files.pythonhosted.org/packages/a4/39/f1c8da50024feecd0793dbd5e08f526809b8ab5609224a2da40aad3a7641/greenlet-3.3.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8e18ed6995e9e2c0b4ed264d2cf89260ab3ac7e13555b8032b25a74c6d18655", size = 607742, upload-time = "2025-12-04T14:57:42.349Z" }, - { url = "https://files.pythonhosted.org/packages/77/cb/43692bcd5f7a0da6ec0ec6d58ee7cddb606d055ce94a62ac9b1aa481e969/greenlet-3.3.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c024b1e5696626890038e34f76140ed1daf858e37496d33f2af57f06189e70d7", size = 622297, upload-time = "2025-12-04T15:07:13.552Z" }, { url = "https://files.pythonhosted.org/packages/75/b0/6bde0b1011a60782108c01de5913c588cf51a839174538d266de15e4bf4d/greenlet-3.3.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:047ab3df20ede6a57c35c14bf5200fcf04039d50f908270d3f9a7a82064f543b", size = 609885, upload-time = "2025-12-04T14:26:02.368Z" }, { url = "https://files.pythonhosted.org/packages/49/0e/49b46ac39f931f59f987b7cd9f34bfec8ef81d2a1e6e00682f55be5de9f4/greenlet-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2d9ad37fc657b1102ec880e637cccf20191581f75c64087a549e66c57e1ceb53", size = 1567424, upload-time = "2025-12-04T15:04:23.757Z" }, { url = 
"https://files.pythonhosted.org/packages/05/f5/49a9ac2dff7f10091935def9165c90236d8f175afb27cbed38fb1d61ab6b/greenlet-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83cd0e36932e0e7f36a64b732a6f60c2fc2df28c351bae79fbaf4f8092fe7614", size = 1636017, upload-time = "2025-12-04T14:27:29.688Z" }, @@ -2069,7 +2066,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/2f/28592176381b9ab2cafa12829ba7b472d177f3acc35d8fbcf3673d966fff/greenlet-3.3.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:a1e41a81c7e2825822f4e068c48cb2196002362619e2d70b148f20a831c00739", size = 275140, upload-time = "2025-12-04T14:23:01.282Z" }, { url = "https://files.pythonhosted.org/packages/2c/80/fbe937bf81e9fca98c981fe499e59a3f45df2a04da0baa5c2be0dca0d329/greenlet-3.3.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f515a47d02da4d30caaa85b69474cec77b7929b2e936ff7fb853d42f4bf8808", size = 599219, upload-time = "2025-12-04T14:50:08.309Z" }, { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" }, - { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" }, { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" }, { url = 
"https://files.pythonhosted.org/packages/b5/ba/56699ff9b7c76ca12f1cdc27a886d0f81f2189c3455ff9f65246780f713d/greenlet-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ab97cf74045343f6c60a39913fa59710e4bd26a536ce7ab2397adf8b27e67c39", size = 1567256, upload-time = "2025-12-04T15:04:25.276Z" }, { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" }, @@ -2077,7 +2073,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/7c/f0a6d0ede2c7bf092d00bc83ad5bafb7e6ec9b4aab2fbdfa6f134dc73327/greenlet-3.3.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:60c2ef0f578afb3c8d92ea07ad327f9a062547137afe91f38408f08aacab667f", size = 275671, upload-time = "2025-12-04T14:23:05.267Z" }, { url = "https://files.pythonhosted.org/packages/44/06/dac639ae1a50f5969d82d2e3dd9767d30d6dbdbab0e1a54010c8fe90263c/greenlet-3.3.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a5d554d0712ba1de0a6c94c640f7aeba3f85b3a6e1f2899c11c2c0428da9365", size = 646360, upload-time = "2025-12-04T14:50:10.026Z" }, { url = "https://files.pythonhosted.org/packages/e0/94/0fb76fe6c5369fba9bf98529ada6f4c3a1adf19e406a47332245ef0eb357/greenlet-3.3.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3a898b1e9c5f7307ebbde4102908e6cbfcb9ea16284a3abe15cab996bee8b9b3", size = 658160, upload-time = "2025-12-04T14:57:45.41Z" }, - { url = "https://files.pythonhosted.org/packages/93/79/d2c70cae6e823fac36c3bbc9077962105052b7ef81db2f01ec3b9bf17e2b/greenlet-3.3.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dcd2bdbd444ff340e8d6bdf54d2f206ccddbb3ccfdcd3c25bf4afaa7b8f0cf45", size = 671388, upload-time = "2025-12-04T15:07:15.789Z" }, { url = 
"https://files.pythonhosted.org/packages/b8/14/bab308fc2c1b5228c3224ec2bf928ce2e4d21d8046c161e44a2012b5203e/greenlet-3.3.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5773edda4dc00e173820722711d043799d3adb4f01731f40619e07ea2750b955", size = 660166, upload-time = "2025-12-04T14:26:05.099Z" }, { url = "https://files.pythonhosted.org/packages/4b/d2/91465d39164eaa0085177f61983d80ffe746c5a1860f009811d498e7259c/greenlet-3.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ac0549373982b36d5fd5d30beb8a7a33ee541ff98d2b502714a09f1169f31b55", size = 1615193, upload-time = "2025-12-04T15:04:27.041Z" }, { url = "https://files.pythonhosted.org/packages/42/1b/83d110a37044b92423084d52d5d5a3b3a73cafb51b547e6d7366ff62eff1/greenlet-3.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d198d2d977460358c3b3a4dc844f875d1adb33817f0613f663a656f463764ccc", size = 1683653, upload-time = "2025-12-04T14:27:32.366Z" }, @@ -2085,7 +2080,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/66/bd6317bc5932accf351fc19f177ffba53712a202f9df10587da8df257c7e/greenlet-3.3.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d6ed6f85fae6cdfdb9ce04c9bf7a08d666cfcfb914e7d006f44f840b46741931", size = 282638, upload-time = "2025-12-04T14:25:20.941Z" }, { url = "https://files.pythonhosted.org/packages/30/cf/cc81cb030b40e738d6e69502ccbd0dd1bced0588e958f9e757945de24404/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d9125050fcf24554e69c4cacb086b87b3b55dc395a8b3ebe6487b045b2614388", size = 651145, upload-time = "2025-12-04T14:50:11.039Z" }, { url = "https://files.pythonhosted.org/packages/9c/ea/1020037b5ecfe95ca7df8d8549959baceb8186031da83d5ecceff8b08cd2/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:87e63ccfa13c0a0f6234ed0add552af24cc67dd886731f2261e46e241608bee3", size = 654236, upload-time = "2025-12-04T14:57:47.007Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/cc/1e4bae2e45ca2fa55299f4e85854606a78ecc37fead20d69322f96000504/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2662433acbca297c9153a4023fe2161c8dcfdcc91f10433171cf7e7d94ba2221", size = 662506, upload-time = "2025-12-04T15:07:16.906Z" }, { url = "https://files.pythonhosted.org/packages/57/b9/f8025d71a6085c441a7eaff0fd928bbb275a6633773667023d19179fe815/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3c6e9b9c1527a78520357de498b0e709fb9e2f49c3a513afd5a249007261911b", size = 653783, upload-time = "2025-12-04T14:26:06.225Z" }, { url = "https://files.pythonhosted.org/packages/f6/c7/876a8c7a7485d5d6b5c6821201d542ef28be645aa024cfe1145b35c120c1/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:286d093f95ec98fdd92fcb955003b8a3d054b4e2cab3e2707a5039e7b50520fd", size = 1614857, upload-time = "2025-12-04T15:04:28.484Z" }, { url = "https://files.pythonhosted.org/packages/4f/dc/041be1dff9f23dac5f48a43323cd0789cb798342011c19a248d9c9335536/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c10513330af5b8ae16f023e8ddbfb486ab355d04467c4679c5cfe4659975dd9", size = 1676034, upload-time = "2025-12-04T14:27:33.531Z" }, @@ -4426,6 +4420,9 @@ openai = [ openpipe = [ { name = "openpipe" }, ] +piper = [ + { name = "piper-tts" }, +] playht = [ { name = "websockets" }, ] @@ -4602,6 +4599,7 @@ requires-dist = [ { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'websocket'" }, { name = "pipecat-ai-krisp", marker = "extra == 'krisp'", specifier = "~=0.4.0" }, { name = "pipecat-ai-small-webrtc-prebuilt", marker = "extra == 'runner'", specifier = ">=2.0.4" }, + { name = "piper-tts", marker = "extra == 'piper'", specifier = ">=1.3.0,<2" }, { name = "protobuf", specifier = "~=5.29.3" }, { name = "pvkoala", marker = "extra == 'koala'", specifier = "~=2.0.3" }, { name = "pyaudio", marker = "extra == 'local'", 
specifier = "~=0.2.14" }, @@ -4631,7 +4629,7 @@ requires-dist = [ { name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1" }, { name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" }, ] -provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "local-smart-turn-v3", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "playht", "qwen", "remote-smart-turn", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"] +provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "local-smart-turn-v3", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "piper", "playht", "qwen", "remote-smart-turn", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"] [package.metadata.requires-dev] dev = [ @@ -4679,6 +4677,21 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/9c/6e/332b78d1c7888ff426bd528b150aad0da05024f4f91e56502c359726c07b/pipecat_ai_small_webrtc_prebuilt-2.0.4-py3-none-any.whl", hash = "sha256:054b3cee843fe69191859dbb0693560d9ca08f7d57a9ff0457d0bc741f36f4df", size = 585606, upload-time = "2025-12-30T19:14:50.595Z" }, ] +[[package]] +name = "piper-tts" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "onnxruntime" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/c0/d9b5f64869274be3ebc6dc483f13791a3c6ebbc0e37fad4e237a76d5365b/piper_tts-1.3.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:0af0c90aeddf762555ed940de1ac576acbefb3623e6d5ca4fb1a70359ee7e65d", size = 13819597, upload-time = "2025-07-10T21:07:22.893Z" }, + { url = "https://files.pythonhosted.org/packages/5b/17/6a059c0a45e582fadd4545ed092294fd0add7c679f6c09440af5cd2678b5/piper_tts-1.3.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:810c91a084d335d32b42928b1ef69d6480cf7e3a5a8b15eff98edd2ef55f2791", size = 13828403, upload-time = "2025-07-10T21:07:25.386Z" }, + { url = "https://files.pythonhosted.org/packages/8c/92/f37e5111440fc6c6336f42f8dab88afaa545394784dc930f808a68883c48/piper_tts-1.3.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8d39f85c3f4b6ade512976849579344fc72595ec613f374dbcf8521716398907", size = 13836863, upload-time = "2025-07-10T21:07:27.616Z" }, + { url = "https://files.pythonhosted.org/packages/2b/73/3d29175cfd93e791baaef3335819778d3f8c8898e2fe16cd0cc8b8163f84/piper_tts-1.3.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:234c25474655b26f3418b84522c815c43e9b1bc8a1fdb13c2b28514290c165f0", size = 13836748, upload-time = "2025-07-10T21:07:29.912Z" }, + { url = "https://files.pythonhosted.org/packages/10/a5/d782d469fc19db9bf19f1725d4a6ef77d2413515b61f5017340688f5d093/piper_tts-1.3.0-cp39-abi3-win_amd64.whl", hash = 
"sha256:dc6b5be4e15f3c0f4a6067b515bc6202ddf3e2b0c6cbd6c8bdeccab2453c89c7", size = 13826773, upload-time = "2025-07-10T21:07:31.95Z" }, +] + [[package]] name = "platformdirs" version = "4.5.1" From 11daa43b1b898493d7118233c577ad7ee25321e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 28 Jan 2026 23:54:17 -0800 Subject: [PATCH 3/5] TTSService: resample _stream_audio_frames_from_iterator() input audio if needed --- src/pipecat/services/tts_service.py | 42 ++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index e04c4b649..37aad0372 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -24,6 +24,7 @@ from typing import ( from loguru import logger +from pipecat.audio.utils import create_stream_resampler from pipecat.frames.frames import ( AggregatedTextFrame, AggregationType, @@ -202,6 +203,8 @@ class TTSService(AIService): ) self._text_filters = [text_filter] + self._resampler = create_stream_resampler() + self._stop_frame_task: Optional[asyncio.Task] = None self._stop_frame_queue: asyncio.Queue = asyncio.Queue() @@ -505,12 +508,40 @@ class TTSService(AIService): await self._stop_frame_queue.put(frame) async def _stream_audio_frames_from_iterator( - self, iterator: AsyncIterator[bytes], *, strip_wav_header: bool + self, + iterator: AsyncIterator[bytes], + *, + strip_wav_header: bool = False, + in_sample_rate: Optional[int] = None, ) -> AsyncGenerator[Frame, None]: + """Stream audio frames from an async byte iterator with optional resampling. + + For WAV data, use `strip_wav_header=True` to strip the header and + auto-detect the source sample rate. For raw PCM data, pass + `in_sample_rate` directly. Audio is resampled to `self.sample_rate` when + the source rate differs. + + Args: + iterator: Async iterator yielding audio bytes. 
+ strip_wav_header: Strip WAV header and parse source sample rate from it. + in_sample_rate: Source sample rate for raw PCM data. Overrides + WAV-detected rate if both are provided. + + """ buffer = bytearray() + source_sample_rate = in_sample_rate need_to_strip_wav_header = strip_wav_header + + async def maybe_resample(audio: bytes) -> bytes: + if source_sample_rate and source_sample_rate != self.sample_rate: + return await self._resampler.resample(audio, source_sample_rate, self.sample_rate) + return audio + async for chunk in iterator: if need_to_strip_wav_header and chunk.startswith(b"RIFF"): + # Parse sample rate from WAV header (bytes 24-28, little-endian uint32). + if len(chunk) >= 44 and source_sample_rate is None: + source_sample_rate = int.from_bytes(chunk[24:28], "little") chunk = chunk[44:] need_to_strip_wav_header = False @@ -520,19 +551,18 @@ class TTSService(AIService): # Round to nearest even number. aligned_length = len(buffer) & ~1 # 111111111...11110 if aligned_length > 0: - aligned_chunk = buffer[:aligned_length] + aligned_chunk = await maybe_resample(bytes(buffer[:aligned_length])) buffer = buffer[aligned_length:] # keep any leftover byte if len(aligned_chunk) > 0: - frame = TTSAudioRawFrame(bytes(aligned_chunk), self.sample_rate, 1) - yield frame + yield TTSAudioRawFrame(aligned_chunk, self.sample_rate, 1) if len(buffer) > 0: # Make sure we don't need an extra padding byte. 
if len(buffer) % 2 == 1: buffer.extend(b"\x00") - frame = TTSAudioRawFrame(bytes(buffer), self.sample_rate, 1) - yield frame + audio = await maybe_resample(bytes(buffer)) + yield TTSAudioRawFrame(audio, self.sample_rate, 1) async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): self._processing_text = False From 5a85e27cc52393ba633b39095aec24308c2a9311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 28 Jan 2026 23:55:07 -0800 Subject: [PATCH 4/5] PiperHttpTTSService: allow passing a voice id --- src/pipecat/services/piper/tts.py | 41 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index ca809f469..1de1688b1 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -13,7 +13,6 @@ from typing import AsyncGenerator, AsyncIterator, Optional import aiohttp from loguru import logger -from pipecat.audio.utils import create_stream_resampler from pipecat.frames.frames import ( ErrorFrame, Frame, @@ -63,8 +62,6 @@ class PiperTTSService(TTSService): self._voice_id = voice_id - self._resampler = create_stream_resampler() - download_dir = download_dir or Path.cwd() model_file = f"{voice_id}.onnx" @@ -105,17 +102,12 @@ class PiperTTSService(TTSService): except StopIteration: return None - async def async_iterator(iterator, sample_rate: int) -> AsyncIterator[bytes]: + async def async_iterator(iterator) -> AsyncIterator[bytes]: while True: item = await asyncio.to_thread(async_next, iterator) if item is None: return - - audio_data = await self._resampler.resample( - item.audio_int16_bytes, sample_rate, self.sample_rate - ) - - yield audio_data + yield item.audio_int16_bytes logger.debug(f"{self}: Generating TTS [{text}]") @@ -127,8 +119,8 @@ class PiperTTSService(TTSService): yield TTSStartedFrame() async for frame in self._stream_audio_frames_from_iterator( - 
async_iterator(self._voice.synthesize(text), self._voice.config.sample_rate), - strip_wav_header=False, + async_iterator(self._voice.synthesize(text)), + in_sample_rate=self._voice.config.sample_rate, ): await self.stop_ttfb_metrics() yield frame @@ -143,6 +135,12 @@ class PiperTTSService(TTSService): # This assumes a running TTS service running: # https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/API_HTTP.md +# +# Usage: +# +# $ uv pip install "piper-tts[http]" +# $ uv run python -m piper.http_server -m en_US-ryan-high +# class PiperHttpTTSService(TTSService): """Piper HTTP TTS service implementation. @@ -156,9 +154,7 @@ class PiperHttpTTSService(TTSService): *, base_url: str, aiohttp_session: aiohttp.ClientSession, - # When using Piper, the sample rate of the generated audio depends on the - # voice model being used. - sample_rate: Optional[int] = None, + voice_id: Optional[str] = None, **kwargs, ): """Initialize the Piper TTS service. @@ -166,10 +162,10 @@ class PiperHttpTTSService(TTSService): Args: base_url: Base URL for the Piper TTS HTTP server. aiohttp_session: aiohttp ClientSession for making HTTP requests. - sample_rate: Output sample rate. If None, uses the voice model's native rate. + voice_id: Piper voice model identifier (e.g. `en_US-ryan-high`). **kwargs: Additional arguments passed to the parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__(**kwargs) if base_url.endswith("/"): logger.warning("Base URL ends with a slash, this is not allowed.") @@ -177,7 +173,7 @@ class PiperHttpTTSService(TTSService): self._base_url = base_url self._session = aiohttp_session - self._settings = {"base_url": base_url} + self._model_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -204,9 +200,12 @@ class PiperHttpTTSService(TTSService): try: await self.start_ttfb_metrics() - async with self._session.post( - self._base_url, json={"text": text}, headers=headers - ) as response: + data = { + "text": text, + "voice": self._model_id, + } + + async with self._session.post(self._base_url, json=data, headers=headers) as response: if response.status != 200: error = await response.text() yield ErrorFrame( From bd005870920a6598dfaa5d471dfa770619b2a49c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Wed, 28 Jan 2026 23:59:37 -0800 Subject: [PATCH 5/5] changelog: add files for 3585 --- changelog/3585.added.md | 1 + changelog/3585.fixed.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 changelog/3585.added.md create mode 100644 changelog/3585.fixed.md diff --git a/changelog/3585.added.md b/changelog/3585.added.md new file mode 100644 index 000000000..7335d2c18 --- /dev/null +++ b/changelog/3585.added.md @@ -0,0 +1 @@ +- Added local `PiperTTSService` for offline text-to-speech using Piper voice models. The existing HTTP-based service has been renamed to `PiperHttpTTSService`. diff --git a/changelog/3585.fixed.md b/changelog/3585.fixed.md new file mode 100644 index 000000000..4993ed2f7 --- /dev/null +++ b/changelog/3585.fixed.md @@ -0,0 +1 @@ +- Fixed `PiperHttpTTSService` (old `PiperTTSService`) to resample audio output based on the model's sample rate parsed from the WAV header.