Compare commits

...

2 Commits

Author SHA1 Message Date
James Hush
a1e6807776 docs: add changelog entry for pause_frame_processing parameter 2025-12-03 14:41:46 +01:00
James Hush
3d8bec484f feat(cartesia): make pause_frame_processing configurable in CartesiaTTSService
Add pause_frame_processing parameter to CartesiaTTSService constructor
to allow users to disable frame processing pausing during TTS generation.

This is useful for use cases where text aggregation is disabled and
users want to send text directly to the TTS service without pausing
incoming frame processing.
2025-12-03 14:40:36 +01:00
2 changed files with 6 additions and 1 deletions

View File

@@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Added a `pause_frame_processing` parameter to the `CartesiaTTSService`
constructor. It defaults to `True` (the previous behavior); set it to `False`
to keep processing incoming frames during TTS generation.
- Added `wait_for_all` argument to the base `LLMService`. When enabled, this
ensures all function calls complete before returning results to the LLM (i.e.,
before running a new inference with those results).

View File

@@ -234,6 +234,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
params: Optional[InputParams] = None,
text_aggregator: Optional[BaseTextAggregator] = None,
aggregate_sentences: Optional[bool] = True,
pause_frame_processing: Optional[bool] = True,
**kwargs,
):
"""Initialize the Cartesia TTS service.
@@ -254,6 +255,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
Use an LLMTextProcessor before the TTSService for custom text aggregation.
aggregate_sentences: Whether to aggregate sentences within the TTSService.
pause_frame_processing: Whether to pause processing frames while receiving audio.
**kwargs: Additional arguments passed to the parent service.
"""
# Aggregating sentences still gives cleaner-sounding results and fewer
@@ -269,7 +271,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
super().__init__(
aggregate_sentences=aggregate_sentences,
push_text_frames=False,
pause_frame_processing=True,
pause_frame_processing=pause_frame_processing,
sample_rate=sample_rate,
text_aggregator=text_aggregator,
**kwargs,