Files
pipecat/tests/test_cartesia_tts.py
filipi87 81bb81c1d0 test: add automated tests for word tracking, frame sequencing, and Cartesia TTS
Adds tests for AggregatedFrameSequencer, WordCompletionTracker, and
word_timestamp_utils (including CJK language scenarios). Updates existing
Cartesia TTS and TTS frame ordering tests to cover the new behaviours.
2026-05-20 10:03:26 -03:00

112 lines
3.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.settings import TTSSettings
from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregated_text
def _service(language: str) -> CartesiaTTSService:
    """Return a bare ``CartesiaTTSService`` whose settings carry only ``language``.

    The instance is created through ``__new__`` so ``__init__`` never runs;
    ``_settings`` is the only attribute assigned here.
    """
    instance = CartesiaTTSService.__new__(CartesiaTTSService)
    instance._settings = TTSSettings(language=language)
    return instance
def _process_word_timestamps(
    words: list[str], starts: list[float], language: str
) -> list[tuple[str, float]]:
    """Normalize one batch of word timestamps with a service set to ``language``.

    Args:
        words: Word tokens passed to the service.
        starts: Start times passed alongside ``words``.
        language: Language code stored in the service settings.

    Returns:
        Whatever ``_normalize_word_timestamps`` returns for the batch.
    """
    tts = _service(language)
    normalized = tts._normalize_word_timestamps(words, starts)
    return normalized
def _concatenate_processed_timestamps(
    timestamp_groups: list[tuple[list[str], list[float]]], language: str
) -> str:
    """Normalize each timestamp group and join the resulting words into one string.

    Args:
        timestamp_groups: ``(words, starts)`` pairs, normalized one group at a time
            by the same service instance.
        language: Language code stored in the service settings.

    Returns:
        The text produced by ``concatenate_aggregated_text`` over all parts.
    """
    tts = _service(language)
    parts = []
    for group_words, group_starts in timestamp_groups:
        normalized = tts._normalize_word_timestamps(group_words, group_starts)
        # Queried once per group, after that group has been normalized.
        spaced = tts._word_timestamps_include_inter_frame_spaces()
        parts += [
            TextPartForConcatenation(text, includes_inter_part_spaces=spaced)
            for text, _start in normalized
        ]
    return concatenate_aggregated_text(parts)
def test_cartesia_chinese_word_timestamps_join_without_spaces():
    """Chinese per-character tokens merge into one unspaced entry at the first start time."""
    # NOTE(review): the tokens were empty strings, which cannot join to the
    # expected text; they are restored here as the individual characters of the
    # expected string (3 tokens, 3 characters) — confirm against the upstream fixture.
    assert _process_word_timestamps(
        words=["你", "好", "。"],
        starts=[0.0, 0.1, 0.2],
        language="zh",
    ) == [("你好。", 0.0)]
def test_cartesia_japanese_word_timestamps_join_without_spaces():
    """Japanese per-character tokens merge into one unspaced entry at the first start time."""
    # NOTE(review): the tokens were empty strings, which cannot join to the
    # expected text; they are restored here as the individual characters of the
    # expected string (6 tokens, 6 characters) — confirm against the upstream fixture.
    assert _process_word_timestamps(
        words=["こ", "ん", "に", "ち", "は", "。"],
        starts=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
        language="ja",
    ) == [("こんにちは。", 0.0)]
def test_cartesia_korean_word_timestamps_preserve_words_and_timestamps():
    """Korean words stay separate, each paired with its own start time."""
    expected = [("안녕하세요", 0.0), ("반갑습니다", 0.2)]
    actual = _process_word_timestamps(
        words=["안녕하세요", "반갑습니다"],
        starts=[0.0, 0.2],
        language="ko",
    )
    assert actual == expected
def test_cartesia_korean_word_timestamps_do_not_join_latin_and_hangul():
    """A Latin token followed by a Hangul token is returned as two separate entries."""
    expected = [("AI", 3.7026982), ("어시스턴트입니다.", 4.1999383)]
    actual = _process_word_timestamps(
        words=["AI", "어시스턴트입니다."],
        starts=[3.7026982, 4.1999383],
        language="ko",
    )
    assert actual == expected
def test_cartesia_japanese_timestamp_groups_reassemble_without_spaces():
    """Two Japanese timestamp groups concatenate back into unspaced text."""
    # NOTE(review): the tokens were empty strings, which cannot reassemble into
    # the expected text; they are restored here as the individual characters of
    # the expected string (7 + 5 tokens, 12 characters) — confirm against the
    # upstream fixture.
    assert (
        _concatenate_processed_timestamps(
            [
                (
                    ["こ", "ん", "に", "ち", "は", "、", "私"],
                    [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
                ),
                (["は", "あ", "な", "た", "の"], [1.0, 1.1, 1.2, 1.3, 1.4]),
            ],
            language="ja",
        )
        == "こんにちは、私はあなたの"
    )
def test_cartesia_chinese_timestamp_groups_reassemble_without_spaces():
    """Two Chinese timestamp groups concatenate back into unspaced text."""
    # NOTE(review): the tokens were empty strings, which cannot reassemble into
    # the expected text; they are restored here as the individual characters of
    # the expected string (5 + 4 tokens, 9 characters) — confirm against the
    # upstream fixture, including which comma code point it uses.
    assert (
        _concatenate_processed_timestamps(
            [
                (["你", "好", ",", "我", "是"], [0.1, 0.2, 0.3, 0.4, 0.5]),
                (["你", "的", "智", "能"], [1.0, 1.1, 1.2, 1.3]),
            ],
            language="zh",
        )
        == "你好,我是你的智能"
    )
def test_cartesia_korean_timestamp_groups_reassemble_with_spaces():
    """Korean timestamp groups concatenate into a space-separated sentence."""
    groups = [
        (["저는"], [1.6]),
        (["여러분의"], [1.8]),
        (["AI", "어시스턴트입니다."], [3.7, 4.2]),
    ]
    reassembled = _concatenate_processed_timestamps(groups, language="ko")
    assert reassembled == "저는 여러분의 AI 어시스턴트입니다."