diff --git a/CHANGELOG.md b/CHANGELOG.md index b80f47702..989cdb14f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -304,6 +304,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `on_transcription_stopped` and `on_transcription_error` to Daily callbacks. +- Added SSML reserved character escaping to `AzureBaseTTSService` to properly handle special characters in text sent to Azure TTS. This fixes an issue where characters like `&`, `<`, `>`, `"`, and `'` in LLM-generated text would cause TTS failures. +- ### Changed - Changed the default `url` for `NeuphonicTTSService` to diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 0cf029c6b..15b4f1256 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -68,6 +68,16 @@ class AzureBaseTTSService(TTSService): construction, voice configuration, and parameter management. """ + # Define SSML escape mappings based on SSML reserved characters + # See - https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-structure + SSML_ESCAPE_CHARS = { + "&": "&amp;", + "<": "&lt;", + ">": "&gt;", + '"': "&quot;", + "'": "&apos;", + } + class InputParams(BaseModel): """Input parameters for Azure TTS voice configuration. @@ -154,6 +164,10 @@ class AzureBaseTTSService(TTSService): def _construct_ssml(self, text: str) -> str: language = self._settings["language"] + + # Escape special characters + escaped_text = self._escape_text(text) + ssml = ( f"" - ssml += text + ssml += escaped_text if self._settings["emphasis"]: ssml += "" @@ -197,6 +211,27 @@ class AzureBaseTTSService(TTSService): return ssml + def _escape_text(self, text: str) -> str: + """Escapes XML/SSML reserved characters according to Microsoft documentation. + + This method escapes the following characters: + - & becomes &amp; + - < becomes &lt; + - > becomes &gt; + - " becomes &quot; + - ' becomes &apos; + + Args: + text: The text to escape. + + Returns: + The escaped text. 
+ """ + escaped_text = text + for char, escape_code in AzureBaseTTSService.SSML_ESCAPE_CHARS.items(): + escaped_text = escaped_text.replace(char, escape_code) + return escaped_text + class AzureTTSService(AzureBaseTTSService): """Azure Cognitive Services streaming TTS service.