diff --git a/changelog/4045.fixed.md b/changelog/4045.fixed.md new file mode 100644 index 000000000..ecae78969 --- /dev/null +++ b/changelog/4045.fixed.md @@ -0,0 +1 @@ +Fixed `SonioxSTTService` crash when `language_hints` contains plain strings instead of `Language` enum values. diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index f123d850c..5163ef113 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -27,7 +27,7 @@ from pipecat.processors.frame_processor import FrameDirection from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import SONIOX_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService -from pipecat.transcriptions.language import Language +from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_stt @@ -118,14 +118,75 @@ def is_end_token(token: dict) -> bool: def language_to_soniox_language(language: Language) -> str: - """Pipecat Language enum uses same ISO 2-letter codes as Soniox, except with added regional variants. + """Convert a Pipecat Language to a Soniox language code. - For a list of all supported languages, see: https://soniox.com/docs/speech-to-text/core-concepts/supported-languages + For a list of all supported languages, see: + https://soniox.com/docs/speech-to-text/core-concepts/supported-languages """ - lang_str = str(language.value).lower() - if "-" in lang_str: - return lang_str.split("-")[0] - return lang_str + LANGUAGE_MAP = { + Language.AF: "af", + Language.AR: "ar", + Language.AZ: "az", + Language.BE: "be", + Language.BG: "bg", + Language.BN: "bn", + Language.BS: "bs", + Language.CA: "ca", + Language.CS: "cs", + Language.CY: "cy", + Language.DA: "da", + Language.DE: "de", + Language.EL: "el", + Language.EN: "en", + Language.ES: "es", + Language.ET: "et", + Language.EU: "eu", + Language.FA: "fa", + Language.FI: "fi", + Language.FR: "fr", + Language.GL: "gl", + Language.GU: "gu", + Language.HE: "he", + Language.HI: "hi", + Language.HR: "hr", + Language.HU: "hu", + Language.ID: "id", + Language.IT: "it", + Language.JA: "ja", + Language.KA: "ka", + Language.KK: "kk", + Language.KN: "kn", + Language.KO: "ko", + Language.LT: "lt", + Language.LV: "lv", + Language.MK: "mk", + Language.ML: "ml", + Language.MR: "mr", + Language.MS: "ms", + Language.NL: "nl", + Language.NO: "no", + Language.PA: "pa", + Language.PL: "pl", + Language.PT: "pt", + Language.RO: "ro", + Language.RU: "ru", + Language.SK: "sk", + Language.SL: "sl", + Language.SQ: "sq", + Language.SR: "sr", + Language.SV: "sv", + Language.SW: "sw", + Language.TA: "ta", + Language.TE: "te", + Language.TH: "th", + Language.TL: "tl", + Language.TR: "tr", + Language.UK: "uk", + Language.UR: "ur", + Language.VI: "vi", + Language.ZH: "zh", + } + return resolve_language(language, LANGUAGE_MAP, use_base_code=True) def _prepare_language_hints( diff --git a/src/pipecat/transcriptions/language.py b/src/pipecat/transcriptions/language.py index a79a85166..1980590e3 100644 --- a/src/pipecat/transcriptions/language.py +++ b/src/pipecat/transcriptions/language.py @@ -631,13 +631,13 @@ def resolve_language( return result # Not in map - fall back with warning - lang_str = str(language.value) + lang_str = str(language) if use_base_code: # Extract base code (e.g., "en" from "en-US") base_code = lang_str.split("-")[0].lower() - logger.warning(f"Language {language.value} not verified. Using base code '{base_code}'.") + logger.warning(f"Language {language} not verified. Using base code '{base_code}'.") return base_code else: - logger.warning(f"Language {language.value} not verified. Using '{lang_str}'.") + logger.warning(f"Language {language} not verified. Using '{lang_str}'.") return lang_str