Fix SonioxSTTService crash when language_hints contains plain strings (#4045)

Refactor language_to_soniox_language to use the resolve_language + LANGUAGE_MAP
pattern, consistent with other services. Fix the resolve_language fallback to use
str(language) instead of language.value, so plain strings (which have no `.value`
attribute) don't crash.
This commit is contained in:
Mark Backman
2026-03-16 19:45:03 -04:00
parent b437cbe126
commit 3b8d040e41
3 changed files with 72 additions and 10 deletions

1
changelog/4045.fixed.md Normal file
View File

@@ -0,0 +1 @@
Fixed `SonioxSTTService` crash when `language_hints` contains plain strings instead of `Language` enum values.

View File

@@ -27,7 +27,7 @@ from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven
from pipecat.services.stt_latency import SONIOX_TTFS_P99
from pipecat.services.stt_service import WebsocketSTTService
from pipecat.transcriptions.language import Language
from pipecat.transcriptions.language import Language, resolve_language
from pipecat.utils.time import time_now_iso8601
from pipecat.utils.tracing.service_decorators import traced_stt
@@ -118,14 +118,75 @@ def is_end_token(token: dict) -> bool:
def language_to_soniox_language(language: Language) -> str:
"""Pipecat Language enum uses same ISO 2-letter codes as Soniox, except with added regional variants.
"""Convert a Pipecat Language to a Soniox language code.
For a list of all supported languages, see: https://soniox.com/docs/speech-to-text/core-concepts/supported-languages
For a list of all supported languages, see:
https://soniox.com/docs/speech-to-text/core-concepts/supported-languages
"""
lang_str = str(language.value).lower()
if "-" in lang_str:
return lang_str.split("-")[0]
return lang_str
LANGUAGE_MAP = {
Language.AF: "af",
Language.AR: "ar",
Language.AZ: "az",
Language.BE: "be",
Language.BG: "bg",
Language.BN: "bn",
Language.BS: "bs",
Language.CA: "ca",
Language.CS: "cs",
Language.CY: "cy",
Language.DA: "da",
Language.DE: "de",
Language.EL: "el",
Language.EN: "en",
Language.ES: "es",
Language.ET: "et",
Language.EU: "eu",
Language.FA: "fa",
Language.FI: "fi",
Language.FR: "fr",
Language.GL: "gl",
Language.GU: "gu",
Language.HE: "he",
Language.HI: "hi",
Language.HR: "hr",
Language.HU: "hu",
Language.ID: "id",
Language.IT: "it",
Language.JA: "ja",
Language.KA: "ka",
Language.KK: "kk",
Language.KN: "kn",
Language.KO: "ko",
Language.LT: "lt",
Language.LV: "lv",
Language.MK: "mk",
Language.ML: "ml",
Language.MR: "mr",
Language.MS: "ms",
Language.NL: "nl",
Language.NO: "no",
Language.PA: "pa",
Language.PL: "pl",
Language.PT: "pt",
Language.RO: "ro",
Language.RU: "ru",
Language.SK: "sk",
Language.SL: "sl",
Language.SQ: "sq",
Language.SR: "sr",
Language.SV: "sv",
Language.SW: "sw",
Language.TA: "ta",
Language.TE: "te",
Language.TH: "th",
Language.TL: "tl",
Language.TR: "tr",
Language.UK: "uk",
Language.UR: "ur",
Language.VI: "vi",
Language.ZH: "zh",
}
return resolve_language(language, LANGUAGE_MAP, use_base_code=True)
def _prepare_language_hints(

View File

@@ -631,13 +631,13 @@ def resolve_language(
return result
# Not in map - fall back with warning
lang_str = str(language.value)
lang_str = str(language)
if use_base_code:
# Extract base code (e.g., "en" from "en-US")
base_code = lang_str.split("-")[0].lower()
logger.warning(f"Language {language.value} not verified. Using base code '{base_code}'.")
logger.warning(f"Language {language} not verified. Using base code '{base_code}'.")
return base_code
else:
logger.warning(f"Language {language.value} not verified. Using '{lang_str}'.")
logger.warning(f"Language {language} not verified. Using '{lang_str}'.")
return lang_str