diff --git a/CHANGELOG.md b/CHANGELOG.md index 87381abf8..506a9305e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,7 +70,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `pipecat.services.[ai_service,image_service,llm_service,stt_service,vision_service]`. - `GladiaSTTService` now uses the `solaria-1` model by default. Other params - use Gladia's default values. + use Gladia's default values. Added support for more language codes. ### Fixed diff --git a/src/pipecat/services/gladia/config.py b/src/pipecat/services/gladia/config.py index 6014dd576..275554418 100644 --- a/src/pipecat/services/gladia/config.py +++ b/src/pipecat/services/gladia/config.py @@ -27,11 +27,9 @@ class PreProcessingConfig(BaseModel): """Configuration for audio pre-processing options. Attributes: - audio_enhancer: Whether to apply audio enhancement speech_threshold: Sensitivity for speech detection (0-1) """ - audio_enhancer: Optional[bool] = None speech_threshold: Optional[float] = None diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index 80affacb2..03dd35ff2 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -48,8 +48,12 @@ def language_to_gladia_language(language: Language) -> Optional[str]: Language.AR: "ar", Language.AS: "as", Language.AZ: "az", + Language.BA: "ba", + Language.BE: "be", Language.BG: "bg", Language.BN: "bn", + Language.BO: "bo", + Language.BR: "br", Language.BS: "bs", Language.CA: "ca", Language.CS: "cs", @@ -63,13 +67,16 @@ def language_to_gladia_language(language: Language) -> Optional[str]: Language.EU: "eu", Language.FA: "fa", Language.FI: "fi", + Language.FO: "fo", Language.FR: "fr", - Language.GA: "ga", Language.GL: "gl", Language.GU: "gu", + Language.HA: "ha", + Language.HAW: "haw", Language.HE: "he", Language.HI: "hi", Language.HR: "hr", + Language.HT: "ht", Language.HU: "hu", Language.HY: "hy", Language.ID: "id", @@ -82,29 +89,38 @@ def language_to_gladia_language(language: Language) -> Optional[str]: Language.KM: "km", Language.KN: "kn", Language.KO: "ko", + Language.LA: "la", + Language.LB: "lb", + Language.LN: "ln", Language.LO: "lo", Language.LT: "lt", Language.LV: "lv", + Language.MG: "mg", + Language.MI: "mi", Language.MK: "mk", Language.ML: "ml", Language.MN: "mn", Language.MR: "mr", Language.MS: "ms", Language.MT: "mt", - Language.MY: "my", + Language.MY_MR: "mymr", Language.NE: "ne", Language.NL: "nl", + Language.NN: "nn", Language.NO: "no", - Language.OR: "or", + Language.OC: "oc", Language.PA: "pa", Language.PL: "pl", Language.PS: "ps", Language.PT: "pt", Language.RO: "ro", Language.RU: "ru", + Language.SA: "sa", + Language.SD: "sd", Language.SI: "si", Language.SK: "sk", Language.SL: "sl", + Language.SN: "sn", Language.SO: "so", Language.SQ: "sq", Language.SR: "sr", @@ -113,14 +129,19 @@ def language_to_gladia_language(language: Language) -> Optional[str]: Language.SW: "sw", Language.TA: "ta", Language.TE: "te", + Language.TG: "tg", Language.TH: "th", + Language.TK: "tk", + Language.TL: "tl", Language.TR: "tr", + Language.TT: "tt", Language.UK: "uk", Language.UR: "ur", Language.UZ: "uz", Language.VI: "vi", + Language.YI: "yi", + Language.YO: "yo", Language.ZH: "zh", - Language.ZU: "zu", } result = BASE_LANGUAGES.get(language) diff --git a/src/pipecat/transcriptions/language.py b/src/pipecat/transcriptions/language.py index 75f714a72..197564740 100644 --- a/src/pipecat/transcriptions/language.py +++ b/src/pipecat/transcriptions/language.py @@ -182,6 +182,9 @@ class Language(StrEnum): GA = "ga" GA_IE = "ga-IE" + # Gaelic + GD = "gd" + # Galician GL = "gl" GL_ES = "gl-ES" @@ -193,6 +196,9 @@ class Language(StrEnum): # Hausa HA = "ha" + # Hawaiian + HAW = "haw" + # Hebrew HE = "he" HE_IL = "he-IL" @@ -288,6 +294,9 @@ class Language(StrEnum): # Malagasy MG = "mg" + # Maori + MI = "mi" + # Macedonian MK = "mk" MK_MK = "mk-MK" @@ -300,9 +309,6 @@ class Language(StrEnum): MN = "mn" MN_MN = "mn-MN" - # Maori - MI = "mi" - # Marathi MR = "mr" MR_IN = "mr-IN" @@ -318,6 +324,7 @@ class Language(StrEnum): # Burmese MY = "my" MY_MM = "my-MM" + MY_MR = "mymr" # Norwegian NB = "nb" # Norwegian Bokmål @@ -414,9 +421,6 @@ class Language(StrEnum): SW_KE = "sw-KE" SW_TZ = "sw-TZ" - # Tagalog - TL = "tl" - # Tamil TA = "ta" TA_IN = "ta-IN" @@ -438,6 +442,9 @@ class Language(StrEnum): # Turkmen TK = "tk" + # Tagalog + TL = "tl" + # Turkish TR = "tr" TR_TR = "tr-TR" @@ -489,7 +496,7 @@ class Language(StrEnum): ZH_TW = "zh-TW" # Xhosa - XH = "xh" + XH = "xh-ZA" # Zulu ZU = "zu"