Merge pull request #1497 from pipecat-ai/mb/gladia-languages

Align languages with Gladia's supported languages, remove audio_enhancer option
This commit is contained in:
Mark Backman
2025-04-01 11:54:24 -04:00
committed by GitHub
4 changed files with 40 additions and 14 deletions

View File

@@ -70,7 +70,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
`pipecat.services.[ai_service,image_service,llm_service,stt_service,vision_service]`.
- `GladiaSTTService` now uses the `solaria-1` model by default. Other params
use Gladia's default values.
use Gladia's default values. Added support for more language codes.
### Fixed

View File

@@ -27,11 +27,9 @@ class PreProcessingConfig(BaseModel):
"""Configuration for audio pre-processing options.
Attributes:
audio_enhancer: Whether to apply audio enhancement
speech_threshold: Sensitivity for speech detection (0-1)
"""
audio_enhancer: Optional[bool] = None
speech_threshold: Optional[float] = None

View File

@@ -48,8 +48,12 @@ def language_to_gladia_language(language: Language) -> Optional[str]:
Language.AR: "ar",
Language.AS: "as",
Language.AZ: "az",
Language.BA: "ba",
Language.BE: "be",
Language.BG: "bg",
Language.BN: "bn",
Language.BO: "bo",
Language.BR: "br",
Language.BS: "bs",
Language.CA: "ca",
Language.CS: "cs",
@@ -63,13 +67,16 @@ def language_to_gladia_language(language: Language) -> Optional[str]:
Language.EU: "eu",
Language.FA: "fa",
Language.FI: "fi",
Language.FO: "fo",
Language.FR: "fr",
Language.GA: "ga",
Language.GL: "gl",
Language.GU: "gu",
Language.HA: "ha",
Language.HAW: "haw",
Language.HE: "he",
Language.HI: "hi",
Language.HR: "hr",
Language.HT: "ht",
Language.HU: "hu",
Language.HY: "hy",
Language.ID: "id",
@@ -82,29 +89,38 @@ def language_to_gladia_language(language: Language) -> Optional[str]:
Language.KM: "km",
Language.KN: "kn",
Language.KO: "ko",
Language.LA: "la",
Language.LB: "lb",
Language.LN: "ln",
Language.LO: "lo",
Language.LT: "lt",
Language.LV: "lv",
Language.MG: "mg",
Language.MI: "mi",
Language.MK: "mk",
Language.ML: "ml",
Language.MN: "mn",
Language.MR: "mr",
Language.MS: "ms",
Language.MT: "mt",
Language.MY: "my",
Language.MY_MR: "mymr",
Language.NE: "ne",
Language.NL: "nl",
Language.NN: "nn",
Language.NO: "no",
Language.OR: "or",
Language.OC: "oc",
Language.PA: "pa",
Language.PL: "pl",
Language.PS: "ps",
Language.PT: "pt",
Language.RO: "ro",
Language.RU: "ru",
Language.SA: "sa",
Language.SD: "sd",
Language.SI: "si",
Language.SK: "sk",
Language.SL: "sl",
Language.SN: "sn",
Language.SO: "so",
Language.SQ: "sq",
Language.SR: "sr",
@@ -113,14 +129,19 @@ def language_to_gladia_language(language: Language) -> Optional[str]:
Language.SW: "sw",
Language.TA: "ta",
Language.TE: "te",
Language.TG: "tg",
Language.TH: "th",
Language.TK: "tk",
Language.TL: "tl",
Language.TR: "tr",
Language.TT: "tt",
Language.UK: "uk",
Language.UR: "ur",
Language.UZ: "uz",
Language.VI: "vi",
Language.YI: "yi",
Language.YO: "yo",
Language.ZH: "zh",
Language.ZU: "zu",
}
result = BASE_LANGUAGES.get(language)

View File

@@ -182,6 +182,9 @@ class Language(StrEnum):
GA = "ga"
GA_IE = "ga-IE"
# Gaelic
GD = "gd"
# Galician
GL = "gl"
GL_ES = "gl-ES"
@@ -193,6 +196,9 @@ class Language(StrEnum):
# Hausa
HA = "ha"
# Hawaiian
HAW = "haw"
# Hebrew
HE = "he"
HE_IL = "he-IL"
@@ -288,6 +294,9 @@ class Language(StrEnum):
# Malagasy
MG = "mg"
# Maori
MI = "mi"
# Macedonian
MK = "mk"
MK_MK = "mk-MK"
@@ -300,9 +309,6 @@ class Language(StrEnum):
MN = "mn"
MN_MN = "mn-MN"
# Maori
MI = "mi"
# Marathi
MR = "mr"
MR_IN = "mr-IN"
@@ -318,6 +324,7 @@ class Language(StrEnum):
# Burmese
MY = "my"
MY_MM = "my-MM"
MY_MR = "mymr"
# Norwegian
NB = "nb" # Norwegian Bokmål
@@ -414,9 +421,6 @@ class Language(StrEnum):
SW_KE = "sw-KE"
SW_TZ = "sw-TZ"
# Tagalog
TL = "tl"
# Tamil
TA = "ta"
TA_IN = "ta-IN"
@@ -438,6 +442,9 @@ class Language(StrEnum):
# Turkmen
TK = "tk"
# Tagalog
TL = "tl"
# Turkish
TR = "tr"
TR_TR = "tr-TR"
@@ -489,7 +496,7 @@ class Language(StrEnum):
ZH_TW = "zh-TW"
# Xhosa
XH = "xh"
XH = "xh-ZA"
# Zulu
ZU = "zu"