transcriptions: added more languages

This commit is contained in:
Aleix Conchillo Flaqué
2024-08-26 11:12:27 -07:00
parent a253606d50
commit fd3fdacdee
3 changed files with 55 additions and 13 deletions

View File

@@ -26,7 +26,7 @@ from pipecat.frames.frames import (
LLMFullResponseEndFrame
)
from pipecat.processors.frame_processor import FrameDirection
from pipecat.transcriptions.languages import Language
from pipecat.transcriptions.language import Language
from pipecat.services.ai_services import TTSService
from loguru import logger
@@ -43,10 +43,20 @@ except ModuleNotFoundError as e:
def language_to_cartesia_language(language: Language) -> str | None:
    """Return the Cartesia language code string for a ``Language`` member.

    Only the languages listed in the table below are mapped; any other
    value yields ``None``.
    """
    cartesia_codes = {
        Language.DE: "de",
        Language.EN: "en",
        Language.ES: "es",
        Language.FR: "fr",
        Language.JA: "ja",
        Language.PT: "pt",
        Language.ZH: "zh",
    }
    # A missing key falls through to None, mirroring the unmatched case.
    return cartesia_codes.get(language)

View File

@@ -19,5 +19,46 @@ else:
class Language(StrEnum):
EN = "en"
ES = "es"
BG = "bg" # Bulgarian
CA = "ca" # Catalan
ZH = "zh" # Chinese simplified
ZH_TW = "zh-TW" # Chinese traditional
CS = "cs" # Czech
DA = "da" # Danish
NL = "nl" # Dutch
EN = "en" # English
EN_US = "en-US" # English (USA)
EN_AU = "en-AU" # English (Australia)
EN_GB = "en-GB" # English (Great Britain)
EN_NZ = "en-NZ" # English (New Zealand)
EN_IN = "en-IN" # English (India)
ET = "et" # Estonian
FI = "fi" # Finnish
NL_BE = "nl-BE" # Flemmish
FR = "fr" # French
FR_CA = "fr-CA" # French (Canada)
DE = "de" # German
DE_CH = "de-CH" # German (Switzerland)
EL = "el" # Greek
HI = "hi" # Hindi
HU = "hu" # Hungarian
ID = "id" # Indonesian
IT = "it" # Italian
JA = "ja" # Japanese
KO = "ko" # Korean
LV = "lv" # Latvian
LT = "lt" # Lithuanian
MS = "ms" # Malay
NO = "no" # Norwegian
PL = "pl" # Polish
PT = "pt" # Portuguese
PT_BR = "pt-BR" # Portuguese (Brazil)
RO = "ro" # Romanian
RU = "ru" # Russian
SK = "sk" # Slovak
ES = "es" # Spanish
SV = "sv" # Swedish
TH = "th" # Thai
TR = "tr" # Turkish
UK = "uk" # Ukrainian
VI = "vi" # Vietnamese

View File

@@ -747,15 +747,6 @@ class DailyOutputTransport(BaseOutputTransport):
await self._client.write_frame_to_camera(frame)
def daily_language_to_language(language: str) -> Language | None:
    """Convert a language code string into the corresponding ``Language``.

    Only ``"en"`` and ``"es"`` are recognised; every other value gives
    ``None``.
    """
    if language == "en":
        return Language.EN
    if language == "es":
        return Language.ES
    return None
class DailyTransport(BaseTransport):
def __init__(
@@ -962,7 +953,7 @@ class DailyTransport(BaseTransport):
is_final = message["rawResponse"]["is_final"]
try:
language = message["rawResponse"]["channel"]["alternatives"][0]["languages"][0]
language = daily_language_to_language(language)
language = Language(language)
except KeyError:
language = None
if is_final: