diff --git a/CHANGELOG.md b/CHANGELOG.md index 24484687f..ab969fe52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added support for Sarvam Speech-to-Text service (`SarvamSTTService`) with streaming WebSocket + support for `saarika` (STT) and `saaras` (STT-translate) models. + - Added `generation_config` parameter support to `CartesiaTTSService` and `CartesiaHttpTTSService` for Cartesia Sonic-3 models. Includes a new `GenerationConfig` class with `volume` (0.5-2.0), `speed` (0.6-1.5), diff --git a/examples/foundational/07z-interruptible-sarvam-http.py b/examples/foundational/07z-interruptible-sarvam-http.py index 29851d254..20b28c94c 100644 --- a/examples/foundational/07z-interruptible-sarvam-http.py +++ b/examples/foundational/07z-interruptible-sarvam-http.py @@ -22,8 +22,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport -from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.sarvam.stt import SarvamSTTService from pipecat.services.sarvam.tts import SarvamHttpTTSService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams @@ -63,7 +63,11 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): # Create an HTTP session async with aiohttp.ClientSession() as session: - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + stt = SarvamSTTService( + api_key=os.getenv("SARVAM_API_KEY"), + model="saarika:v2.5", + params=SarvamSTTService.InputParams(language=None), + ) tts = SarvamHttpTTSService( api_key=os.getenv("SARVAM_API_KEY"), diff --git 
a/examples/foundational/07z-interruptible-sarvam.py b/examples/foundational/07z-interruptible-sarvam.py index 44e0b7844..9b60b2a24 100644 --- a/examples/foundational/07z-interruptible-sarvam.py +++ b/examples/foundational/07z-interruptible-sarvam.py @@ -24,8 +24,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport -from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.sarvam.stt import SarvamSTTService from pipecat.services.sarvam.tts import SarvamTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams @@ -62,7 +62,12 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + stt = SarvamSTTService( + api_key=os.getenv("SARVAM_API_KEY"), + model="saarika:v2.5", + # language=None sets no explicit language; pass one (e.g. Hindi) here or change it via set_language at runtime + params=SarvamSTTService.InputParams(language=None), + ) tts = SarvamTTSService( api_key=os.getenv("SARVAM_API_KEY"),