From 5cc1d8a024fa9388f9681001790817e034a44392 Mon Sep 17 00:00:00 2001 From: shreyas-sarvam Date: Mon, 13 Oct 2025 10:18:15 +0530 Subject: [PATCH] refactor: Update dependencies and improve logging --- pyproject.toml | 3 +- src/pipecat/services/sarvam/stt.py | 76 ++---------------------------- uv.lock | 28 +++++++++-- 3 files changed, 29 insertions(+), 78 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1aa72605d..f71358ff1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,6 @@ dependencies = [ # Pinning numba to resolve package dependencies "numba==0.61.2", "wait_for2>=0.4.1; python_version<'3.12'", - "sarvamai==0.1.21", ] [project.urls] @@ -94,7 +93,7 @@ rime = [ "pipecat-ai[websockets-base]" ] riva = [ "nvidia-riva-client~=2.21.1" ] runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.117.0", "pipecat-ai-small-webrtc-prebuilt>=1.0.0"] sambanova = [] -sarvam = [ "sarvamai==0.1.21", "websockets>=13.1,<15.0" ] +sarvam = [ "sarvamai==0.1.21", "pipecat-ai[websockets-base]" ] sentry = [ "sentry-sdk>=2.28.0,<3" ] local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "torchaudio>=2.5.0,<3" ] local-smart-turn-v3 = [ "transformers", "onnxruntime>=1.20.1,<2" ] diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 77203e13a..27b2106a3 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -7,11 +7,9 @@ can handle multiple audio formats for Indian language speech recognition. import asyncio import base64 -from enum import StrEnum -from typing import Literal, Optional +from typing import Optional from loguru import logger -from pydantic import BaseModel from pipecat.frames.frames import ( CancelFrame, @@ -35,51 +33,6 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -class TranscriptionMetrics(BaseModel): - """Metrics for transcription performance.""" - - audio_duration: float - processing_latency: float - - -class TranscriptionData(BaseModel): - """Data structure for transcription results.""" - - request_id: str - transcript: str - language_code: Optional[str] - metrics: Optional[TranscriptionMetrics] = None - is_final: Optional[bool] = None - - -class TranscriptionResponse(BaseModel): - """Response structure for transcription data.""" - - type: Literal["data"] - data: TranscriptionData - - -class VADSignal(StrEnum): - """Voice Activity Detection signal types.""" - - START = "START_SPEECH" - END = "END_SPEECH" - - -class EventData(BaseModel): - """Data structure for VAD events.""" - - signal_type: VADSignal - occured_at: float - - -class EventResponse(BaseModel): - """Response structure for VAD events.""" - - type: Literal["events"] - data: EventData - - def language_to_sarvam_language(language: Language) -> str: """Convert a Language enum to Sarvam's language code format. @@ -249,7 +202,6 @@ class SarvamSTTService(STTService): # Choose the appropriate service based on model if "saarika" in self._model.lower(): # STT service - requires language_code - logger.debug(f"Using STT service with language: {self._language_string}") self._websocket_context = self._sarvam_client.speech_to_text_streaming.connect( language_code=self._language_string, model=self._model, @@ -260,7 +212,6 @@ class SarvamSTTService(STTService): ) else: # STT-translate service - auto-detects language - logger.debug("Using STT-translate service") self._websocket_context = ( self._sarvam_client.speech_to_text_translate_streaming.connect( model=self._model, @@ -274,27 +225,6 @@ class SarvamSTTService(STTService): # Enter the async context manager self._socket_client = await self._websocket_context.__aenter__() - # Set up event handlers - def on_open(data): - logger.debug("WebSocket connection opened") - - def on_message(message): - # Handle message in a separate task to avoid blocking - asyncio.create_task(self._handle_response(message)) - - def on_error(error): - logger.error(f"WebSocket error: {error}") - asyncio.create_task(self.push_error(ErrorFrame(f"WebSocket error: {error}"))) - - def on_close(data): - logger.debug("WebSocket connection closed") - - # Register event handlers - self._socket_client.on(EventType.OPEN, on_open) - self._socket_client.on(EventType.MESSAGE, on_message) - self._socket_client.on(EventType.ERROR, on_error) - self._socket_client.on(EventType.CLOSE, on_close) - # Start listening for messages self._listening_task = asyncio.create_task(self._socket_client.start_listening()) @@ -345,7 +275,7 @@ class SarvamSTTService(STTService): timestamp = message.data.occured_at logger.debug(f"VAD Signal: {signal}, Occurred at: {timestamp}") - if signal == VADSignal.START: + if signal == "START_SPEECH": await self.start_metrics() logger.debug("User started speaking") await self._call_event_handler("on_speech_started") @@ -377,10 +307,10 @@ class SarvamSTTService(STTService): except Exception as e: logger.error(f"Error handling Sarvam response: {e}") await self.push_error(ErrorFrame(f"Failed to handle response: {e}")) + await self.stop_all_metrics() def _map_language_code_to_enum(self, language_code: str) -> Language: """Map Sarvam language code to pipecat Language enum.""" - logger.debug(f"Audio language detected as: {language_code}") mapping = { "bn-IN": Language.BN_IN, "gu-IN": Language.GU_IN, diff --git a/uv.lock b/uv.lock index a49368358..8dee6245e 100644 --- a/uv.lock +++ b/uv.lock @@ -569,6 +569,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/54/db7a801933dd2537f5376fb8a9e28caff488ef5c2d61f3a8fced55fe6336/blake3-1.0.7-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:d9046bb1e22a8607e1d0d7c3ff47e56e0a197c988502df4bf4d78563f3e9fe2c", size = 553411, upload-time = "2025-09-29T16:40:45.667Z" }, { url = "https://files.pythonhosted.org/packages/2c/08/949cf68d16d1f731d502968bb1486e1a4bf7ef032c38fbc2ef26a2353494/blake3-1.0.7-cp313-cp313t-win32.whl", hash = "sha256:bd2f638bcc00fc09ce985ea3c642d45940e1eda198ab1f4b90cfdecbebbc9315", size = 227049, upload-time = "2025-09-29T16:40:47.446Z" }, { url = "https://files.pythonhosted.org/packages/f2/ae/6783a5ca6235024e00a1e92ab6ca2cd855f4c61c763cf8d6d643846d110c/blake3-1.0.7-cp313-cp313t-win_amd64.whl", hash = "sha256:cb3aa1db14231c2ef0ec5acd805505ce128c39ffa510deb3384eed96fe4addcb", size = 214101, upload-time = "2025-09-29T16:40:48.656Z" }, + { url = "https://files.pythonhosted.org/packages/32/aa/99b4b6c22972b9a854f77d97846a717448a77d079e4bd38e46a3f8ecea76/blake3-1.0.7-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f7db997205aa420d59fb5639346e40beafb9c09252e2ec6efedca8f230f7520c", size = 346664, upload-time = "2025-10-11T18:02:54.609Z" }, + { url = "https://files.pythonhosted.org/packages/f9/44/e98bc5450be415a335a191b154e299e335046d11fe9514d93961902b7aed/blake3-1.0.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:19afec6e276f3bc154541248d92b1ecb198af2ee920025f7ce521028f9a69d8b", size = 324576, upload-time = "2025-10-11T18:02:57.062Z" }, + { url = "https://files.pythonhosted.org/packages/74/25/23a39913c8424ac3df705ed71a00efe34cc1cdbd4588ed6eaf458ea9d7ef/blake3-1.0.7-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:006a11bbba65a95e88ddc069cca751c8812fd144d582715eeea512452fdbe80d", size = 370545, upload-time = "2025-10-11T18:02:59.824Z" }, + { url = "https://files.pythonhosted.org/packages/db/83/9f53a86de9a5999b043febfd84765d240014da42055aeac06d1005b20b07/blake3-1.0.7-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7febeffdc8412fed105ca517cee641ac521fb9cfb750bf7e27a5cdf3ddf74a08", size = 374370, upload-time = "2025-10-11T18:03:01.412Z" }, + { url = "https://files.pythonhosted.org/packages/c4/4c/3290aa4fb7483975a7b3322a73692aa3cf491a77ce7ac61c216c71c6f834/blake3-1.0.7-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c032ce7c52b71015651c0abe9fe599aa2669e6be578aa17d5f993dc93373401", size = 447808, upload-time = "2025-10-11T18:03:02.893Z" }, + { url = "https://files.pythonhosted.org/packages/66/26/92b6e15552865416aae1aedad8b9b4d8b47ca9b73d25373622b1798c05a9/blake3-1.0.7-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b81455f7d24b58fe26be037cc3854c28ea6eb3671ceab3b1ec0b1239aeb6fef", size = 506118, upload-time = "2025-10-11T18:03:04.51Z" }, + { url = "https://files.pythonhosted.org/packages/1b/ef/f158fc43a03fd366bc428a52a845bd0f884e518deda901c9216bd469867e/blake3-1.0.7-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:41b0127b0e7c8610054c421959dbe7140a81ac2c88fa9e099994fbaa529af3c1", size = 393239, upload-time = "2025-10-11T18:03:07.102Z" }, + { url = "https://files.pythonhosted.org/packages/10/49/2a56ce897ec7ed0e25953b3873da271ea60cc107ae02ecc6655252e554c7/blake3-1.0.7-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4755ca95b4114b629d8f3570bc661916d211d52d47f57ff70e9687377ab39cb9", size = 386267, upload-time = "2025-10-11T18:03:08.904Z" }, + { url = "https://files.pythonhosted.org/packages/d9/c4/ee4c03ea419198b91c889ef173015b5d637a390d3f7d63cb70033a7201d6/blake3-1.0.7-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:8abe929cfd27b375e02e3dd7a690192fa4efecc52ef510df91ef01651ef08dc7", size = 549641, upload-time = "2025-10-11T18:03:10.64Z" }, + { url = "https://files.pythonhosted.org/packages/b2/cc/a918d6649b56fe705133e06d9958d90978aad30063d42cca4dfe23db16e9/blake3-1.0.7-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:dd607eb5ad5a9b44ff62243759aa0af4085f6f43c9b01f503561a70da63e3b94", size = 553691, upload-time = "2025-10-11T18:03:12.108Z" }, + { url = "https://files.pythonhosted.org/packages/fd/9f/568546f555fd1555d4867c497e9413f67bf769d076e773b9ca9e07a0b6f6/blake3-1.0.7-cp314-cp314-win32.whl", hash = "sha256:a51684d1f346e7680f7c244c25b0e279e3b297f1938126e4ea8e32425ea269f5", size = 227552, upload-time = "2025-10-11T18:03:13.468Z" }, + { url = "https://files.pythonhosted.org/packages/97/2b/d4ef7365d9f601c8a127b5993f2662d45d2cb6d430bf3dbbb7a6f0b33639/blake3-1.0.7-cp314-cp314-win_amd64.whl", hash = "sha256:a6a481719e28e2c61aafd4273d32663365d97613341b72fcdf2f6afbd426319b", size = 214719, upload-time = "2025-10-11T18:03:14.835Z" }, + { url = "https://files.pythonhosted.org/packages/2f/53/f697cc34e382a225d163ea0c6a35c7eb4cfd1011e85db6610adfac98e522/blake3-1.0.7-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:daa8933cd7db19143bd6b59f7ac4c7c7446767d7b2c3a748a4559aa483275fa2", size = 347071, upload-time = "2025-10-11T18:03:16.637Z" }, + { url = "https://files.pythonhosted.org/packages/4c/85/836dcb5c5709c2331f02ce065f7ebfaae710a6c1768cdc47ee3197645f98/blake3-1.0.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:24074adfffffe0fa7a7dd930cc608d6e965e70306e2c1e14d412e29ec94fa360", size = 324341, upload-time = "2025-10-11T18:03:18.073Z" }, + { url = "https://files.pythonhosted.org/packages/6d/48/36b2c25007933619ce60e24b9f360baaa77d08939284045476c8e157fe62/blake3-1.0.7-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dce6e6f03de2674f9860cf330d8a4fcdb63a60659435e5e31d72d174fc102d8e", size = 370140, upload-time = "2025-10-11T18:03:19.582Z" }, + { url = "https://files.pythonhosted.org/packages/70/82/8a8977e5d56b9fb719033940c8ce34afc733190d34ab868a647a9af7b584/blake3-1.0.7-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e783f33d53a2de8d2ab845235dd53393d521b5e4a76c23d03e77e472266359d3", size = 373022, upload-time = "2025-10-11T18:03:21.143Z" }, + { url = "https://files.pythonhosted.org/packages/e2/c4/44017ba40804a528568b35a36c05187786830c4d891c5540d59a121a7cec/blake3-1.0.7-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:782784aef18eb61f4ce8bf2b9506b7d90f0d183176b453345b221837a18041b7", size = 447243, upload-time = "2025-10-11T18:03:22.707Z" }, + { url = "https://files.pythonhosted.org/packages/78/c1/4fa20e68624784082734d31b8c9c80ad226658c024e61b9f9b6751ba0a4a/blake3-1.0.7-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6062122e77f40e3733cac2ef3f25e0fc7f555e352fe6f513f8404ad11dc69974", size = 506149, upload-time = "2025-10-11T18:03:24.424Z" }, + { url = "https://files.pythonhosted.org/packages/8e/63/af65466e27e7b92800a068afaee11b2fa071e34a7f5900f8e13832f18185/blake3-1.0.7-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c2614bc9d69fd6067571f3bb37b3b07a6b86a56167553ad4784a3c508771f39", size = 393243, upload-time = "2025-10-11T18:03:25.872Z" }, + { url = "https://files.pythonhosted.org/packages/f3/82/54a4807a3243d0e094ada9d65687aeb40059587e374b3beb9c89f6552c9b/blake3-1.0.7-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6df2bd56c43bdeb6699d4af0a0dd0d77537d95cb4a5dde4b39ed6e54cc725d6", size = 386318, upload-time = "2025-10-11T18:03:27.338Z" }, + { url = "https://files.pythonhosted.org/packages/42/e8/32b56531b5d9da67e476735ceaec7c3bf89310629abeeafb03c724145c88/blake3-1.0.7-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:8b635cf4350caf459ecb335b32be622068423245bda457d5bc159106eb20f912", size = 548945, upload-time = "2025-10-11T18:03:28.779Z" }, + { url = "https://files.pythonhosted.org/packages/ad/50/33b1aca708be629e285a537f1adf34dfcabc4c30b28c436361323d11f593/blake3-1.0.7-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:f96a685775f87ddf75ff495dc9698703268c66c170caca977347427ef8d52324", size = 553564, upload-time = "2025-10-11T18:03:30.247Z" }, + { url = "https://files.pythonhosted.org/packages/fe/07/8b17cbf40ccd9afeed6ae9f55018181786b30ff4e079ac8bf4ca4799e47b/blake3-1.0.7-cp314-cp314t-win32.whl", hash = "sha256:0633b7d9bad87dc7fce545042353f2e056604d993f71d1dce666a9f5edc13e05", size = 227345, upload-time = "2025-10-11T18:03:31.933Z" }, + { url = "https://files.pythonhosted.org/packages/d9/8a/ab9de8a73616350759356a483f440212bc2a22fc9aaa77cabbf06c3483db/blake3-1.0.7-cp314-cp314t-win_amd64.whl", hash = "sha256:5e356daa0089968dc1ff1d0d112e7cc1700533441d8f30ae99f835a94dc8b0f3", size = 213964, upload-time = "2025-10-11T18:03:33.919Z" }, ] [[package]] @@ -4316,7 +4340,6 @@ dependencies = [ { name = "pydantic" }, { name = "pyloudnorm" }, { name = "resampy" }, - { name = "sarvamai" }, { name = "soxr" }, { name = "wait-for2", marker = "python_full_version < '3.12'" }, ] @@ -4603,6 +4626,7 @@ requires-dist = [ { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'openai'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'playht'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'rime'" }, + { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'sarvam'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'soniox'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'websocket'" }, { name = "pipecat-ai-krisp", marker = "extra == 'krisp'", specifier = "~=0.4.0" }, @@ -4616,7 +4640,6 @@ requires-dist = [ { name = "python-dotenv", marker = "extra == 'runner'", specifier = ">=1.0.0,<2.0.0" }, { name = "pyvips", extras = ["binary"], marker = "extra == 'moondream'", specifier = "~=3.0.0" }, { name = "resampy", specifier = "~=0.4.3" }, - { name = "sarvamai", specifier = "==0.1.21" }, { name = "sarvamai", marker = "extra == 'sarvam'", specifier = "==0.1.21" }, { name = "sentry-sdk", marker = "extra == 'sentry'", specifier = ">=2.28.0,<3" }, { name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.10" }, @@ -4635,7 +4658,6 @@ requires-dist = [ { name = "uvicorn", marker = "extra == 'runner'", specifier = ">=0.32.0,<1.0.0" }, { name = "vllm", marker = "extra == 'ultravox'", specifier = ">=0.9.0" }, { name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1" }, - { name = "websockets", marker = "extra == 'sarvam'", specifier = ">=13.1,<15.0" }, { name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" }, ] provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "cerebras", "deepseek", "daily", "deepgram", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "krisp", "koala", "langchain", "livekit", "lmnt", "local", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nim", "neuphonic", "noisereduce", "openai", "openpipe", "openrouter", "perplexity", "playht", "qwen", "rime", "riva", "runner", "sambanova", "sarvam", "sentry", "local-smart-turn", "local-smart-turn-v3", "remote-smart-turn", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"]