refactor: Update dependencies and improve logging
This commit is contained in:
@@ -38,7 +38,6 @@ dependencies = [
|
||||
# Pinning numba to resolve package dependencies
|
||||
"numba==0.61.2",
|
||||
"wait_for2>=0.4.1; python_version<'3.12'",
|
||||
"sarvamai==0.1.21",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
@@ -94,7 +93,7 @@ rime = [ "pipecat-ai[websockets-base]" ]
|
||||
riva = [ "nvidia-riva-client~=2.21.1" ]
|
||||
runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.117.0", "pipecat-ai-small-webrtc-prebuilt>=1.0.0"]
|
||||
sambanova = []
|
||||
sarvam = [ "sarvamai==0.1.21", "websockets>=13.1,<15.0" ]
|
||||
sarvam = [ "sarvamai==0.1.21", "pipecat-ai[websockets-base]" ]
|
||||
sentry = [ "sentry-sdk>=2.28.0,<3" ]
|
||||
local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "torchaudio>=2.5.0,<3" ]
|
||||
local-smart-turn-v3 = [ "transformers", "onnxruntime>=1.20.1,<2" ]
|
||||
|
||||
@@ -7,11 +7,9 @@ can handle multiple audio formats for Indian language speech recognition.
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
from enum import StrEnum
|
||||
from typing import Literal, Optional
|
||||
from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
CancelFrame,
|
||||
@@ -35,51 +33,6 @@ except ModuleNotFoundError as e:
|
||||
raise Exception(f"Missing module: {e}")
|
||||
|
||||
|
||||
class TranscriptionMetrics(BaseModel):
|
||||
"""Metrics for transcription performance."""
|
||||
|
||||
audio_duration: float
|
||||
processing_latency: float
|
||||
|
||||
|
||||
class TranscriptionData(BaseModel):
|
||||
"""Data structure for transcription results."""
|
||||
|
||||
request_id: str
|
||||
transcript: str
|
||||
language_code: Optional[str]
|
||||
metrics: Optional[TranscriptionMetrics] = None
|
||||
is_final: Optional[bool] = None
|
||||
|
||||
|
||||
class TranscriptionResponse(BaseModel):
|
||||
"""Response structure for transcription data."""
|
||||
|
||||
type: Literal["data"]
|
||||
data: TranscriptionData
|
||||
|
||||
|
||||
class VADSignal(StrEnum):
|
||||
"""Voice Activity Detection signal types."""
|
||||
|
||||
START = "START_SPEECH"
|
||||
END = "END_SPEECH"
|
||||
|
||||
|
||||
class EventData(BaseModel):
|
||||
"""Data structure for VAD events."""
|
||||
|
||||
signal_type: VADSignal
|
||||
occured_at: float
|
||||
|
||||
|
||||
class EventResponse(BaseModel):
|
||||
"""Response structure for VAD events."""
|
||||
|
||||
type: Literal["events"]
|
||||
data: EventData
|
||||
|
||||
|
||||
def language_to_sarvam_language(language: Language) -> str:
|
||||
"""Convert a Language enum to Sarvam's language code format.
|
||||
|
||||
@@ -249,7 +202,6 @@ class SarvamSTTService(STTService):
|
||||
# Choose the appropriate service based on model
|
||||
if "saarika" in self._model.lower():
|
||||
# STT service - requires language_code
|
||||
logger.debug(f"Using STT service with language: {self._language_string}")
|
||||
self._websocket_context = self._sarvam_client.speech_to_text_streaming.connect(
|
||||
language_code=self._language_string,
|
||||
model=self._model,
|
||||
@@ -260,7 +212,6 @@ class SarvamSTTService(STTService):
|
||||
)
|
||||
else:
|
||||
# STT-translate service - auto-detects language
|
||||
logger.debug("Using STT-translate service")
|
||||
self._websocket_context = (
|
||||
self._sarvam_client.speech_to_text_translate_streaming.connect(
|
||||
model=self._model,
|
||||
@@ -274,27 +225,6 @@ class SarvamSTTService(STTService):
|
||||
# Enter the async context manager
|
||||
self._socket_client = await self._websocket_context.__aenter__()
|
||||
|
||||
# Set up event handlers
|
||||
def on_open(data):
|
||||
logger.debug("WebSocket connection opened")
|
||||
|
||||
def on_message(message):
|
||||
# Handle message in a separate task to avoid blocking
|
||||
asyncio.create_task(self._handle_response(message))
|
||||
|
||||
def on_error(error):
|
||||
logger.error(f"WebSocket error: {error}")
|
||||
asyncio.create_task(self.push_error(ErrorFrame(f"WebSocket error: {error}")))
|
||||
|
||||
def on_close(data):
|
||||
logger.debug("WebSocket connection closed")
|
||||
|
||||
# Register event handlers
|
||||
self._socket_client.on(EventType.OPEN, on_open)
|
||||
self._socket_client.on(EventType.MESSAGE, on_message)
|
||||
self._socket_client.on(EventType.ERROR, on_error)
|
||||
self._socket_client.on(EventType.CLOSE, on_close)
|
||||
|
||||
# Start listening for messages
|
||||
self._listening_task = asyncio.create_task(self._socket_client.start_listening())
|
||||
|
||||
@@ -345,7 +275,7 @@ class SarvamSTTService(STTService):
|
||||
timestamp = message.data.occured_at
|
||||
logger.debug(f"VAD Signal: {signal}, Occurred at: {timestamp}")
|
||||
|
||||
if signal == VADSignal.START:
|
||||
if signal == "START_SPEECH":
|
||||
await self.start_metrics()
|
||||
logger.debug("User started speaking")
|
||||
await self._call_event_handler("on_speech_started")
|
||||
@@ -377,10 +307,10 @@ class SarvamSTTService(STTService):
|
||||
except Exception as e:
|
||||
logger.error(f"Error handling Sarvam response: {e}")
|
||||
await self.push_error(ErrorFrame(f"Failed to handle response: {e}"))
|
||||
await self.stop_all_metrics()
|
||||
|
||||
def _map_language_code_to_enum(self, language_code: str) -> Language:
|
||||
"""Map Sarvam language code to pipecat Language enum."""
|
||||
logger.debug(f"Audio language detected as: {language_code}")
|
||||
mapping = {
|
||||
"bn-IN": Language.BN_IN,
|
||||
"gu-IN": Language.GU_IN,
|
||||
|
||||
uv.lock (generated file) — 28 lines changed
@@ -569,6 +569,30 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/39/54/db7a801933dd2537f5376fb8a9e28caff488ef5c2d61f3a8fced55fe6336/blake3-1.0.7-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:d9046bb1e22a8607e1d0d7c3ff47e56e0a197c988502df4bf4d78563f3e9fe2c", size = 553411, upload-time = "2025-09-29T16:40:45.667Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/08/949cf68d16d1f731d502968bb1486e1a4bf7ef032c38fbc2ef26a2353494/blake3-1.0.7-cp313-cp313t-win32.whl", hash = "sha256:bd2f638bcc00fc09ce985ea3c642d45940e1eda198ab1f4b90cfdecbebbc9315", size = 227049, upload-time = "2025-09-29T16:40:47.446Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/ae/6783a5ca6235024e00a1e92ab6ca2cd855f4c61c763cf8d6d643846d110c/blake3-1.0.7-cp313-cp313t-win_amd64.whl", hash = "sha256:cb3aa1db14231c2ef0ec5acd805505ce128c39ffa510deb3384eed96fe4addcb", size = 214101, upload-time = "2025-09-29T16:40:48.656Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/aa/99b4b6c22972b9a854f77d97846a717448a77d079e4bd38e46a3f8ecea76/blake3-1.0.7-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f7db997205aa420d59fb5639346e40beafb9c09252e2ec6efedca8f230f7520c", size = 346664, upload-time = "2025-10-11T18:02:54.609Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/44/e98bc5450be415a335a191b154e299e335046d11fe9514d93961902b7aed/blake3-1.0.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:19afec6e276f3bc154541248d92b1ecb198af2ee920025f7ce521028f9a69d8b", size = 324576, upload-time = "2025-10-11T18:02:57.062Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/74/25/23a39913c8424ac3df705ed71a00efe34cc1cdbd4588ed6eaf458ea9d7ef/blake3-1.0.7-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:006a11bbba65a95e88ddc069cca751c8812fd144d582715eeea512452fdbe80d", size = 370545, upload-time = "2025-10-11T18:02:59.824Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/83/9f53a86de9a5999b043febfd84765d240014da42055aeac06d1005b20b07/blake3-1.0.7-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7febeffdc8412fed105ca517cee641ac521fb9cfb750bf7e27a5cdf3ddf74a08", size = 374370, upload-time = "2025-10-11T18:03:01.412Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/4c/3290aa4fb7483975a7b3322a73692aa3cf491a77ce7ac61c216c71c6f834/blake3-1.0.7-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c032ce7c52b71015651c0abe9fe599aa2669e6be578aa17d5f993dc93373401", size = 447808, upload-time = "2025-10-11T18:03:02.893Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/66/26/92b6e15552865416aae1aedad8b9b4d8b47ca9b73d25373622b1798c05a9/blake3-1.0.7-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b81455f7d24b58fe26be037cc3854c28ea6eb3671ceab3b1ec0b1239aeb6fef", size = 506118, upload-time = "2025-10-11T18:03:04.51Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1b/ef/f158fc43a03fd366bc428a52a845bd0f884e518deda901c9216bd469867e/blake3-1.0.7-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:41b0127b0e7c8610054c421959dbe7140a81ac2c88fa9e099994fbaa529af3c1", size = 393239, upload-time = "2025-10-11T18:03:07.102Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/10/49/2a56ce897ec7ed0e25953b3873da271ea60cc107ae02ecc6655252e554c7/blake3-1.0.7-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4755ca95b4114b629d8f3570bc661916d211d52d47f57ff70e9687377ab39cb9", size = 386267, upload-time = "2025-10-11T18:03:08.904Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/c4/ee4c03ea419198b91c889ef173015b5d637a390d3f7d63cb70033a7201d6/blake3-1.0.7-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:8abe929cfd27b375e02e3dd7a690192fa4efecc52ef510df91ef01651ef08dc7", size = 549641, upload-time = "2025-10-11T18:03:10.64Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b2/cc/a918d6649b56fe705133e06d9958d90978aad30063d42cca4dfe23db16e9/blake3-1.0.7-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:dd607eb5ad5a9b44ff62243759aa0af4085f6f43c9b01f503561a70da63e3b94", size = 553691, upload-time = "2025-10-11T18:03:12.108Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fd/9f/568546f555fd1555d4867c497e9413f67bf769d076e773b9ca9e07a0b6f6/blake3-1.0.7-cp314-cp314-win32.whl", hash = "sha256:a51684d1f346e7680f7c244c25b0e279e3b297f1938126e4ea8e32425ea269f5", size = 227552, upload-time = "2025-10-11T18:03:13.468Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/97/2b/d4ef7365d9f601c8a127b5993f2662d45d2cb6d430bf3dbbb7a6f0b33639/blake3-1.0.7-cp314-cp314-win_amd64.whl", hash = "sha256:a6a481719e28e2c61aafd4273d32663365d97613341b72fcdf2f6afbd426319b", size = 214719, upload-time = "2025-10-11T18:03:14.835Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/53/f697cc34e382a225d163ea0c6a35c7eb4cfd1011e85db6610adfac98e522/blake3-1.0.7-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:daa8933cd7db19143bd6b59f7ac4c7c7446767d7b2c3a748a4559aa483275fa2", size = 347071, upload-time = "2025-10-11T18:03:16.637Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4c/85/836dcb5c5709c2331f02ce065f7ebfaae710a6c1768cdc47ee3197645f98/blake3-1.0.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:24074adfffffe0fa7a7dd930cc608d6e965e70306e2c1e14d412e29ec94fa360", size = 324341, upload-time = "2025-10-11T18:03:18.073Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6d/48/36b2c25007933619ce60e24b9f360baaa77d08939284045476c8e157fe62/blake3-1.0.7-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dce6e6f03de2674f9860cf330d8a4fcdb63a60659435e5e31d72d174fc102d8e", size = 370140, upload-time = "2025-10-11T18:03:19.582Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/70/82/8a8977e5d56b9fb719033940c8ce34afc733190d34ab868a647a9af7b584/blake3-1.0.7-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e783f33d53a2de8d2ab845235dd53393d521b5e4a76c23d03e77e472266359d3", size = 373022, upload-time = "2025-10-11T18:03:21.143Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/c4/44017ba40804a528568b35a36c05187786830c4d891c5540d59a121a7cec/blake3-1.0.7-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:782784aef18eb61f4ce8bf2b9506b7d90f0d183176b453345b221837a18041b7", size = 447243, upload-time = "2025-10-11T18:03:22.707Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/78/c1/4fa20e68624784082734d31b8c9c80ad226658c024e61b9f9b6751ba0a4a/blake3-1.0.7-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6062122e77f40e3733cac2ef3f25e0fc7f555e352fe6f513f8404ad11dc69974", size = 506149, upload-time = "2025-10-11T18:03:24.424Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8e/63/af65466e27e7b92800a068afaee11b2fa071e34a7f5900f8e13832f18185/blake3-1.0.7-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c2614bc9d69fd6067571f3bb37b3b07a6b86a56167553ad4784a3c508771f39", size = 393243, upload-time = "2025-10-11T18:03:25.872Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f3/82/54a4807a3243d0e094ada9d65687aeb40059587e374b3beb9c89f6552c9b/blake3-1.0.7-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6df2bd56c43bdeb6699d4af0a0dd0d77537d95cb4a5dde4b39ed6e54cc725d6", size = 386318, upload-time = "2025-10-11T18:03:27.338Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/42/e8/32b56531b5d9da67e476735ceaec7c3bf89310629abeeafb03c724145c88/blake3-1.0.7-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:8b635cf4350caf459ecb335b32be622068423245bda457d5bc159106eb20f912", size = 548945, upload-time = "2025-10-11T18:03:28.779Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ad/50/33b1aca708be629e285a537f1adf34dfcabc4c30b28c436361323d11f593/blake3-1.0.7-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:f96a685775f87ddf75ff495dc9698703268c66c170caca977347427ef8d52324", size = 553564, upload-time = "2025-10-11T18:03:30.247Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fe/07/8b17cbf40ccd9afeed6ae9f55018181786b30ff4e079ac8bf4ca4799e47b/blake3-1.0.7-cp314-cp314t-win32.whl", hash = "sha256:0633b7d9bad87dc7fce545042353f2e056604d993f71d1dce666a9f5edc13e05", size = 227345, upload-time = "2025-10-11T18:03:31.933Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/8a/ab9de8a73616350759356a483f440212bc2a22fc9aaa77cabbf06c3483db/blake3-1.0.7-cp314-cp314t-win_amd64.whl", hash = "sha256:5e356daa0089968dc1ff1d0d112e7cc1700533441d8f30ae99f835a94dc8b0f3", size = 213964, upload-time = "2025-10-11T18:03:33.919Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4316,7 +4340,6 @@ dependencies = [
|
||||
{ name = "pydantic" },
|
||||
{ name = "pyloudnorm" },
|
||||
{ name = "resampy" },
|
||||
{ name = "sarvamai" },
|
||||
{ name = "soxr" },
|
||||
{ name = "wait-for2", marker = "python_full_version < '3.12'" },
|
||||
]
|
||||
@@ -4603,6 +4626,7 @@ requires-dist = [
|
||||
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'openai'" },
|
||||
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'playht'" },
|
||||
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'rime'" },
|
||||
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'sarvam'" },
|
||||
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'soniox'" },
|
||||
{ name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'websocket'" },
|
||||
{ name = "pipecat-ai-krisp", marker = "extra == 'krisp'", specifier = "~=0.4.0" },
|
||||
@@ -4616,7 +4640,6 @@ requires-dist = [
|
||||
{ name = "python-dotenv", marker = "extra == 'runner'", specifier = ">=1.0.0,<2.0.0" },
|
||||
{ name = "pyvips", extras = ["binary"], marker = "extra == 'moondream'", specifier = "~=3.0.0" },
|
||||
{ name = "resampy", specifier = "~=0.4.3" },
|
||||
{ name = "sarvamai", specifier = "==0.1.21" },
|
||||
{ name = "sarvamai", marker = "extra == 'sarvam'", specifier = "==0.1.21" },
|
||||
{ name = "sentry-sdk", marker = "extra == 'sentry'", specifier = ">=2.28.0,<3" },
|
||||
{ name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.10" },
|
||||
@@ -4635,7 +4658,6 @@ requires-dist = [
|
||||
{ name = "uvicorn", marker = "extra == 'runner'", specifier = ">=0.32.0,<1.0.0" },
|
||||
{ name = "vllm", marker = "extra == 'ultravox'", specifier = ">=0.9.0" },
|
||||
{ name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1" },
|
||||
{ name = "websockets", marker = "extra == 'sarvam'", specifier = ">=13.1,<15.0" },
|
||||
{ name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" },
|
||||
]
|
||||
provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "cerebras", "deepseek", "daily", "deepgram", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "krisp", "koala", "langchain", "livekit", "lmnt", "local", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "nim", "neuphonic", "noisereduce", "openai", "openpipe", "openrouter", "perplexity", "playht", "qwen", "rime", "riva", "runner", "sambanova", "sarvam", "sentry", "local-smart-turn", "local-smart-turn-v3", "remote-smart-turn", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"]
|
||||
|
||||
Reference in New Issue
Block a user