From ca2bfd6f12c534352a21c11bebe6d855b4dfdcac Mon Sep 17 00:00:00 2001 From: Mark Backman Date: Thu, 26 Mar 2026 12:22:06 -0400 Subject: [PATCH] Remove SambaNovaSTTService SambaNova no longer offers speech-to-text audio models. --- README.md | 2 +- changelog/4154.removed.md | 1 + .../13g-sambanova-transcription.py | 121 --------------- .../14s-function-calling-sambanova.py | 7 +- src/pipecat/services/sambanova/__init__.py | 1 - src/pipecat/services/sambanova/stt.py | 143 ------------------ src/pipecat/services/stt_latency.py | 1 - 7 files changed, 5 insertions(+), 271 deletions(-) create mode 100644 changelog/4154.removed.md delete mode 100644 examples/foundational/13g-sambanova-transcription.py delete mode 100644 src/pipecat/services/sambanova/stt.py diff --git a/README.md b/README.md index 7f087b9b8..2fa1a45e2 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout | Category | Services | | ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | +| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | | LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) | | Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | | Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), | diff --git a/changelog/4154.removed.md b/changelog/4154.removed.md new file mode 100644 index 000000000..4a778cc48 --- /dev/null +++ b/changelog/4154.removed.md @@ -0,0 +1 @@ +- Removed `SambaNovaSTTService`. SambaNova no longer offers speech-to-text audio models. Use another STT provider instead. diff --git a/examples/foundational/13g-sambanova-transcription.py b/examples/foundational/13g-sambanova-transcription.py deleted file mode 100644 index 26e961c5e..000000000 --- a/examples/foundational/13g-sambanova-transcription.py +++ /dev/null @@ -1,121 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import os -import time - -from dotenv import load_dotenv -from loguru import logger - -from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams -from pipecat.frames.frames import Frame, TranscriptionFrame, UserStoppedSpeakingFrame -from pipecat.pipeline.pipeline import Pipeline -from pipecat.pipeline.runner import PipelineRunner -from pipecat.pipeline.task import PipelineParams, PipelineTask -from pipecat.processors.audio.vad_processor import VADProcessor -from pipecat.processors.frame_processor import FrameDirection, FrameProcessor -from pipecat.runner.types import RunnerArguments -from pipecat.runner.utils import create_transport -from pipecat.services.sambanova.stt import SambaNovaSTTService -from pipecat.transports.base_transport import BaseTransport, TransportParams -from pipecat.transports.daily.transport import DailyParams -from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams - -load_dotenv(override=True) - - -STOP_SECS = 2.0 - - -class TranscriptionLogger(FrameProcessor): - """Measures transcription latency. - - Uses the (intentionally) long STOP_SECS parameter to give the transcription time to finish, - then outputs the timing between when the VAD first classified audio input as not-speech and - the delivery of the last transcription frame. - """ - - def __init__(self): - super().__init__() - self._last_transcription_time = time.time() - - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - - if isinstance(frame, UserStoppedSpeakingFrame): - logger.debug( - f"Transcription latency: {(STOP_SECS - (time.time() - self._last_transcription_time)):.2f}" - ) - - if isinstance(frame, TranscriptionFrame): - self._last_transcription_time = time.time() - - # Push all frames through - await self.push_frame(frame, direction) - - -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. -transport_params = { - "daily": lambda: DailyParams( - audio_in_enabled=True, - ), - "twilio": lambda: FastAPIWebsocketParams( - audio_in_enabled=True, - ), - "webrtc": lambda: TransportParams( - audio_in_enabled=True, - ), -} - - -async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): - logger.info(f"Starting bot") - - stt = SambaNovaSTTService( - settings=SambaNovaSTTService.Settings( - model="Whisper-Large-v3", - ), - api_key=os.getenv("SAMBANOVA_API_KEY"), - ) - - tl = TranscriptionLogger() - vad_processor = VADProcessor( - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS)) - ) - - pipeline = Pipeline([transport.input(), vad_processor, stt, tl]) - - task = PipelineTask( - pipeline, - params=PipelineParams( - enable_metrics=True, - enable_usage_metrics=True, - ), - idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - ) - - @transport.event_handler("on_client_disconnected") - async def on_client_disconnected(transport, client): - logger.info(f"Client disconnected") - await task.cancel() - - runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) - - await runner.run(task) - - -async def bot(runner_args: RunnerArguments): - """Main bot entry point compatible with Pipecat Cloud.""" - transport = await create_transport(runner_args, transport_params) - await run_bot(transport, runner_args) - - -if __name__ == "__main__": - from pipecat.runner.run import main - - main() diff --git a/examples/foundational/14s-function-calling-sambanova.py b/examples/foundational/14s-function-calling-sambanova.py index 3854d2d6e..c1e880bc0 100644 --- a/examples/foundational/14s-function-calling-sambanova.py +++ b/examples/foundational/14s-function-calling-sambanova.py @@ -25,9 +25,9 @@ from pipecat.processors.aggregators.llm_response_universal import ( from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.llm_service import FunctionCallParams from pipecat.services.sambanova.llm import SambaNovaLLMService -from pipecat.services.sambanova.stt import SambaNovaSTTService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -60,9 +60,8 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - stt = SambaNovaSTTService( - model="Whisper-Large-v3", - api_key=os.getenv("SAMBANOVA_API_KEY"), + stt = DeepgramSTTService( + api_key=os.getenv("DEEPGRAM_API_KEY"), ) tts = CartesiaTTSService( diff --git a/src/pipecat/services/sambanova/__init__.py b/src/pipecat/services/sambanova/__init__.py index 749fc4460..1886e0b91 100644 --- a/src/pipecat/services/sambanova/__init__.py +++ b/src/pipecat/services/sambanova/__init__.py @@ -5,4 +5,3 @@ # from .llm import * -from .stt import * diff --git a/src/pipecat/services/sambanova/stt.py b/src/pipecat/services/sambanova/stt.py deleted file mode 100644 index 5cf12d771..000000000 --- a/src/pipecat/services/sambanova/stt.py +++ /dev/null @@ -1,143 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""SambaNova's Speech-to-Text service implementation for real-time transcription.""" - -from dataclasses import dataclass -from typing import Any, Optional - -from loguru import logger - -from pipecat.services.stt_latency import SAMBANOVA_TTFS_P99 -from pipecat.services.whisper.base_stt import ( - BaseWhisperSTTService, - Transcription, -) -from pipecat.transcriptions.language import Language - - -@dataclass -class SambaNovaSTTSettings(BaseWhisperSTTService.Settings): - """Settings for the SambaNova STT service.""" - - pass - - -class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore - """SambaNova Whisper speech-to-text service. - - Uses SambaNova's Whisper API to convert audio to text. - Requires a SambaNova API key set via the api_key parameter or SAMBANOVA_API_KEY environment variable. - """ - - Settings = SambaNovaSTTSettings - - def __init__( - self, - *, - model: Optional[str] = None, - api_key: Optional[str] = None, - base_url: str = "https://api.sambanova.ai/v1", - language: Optional[Language] = None, - prompt: Optional[str] = None, - temperature: Optional[float] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = SAMBANOVA_TTFS_P99, - **kwargs: Any, - ) -> None: - """Initialize SambaNova STT service. - - Args: - model: Whisper model to use. - - .. deprecated:: 0.0.105 - Use ``settings=SambaNovaSTTService.Settings(model=...)`` instead. - - api_key: SambaNova API key. Defaults to None. - base_url: API base URL. Defaults to "https://api.sambanova.ai/v1". - language: Language of the audio input. - - .. deprecated:: 0.0.105 - Use ``settings=SambaNovaSTTService.Settings(language=...)`` instead. - - prompt: Optional text to guide the model's style or continue a previous segment. - - .. deprecated:: 0.0.105 - Use ``settings=SambaNovaSTTService.Settings(prompt=...)`` instead. - - temperature: Optional sampling temperature between 0 and 1. - - .. deprecated:: 0.0.105 - Use ``settings=SambaNovaSTTService.Settings(temperature=...)`` instead. - - settings: Runtime-updatable settings. When provided alongside deprecated - parameters, ``settings`` values take precedence. - ttfs_p99_latency: P99 latency from speech end to final transcript in seconds. - Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark - **kwargs: Additional arguments passed to `pipecat.services.whisper.base_stt.BaseWhisperSTTService`. - """ - # --- 1. Hardcoded defaults --- - default_settings = self.Settings( - model="Whisper-Large-v3", - language=Language.EN, - prompt=None, - temperature=None, - ) - - # --- 2. Deprecated direct-arg overrides --- - if model is not None: - self._warn_init_param_moved_to_settings("model", "model") - default_settings.model = model - if language is not None: - self._warn_init_param_moved_to_settings("language", "language") - default_settings.language = language - if prompt is not None: - self._warn_init_param_moved_to_settings("prompt", "prompt") - default_settings.prompt = prompt - if temperature is not None: - self._warn_init_param_moved_to_settings("temperature", "temperature") - default_settings.temperature = temperature - - # --- 3. (no params object for this service) --- - - # --- 4. Settings delta (canonical API, always wins) --- - if settings is not None: - default_settings.apply_update(settings) - - super().__init__( - api_key=api_key, - base_url=base_url, - settings=default_settings, - ttfs_p99_latency=ttfs_p99_latency, - **kwargs, - ) - - async def _transcribe(self, audio: bytes) -> Transcription: - assert self._settings.language is not None - - if self._include_prob_metrics: - # https://docs.sambanova.ai/docs/en/features/audio#request-parameters - logger.warning( - "SambaNova STT does not support probability metrics " - "(include_prob_metrics parameter has no effect). " - "Check their docs: https://docs.sambanova.ai/docs/en/features/audio#request-parameters for more details." - ) - - # Build kwargs dict with only set parameters - kwargs = { - "file": ("audio.wav", audio, "audio/wav"), - "model": self._settings.model, - "response_format": "json", - "language": self._settings.language, - } - - if self._settings.prompt is not None: - kwargs["prompt"] = self._settings.prompt - - if self._settings.temperature is not None: - kwargs["temperature"] = self._settings.temperature - - return await self._client.audio.transcriptions.create(**kwargs) diff --git a/src/pipecat/services/stt_latency.py b/src/pipecat/services/stt_latency.py index 4396a787a..21c33dfb0 100644 --- a/src/pipecat/services/stt_latency.py +++ b/src/pipecat/services/stt_latency.py @@ -46,7 +46,6 @@ GRADIUM_TTFS_P99: float = 1.61 GROQ_TTFS_P99: float = 1.54 OPENAI_TTFS_P99: float = 2.01 OPENAI_REALTIME_TTFS_P99: float = 1.66 -SAMBANOVA_TTFS_P99: float = 2.20 SARVAM_TTFS_P99: float = 1.17 SONIOX_TTFS_P99: float = 0.35 SPEECHMATICS_TTFS_P99: float = 0.74