Remove SambaNovaSTTService

SambaNova no longer offers speech-to-text audio models.
This commit is contained in:
Mark Backman
2026-03-26 12:22:06 -04:00
parent 5ad4aa9bea
commit ca2bfd6f12
7 changed files with 5 additions and 271 deletions

View File

@@ -87,7 +87,7 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
| Category | Services |
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox) |

View File

@@ -0,0 +1 @@
- Removed `SambaNovaSTTService`. SambaNova no longer offers speech-to-text audio models. Use another STT provider instead.

View File

@@ -1,121 +0,0 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
import os
import time
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import Frame, TranscriptionFrame, UserStoppedSpeakingFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.audio.vad_processor import VADProcessor
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.sambanova.stt import SambaNovaSTTService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
# Load settings from a .env file; override=True asks python-dotenv to let those
# values replace variables that are already set in the environment.
load_dotenv(override=True)
# VAD stop_secs value, in seconds, handed to the Silero analyzer in run_bot.
# Intentionally long so the transcription can finish before the "user stopped
# speaking" event fires; TranscriptionLogger subtracts from it to get latency.
STOP_SECS = 2.0
class TranscriptionLogger(FrameProcessor):
    """Frame processor that logs how long transcription took.

    STOP_SECS is deliberately long so the transcription has time to arrive before
    the "user stopped speaking" event. When that event shows up, the time elapsed
    since the most recent transcription is subtracted from STOP_SECS, which yields
    the delay between the VAD first classifying the input as not-speech and the
    delivery of the last transcription frame.
    """

    def __init__(self):
        super().__init__()
        # Seeded with the construction time until the first transcription arrives.
        self._last_transcription_time = time.time()

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Record transcription timing, log latency, and forward every frame.

        Args:
            frame: The frame travelling through the pipeline.
            direction: Direction the frame is travelling in.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, UserStoppedSpeakingFrame):
            since_last_transcription = time.time() - self._last_transcription_time
            latency = STOP_SECS - since_last_transcription
            logger.debug(f"Transcription latency: {latency:.2f}")

        if isinstance(frame, TranscriptionFrame):
            self._last_transcription_time = time.time()

        # Nothing is consumed here: each frame continues in its original direction.
        await self.push_frame(frame, direction)
# Each entry is a zero-argument factory rather than a ready-made params object,
# so parameters are only built for the transport type chosen at runtime.
transport_params = {
    "daily": lambda: DailyParams(audio_in_enabled=True),
    "twilio": lambda: FastAPIWebsocketParams(audio_in_enabled=True),
    "webrtc": lambda: TransportParams(audio_in_enabled=True),
}
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Build the transcription-latency pipeline on ``transport`` and run it.

    The pipeline is: transport input -> VAD -> SambaNova STT -> TranscriptionLogger.

    Args:
        transport: Transport whose input feeds the pipeline.
        runner_args: Runner options; supplies the pipeline idle timeout and the
            SIGINT-handling flag.
    """
    logger.info("Starting bot")

    stt_settings = SambaNovaSTTService.Settings(model="Whisper-Large-v3")
    stt = SambaNovaSTTService(
        settings=stt_settings,
        api_key=os.getenv("SAMBANOVA_API_KEY"),
    )

    transcription_logger = TranscriptionLogger()

    # The long stop_secs is what TranscriptionLogger measures latency against.
    vad_params = VADParams(stop_secs=STOP_SECS)
    vad_processor = VADProcessor(vad_analyzer=SileroVADAnalyzer(params=vad_params))

    stages = [transport.input(), vad_processor, stt, transcription_logger]
    pipeline = Pipeline(stages)

    task_params = PipelineParams(enable_metrics=True, enable_usage_metrics=True)
    task = PipelineTask(
        pipeline,
        params=task_params,
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        # Cancel the pipeline task once the client disconnects.
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
    await runner.run(task)
async def bot(runner_args: RunnerArguments):
    """Pipecat Cloud compatible entry point: create the transport, then run the bot.

    Args:
        runner_args: Runner options used to create the transport and passed on to
            ``run_bot``.
    """
    await run_bot(await create_transport(runner_args, transport_params), runner_args)
if __name__ == "__main__":
    # Imported here, so the runner module is only loaded when this file is
    # executed directly as a script.
    from pipecat.runner.run import main
    main()

View File

@@ -25,9 +25,9 @@ from pipecat.processors.aggregators.llm_response_universal import (
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.llm_service import FunctionCallParams
from pipecat.services.sambanova.llm import SambaNovaLLMService
from pipecat.services.sambanova.stt import SambaNovaSTTService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
@@ -60,9 +60,8 @@ transport_params = {
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = SambaNovaSTTService(
model="Whisper-Large-v3",
api_key=os.getenv("SAMBANOVA_API_KEY"),
stt = DeepgramSTTService(
api_key=os.getenv("DEEPGRAM_API_KEY"),
)
tts = CartesiaTTSService(

View File

@@ -5,4 +5,3 @@
#
from .llm import *
from .stt import *

View File

@@ -1,143 +0,0 @@
#
# Copyright (c) 2024-2026, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#
"""SambaNova's Speech-to-Text service implementation for real-time transcription."""
from dataclasses import dataclass
from typing import Any, Optional
from loguru import logger
from pipecat.services.stt_latency import SAMBANOVA_TTFS_P99
from pipecat.services.whisper.base_stt import (
BaseWhisperSTTService,
Transcription,
)
from pipecat.transcriptions.language import Language
@dataclass
class SambaNovaSTTSettings(BaseWhisperSTTService.Settings):
    """Settings for the SambaNova STT service.

    Declares no fields of its own; everything is inherited from
    ``BaseWhisperSTTService.Settings``.
    """
class SambaNovaSTTService(BaseWhisperSTTService):  # type: ignore
    """Speech-to-text service backed by SambaNova's Whisper API.

    Converts audio to text through SambaNova's Whisper endpoint. A SambaNova API key
    is required, supplied via the ``api_key`` parameter or the SAMBANOVA_API_KEY
    environment variable.
    """

    Settings = SambaNovaSTTSettings

    def __init__(
        self,
        *,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        base_url: str = "https://api.sambanova.ai/v1",
        language: Optional[Language] = None,
        prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        settings: Optional[Settings] = None,
        ttfs_p99_latency: Optional[float] = SAMBANOVA_TTFS_P99,
        **kwargs: Any,
    ) -> None:
        """Initialize the SambaNova STT service.

        Args:
            model: Whisper model to use.

                .. deprecated:: 0.0.105
                    Use ``settings=SambaNovaSTTService.Settings(model=...)`` instead.

            api_key: SambaNova API key. Defaults to None.
            base_url: API base URL. Defaults to "https://api.sambanova.ai/v1".
            language: Language of the audio input.

                .. deprecated:: 0.0.105
                    Use ``settings=SambaNovaSTTService.Settings(language=...)`` instead.

            prompt: Optional text to guide the model's style or continue a previous
                segment.

                .. deprecated:: 0.0.105
                    Use ``settings=SambaNovaSTTService.Settings(prompt=...)`` instead.

            temperature: Optional sampling temperature between 0 and 1.

                .. deprecated:: 0.0.105
                    Use ``settings=SambaNovaSTTService.Settings(temperature=...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            ttfs_p99_latency: P99 latency from speech end to final transcript in
                seconds. Override for your deployment.
                See https://github.com/pipecat-ai/stt-benchmark
            **kwargs: Additional arguments passed to
                `pipecat.services.whisper.base_stt.BaseWhisperSTTService`.
        """
        # Start from the built-in defaults.
        default_settings = self.Settings(
            model="Whisper-Large-v3",
            language=Language.EN,
            prompt=None,
            temperature=None,
        )

        # Layer the deprecated constructor arguments on top, warning for each one
        # that was actually supplied. The order matches the original sequence.
        deprecated_overrides = (
            ("model", model),
            ("language", language),
            ("prompt", prompt),
            ("temperature", temperature),
        )
        for field_name, supplied in deprecated_overrides:
            if supplied is not None:
                self._warn_init_param_moved_to_settings(field_name, field_name)
                setattr(default_settings, field_name, supplied)

        # This service has no separate params object to merge.

        # The explicit settings object is applied last so it always wins.
        if settings is not None:
            default_settings.apply_update(settings)

        super().__init__(
            api_key=api_key,
            base_url=base_url,
            settings=default_settings,
            ttfs_p99_latency=ttfs_p99_latency,
            **kwargs,
        )

    async def _transcribe(self, audio: bytes) -> Transcription:
        """Send ``audio`` to the client's transcription endpoint as a WAV upload.

        Args:
            audio: Audio bytes, uploaded as ``audio.wav`` with type ``audio/wav``.

        Returns:
            The result of ``self._client.audio.transcriptions.create``.
        """
        assert self._settings.language is not None

        if self._include_prob_metrics:
            # https://docs.sambanova.ai/docs/en/features/audio#request-parameters
            logger.warning(
                "SambaNova STT does not support probability metrics "
                "(include_prob_metrics parameter has no effect). "
                "Check their docs: https://docs.sambanova.ai/docs/en/features/audio#request-parameters for more details."
            )

        # Fields sent on every request.
        request = {
            "file": ("audio.wav", audio, "audio/wav"),
            "model": self._settings.model,
            "response_format": "json",
            "language": self._settings.language,
        }

        # Optional fields are only included when they have been set.
        optional_fields = {
            "prompt": self._settings.prompt,
            "temperature": self._settings.temperature,
        }
        request.update(
            {key: value for key, value in optional_fields.items() if value is not None}
        )

        return await self._client.audio.transcriptions.create(**request)

View File

@@ -46,7 +46,6 @@ GRADIUM_TTFS_P99: float = 1.61
GROQ_TTFS_P99: float = 1.54
OPENAI_TTFS_P99: float = 2.01
OPENAI_REALTIME_TTFS_P99: float = 1.66
SAMBANOVA_TTFS_P99: float = 2.20
SARVAM_TTFS_P99: float = 1.17
SONIOX_TTFS_P99: float = 0.35
SPEECHMATICS_TTFS_P99: float = 0.74