Remove SambaNovaSTTService
SambaNova no longer offers speech-to-text audio models.
This commit is contained in:
@@ -87,7 +87,7 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout
|
||||
|
||||
| Category | Services |
|
||||
| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) |
|
||||
| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [Novita](https://docs.pipecat.ai/server/services/llm/novita), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nvidia), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/llm/sarvam), [Together AI](https://docs.pipecat.ai/server/services/llm/together) |
|
||||
| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Smallest](https://docs.pipecat.ai/server/services/tts/smallest), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [xAI](https://docs.pipecat.ai/server/services/tts/xai), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) |
|
||||
| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), |
|
||||
|
||||
1
changelog/4154.removed.md
Normal file
1
changelog/4154.removed.md
Normal file
@@ -0,0 +1 @@
|
||||
- Removed `SambaNovaSTTService`. SambaNova no longer offers speech-to-text audio models. Use another STT provider instead.
|
||||
@@ -1,121 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import os
|
||||
import time
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.frames.frames import Frame, TranscriptionFrame, UserStoppedSpeakingFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.audio.vad_processor import VADProcessor
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.sambanova.stt import SambaNovaSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
STOP_SECS = 2.0
|
||||
|
||||
|
||||
class TranscriptionLogger(FrameProcessor):
|
||||
"""Measures transcription latency.
|
||||
|
||||
Uses the (intentionally) long STOP_SECS parameter to give the transcription time to finish,
|
||||
then outputs the timing between when the VAD first classified audio input as not-speech and
|
||||
the delivery of the last transcription frame.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self._last_transcription_time = time.time()
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, UserStoppedSpeakingFrame):
|
||||
logger.debug(
|
||||
f"Transcription latency: {(STOP_SECS - (time.time() - self._last_transcription_time)):.2f}"
|
||||
)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
self._last_transcription_time = time.time()
|
||||
|
||||
# Push all frames through
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
# We use lambdas to defer transport parameter creation until the transport
|
||||
# type is selected at runtime.
|
||||
transport_params = {
|
||||
"daily": lambda: DailyParams(
|
||||
audio_in_enabled=True,
|
||||
),
|
||||
"twilio": lambda: FastAPIWebsocketParams(
|
||||
audio_in_enabled=True,
|
||||
),
|
||||
"webrtc": lambda: TransportParams(
|
||||
audio_in_enabled=True,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SambaNovaSTTService(
|
||||
settings=SambaNovaSTTService.Settings(
|
||||
model="Whisper-Large-v3",
|
||||
),
|
||||
api_key=os.getenv("SAMBANOVA_API_KEY"),
|
||||
)
|
||||
|
||||
tl = TranscriptionLogger()
|
||||
vad_processor = VADProcessor(
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=STOP_SECS))
|
||||
)
|
||||
|
||||
pipeline = Pipeline([transport.input(), vad_processor, stt, tl])
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
|
||||
)
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
async def on_client_disconnected(transport, client):
|
||||
logger.info(f"Client disconnected")
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
async def bot(runner_args: RunnerArguments):
|
||||
"""Main bot entry point compatible with Pipecat Cloud."""
|
||||
transport = await create_transport(runner_args, transport_params)
|
||||
await run_bot(transport, runner_args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pipecat.runner.run import main
|
||||
|
||||
main()
|
||||
@@ -25,9 +25,9 @@ from pipecat.processors.aggregators.llm_response_universal import (
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.sambanova.llm import SambaNovaLLMService
|
||||
from pipecat.services.sambanova.stt import SambaNovaSTTService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
|
||||
@@ -60,9 +60,8 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = SambaNovaSTTService(
|
||||
model="Whisper-Large-v3",
|
||||
api_key=os.getenv("SAMBANOVA_API_KEY"),
|
||||
stt = DeepgramSTTService(
|
||||
api_key=os.getenv("DEEPGRAM_API_KEY"),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
|
||||
@@ -5,4 +5,3 @@
|
||||
#
|
||||
|
||||
from .llm import *
|
||||
from .stt import *
|
||||
|
||||
@@ -1,143 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2024-2026, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""SambaNova's Speech-to-Text service implementation for real-time transcription."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.services.stt_latency import SAMBANOVA_TTFS_P99
|
||||
from pipecat.services.whisper.base_stt import (
|
||||
BaseWhisperSTTService,
|
||||
Transcription,
|
||||
)
|
||||
from pipecat.transcriptions.language import Language
|
||||
|
||||
|
||||
@dataclass
|
||||
class SambaNovaSTTSettings(BaseWhisperSTTService.Settings):
|
||||
"""Settings for the SambaNova STT service."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore
|
||||
"""SambaNova Whisper speech-to-text service.
|
||||
|
||||
Uses SambaNova's Whisper API to convert audio to text.
|
||||
Requires a SambaNova API key set via the api_key parameter or SAMBANOVA_API_KEY environment variable.
|
||||
"""
|
||||
|
||||
Settings = SambaNovaSTTSettings
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
model: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
base_url: str = "https://api.sambanova.ai/v1",
|
||||
language: Optional[Language] = None,
|
||||
prompt: Optional[str] = None,
|
||||
temperature: Optional[float] = None,
|
||||
settings: Optional[Settings] = None,
|
||||
ttfs_p99_latency: Optional[float] = SAMBANOVA_TTFS_P99,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Initialize SambaNova STT service.
|
||||
|
||||
Args:
|
||||
model: Whisper model to use.
|
||||
|
||||
.. deprecated:: 0.0.105
|
||||
Use ``settings=SambaNovaSTTService.Settings(model=...)`` instead.
|
||||
|
||||
api_key: SambaNova API key. Defaults to None.
|
||||
base_url: API base URL. Defaults to "https://api.sambanova.ai/v1".
|
||||
language: Language of the audio input.
|
||||
|
||||
.. deprecated:: 0.0.105
|
||||
Use ``settings=SambaNovaSTTService.Settings(language=...)`` instead.
|
||||
|
||||
prompt: Optional text to guide the model's style or continue a previous segment.
|
||||
|
||||
.. deprecated:: 0.0.105
|
||||
Use ``settings=SambaNovaSTTService.Settings(prompt=...)`` instead.
|
||||
|
||||
temperature: Optional sampling temperature between 0 and 1.
|
||||
|
||||
.. deprecated:: 0.0.105
|
||||
Use ``settings=SambaNovaSTTService.Settings(temperature=...)`` instead.
|
||||
|
||||
settings: Runtime-updatable settings. When provided alongside deprecated
|
||||
parameters, ``settings`` values take precedence.
|
||||
ttfs_p99_latency: P99 latency from speech end to final transcript in seconds.
|
||||
Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark
|
||||
**kwargs: Additional arguments passed to `pipecat.services.whisper.base_stt.BaseWhisperSTTService`.
|
||||
"""
|
||||
# --- 1. Hardcoded defaults ---
|
||||
default_settings = self.Settings(
|
||||
model="Whisper-Large-v3",
|
||||
language=Language.EN,
|
||||
prompt=None,
|
||||
temperature=None,
|
||||
)
|
||||
|
||||
# --- 2. Deprecated direct-arg overrides ---
|
||||
if model is not None:
|
||||
self._warn_init_param_moved_to_settings("model", "model")
|
||||
default_settings.model = model
|
||||
if language is not None:
|
||||
self._warn_init_param_moved_to_settings("language", "language")
|
||||
default_settings.language = language
|
||||
if prompt is not None:
|
||||
self._warn_init_param_moved_to_settings("prompt", "prompt")
|
||||
default_settings.prompt = prompt
|
||||
if temperature is not None:
|
||||
self._warn_init_param_moved_to_settings("temperature", "temperature")
|
||||
default_settings.temperature = temperature
|
||||
|
||||
# --- 3. (no params object for this service) ---
|
||||
|
||||
# --- 4. Settings delta (canonical API, always wins) ---
|
||||
if settings is not None:
|
||||
default_settings.apply_update(settings)
|
||||
|
||||
super().__init__(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
settings=default_settings,
|
||||
ttfs_p99_latency=ttfs_p99_latency,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
async def _transcribe(self, audio: bytes) -> Transcription:
|
||||
assert self._settings.language is not None
|
||||
|
||||
if self._include_prob_metrics:
|
||||
# https://docs.sambanova.ai/docs/en/features/audio#request-parameters
|
||||
logger.warning(
|
||||
"SambaNova STT does not support probability metrics "
|
||||
"(include_prob_metrics parameter has no effect). "
|
||||
"Check their docs: https://docs.sambanova.ai/docs/en/features/audio#request-parameters for more details."
|
||||
)
|
||||
|
||||
# Build kwargs dict with only set parameters
|
||||
kwargs = {
|
||||
"file": ("audio.wav", audio, "audio/wav"),
|
||||
"model": self._settings.model,
|
||||
"response_format": "json",
|
||||
"language": self._settings.language,
|
||||
}
|
||||
|
||||
if self._settings.prompt is not None:
|
||||
kwargs["prompt"] = self._settings.prompt
|
||||
|
||||
if self._settings.temperature is not None:
|
||||
kwargs["temperature"] = self._settings.temperature
|
||||
|
||||
return await self._client.audio.transcriptions.create(**kwargs)
|
||||
@@ -46,7 +46,6 @@ GRADIUM_TTFS_P99: float = 1.61
|
||||
GROQ_TTFS_P99: float = 1.54
|
||||
OPENAI_TTFS_P99: float = 2.01
|
||||
OPENAI_REALTIME_TTFS_P99: float = 1.66
|
||||
SAMBANOVA_TTFS_P99: float = 2.20
|
||||
SARVAM_TTFS_P99: float = 1.17
|
||||
SONIOX_TTFS_P99: float = 0.35
|
||||
SPEECHMATICS_TTFS_P99: float = 0.74
|
||||
|
||||
Reference in New Issue
Block a user