diff --git a/changelog/4026.fixed.md b/changelog/4026.fixed.md new file mode 100644 index 000000000..a12321ab4 --- /dev/null +++ b/changelog/4026.fixed.md @@ -0,0 +1 @@ +- Fixed `DeepgramSTTService` ignoring the `base_url` scheme when using `ws://` or `http://`. Previously these were silently overwritten with `wss://` / `https://`, breaking air-gapped or private deployments that don't use TLS. All scheme choices (`wss://`, `https://`, `ws://`, `http://`, or bare hostname) are now respected. diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 7d849a160..32710d00c 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -247,6 +247,45 @@ class DeepgramSTTSettings(STTSettings): del self.extra[key] +def _derive_deepgram_urls(base_url: str) -> tuple[str, str]: + """Derive paired WebSocket and HTTP URLs from a single base URL. + + The Deepgram SDK client requires both a WebSocket URL (for streaming) + and an HTTP URL (for REST calls). This helper lets developers provide + a single ``base_url`` and consistently derives both, preserving the + security level they chose. Useful for air-gapped or private deployments + where insecure schemes (ws:// / http://) are acceptable. + + Accepted inputs: + - ``wss://`` or ``https://`` — secure (paired as wss + https) + - ``ws://`` or ``http://`` — insecure (paired as ws + http) + - Bare hostname (no scheme) — defaults to secure + - Unrecognized scheme — logs a warning, defaults to secure + + Args: + base_url: Host with optional scheme, port, and path. + + Returns: + A (ws_url, http_url) tuple with consistent schemes. + """ + known_schemes = ("wss://", "https://", "ws://", "http://") + if "://" in base_url: + scheme, host = base_url.split("://", 1) + scheme += "://" + if scheme not in known_schemes: + logger.warning( + f"Unrecognized scheme in base_url '{base_url}', defaulting to wss:// / https://" + ) + else: + scheme = "" + host = base_url + + insecure = scheme in ("ws://", "http://") + ws_url = f"{'ws' if insecure else 'wss'}://{host}" + http_url = f"{'http' if insecure else 'https'}://{host}" + return ws_url, http_url + + class DeepgramSTTService(STTService): """Deepgram speech-to-text service. @@ -445,8 +484,7 @@ class DeepgramSTTService(STTService): try: from deepgram import DeepgramClientEnvironment - ws_url = base_url if base_url.startswith("wss://") else f"wss://{base_url}" - http_url = base_url if base_url.startswith("https://") else f"https://{base_url}" + ws_url, http_url = _derive_deepgram_urls(base_url) environment = DeepgramClientEnvironment( base=http_url, production=ws_url, diff --git a/tests/test_deepgram_stt.py b/tests/test_deepgram_stt.py new file mode 100644 index 000000000..eb8036237 --- /dev/null +++ b/tests/test_deepgram_stt.py @@ -0,0 +1,51 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import io + +import pytest +from loguru import logger + +from pipecat.services.deepgram.stt import _derive_deepgram_urls + + +@pytest.mark.parametrize( + "base_url, expected_ws, expected_http", + [ + # Secure schemes + ("wss://mydeepgram.com", "wss://mydeepgram.com", "https://mydeepgram.com"), + ("https://mydeepgram.com", "wss://mydeepgram.com", "https://mydeepgram.com"), + # Insecure schemes (air-gapped deployments) + ("ws://mydeepgram.com", "ws://mydeepgram.com", "http://mydeepgram.com"), + ("http://mydeepgram.com", "ws://mydeepgram.com", "http://mydeepgram.com"), + # Bare hostname defaults to secure + ("mydeepgram.com", "wss://mydeepgram.com", "https://mydeepgram.com"), + # With port + ("ws://localhost:8080", "ws://localhost:8080", "http://localhost:8080"), + ("wss://localhost:443", "wss://localhost:443", "https://localhost:443"), + ("localhost:8080", "wss://localhost:8080", "https://localhost:8080"), + # With path + ("wss://host/v1/listen", "wss://host/v1/listen", "https://host/v1/listen"), + ("http://host/v1/listen", "ws://host/v1/listen", "http://host/v1/listen"), + ], +) +def test_derive_deepgram_urls(base_url, expected_ws, expected_http): + ws_url, http_url = _derive_deepgram_urls(base_url) + assert ws_url == expected_ws + assert http_url == expected_http + + +def test_derive_deepgram_urls_unknown_scheme_warns(): + sink = io.StringIO() + handler_id = logger.add(sink, format="{message}") + try: + ws_url, http_url = _derive_deepgram_urls("ftp://mydeepgram.com") + # Falls back to secure + assert ws_url == "wss://mydeepgram.com" + assert http_url == "https://mydeepgram.com" + assert "Unrecognized scheme" in sink.getvalue() + finally: + logger.remove(handler_id)