From 644030584fb3ee9238f7f00fbfe2ed492022db4e Mon Sep 17 00:00:00 2001
From: Mark Backman <mark@daily.co>
Date: Tue, 12 May 2026 15:38:20 -0400
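Subject: [PATCH] Centralize OpenAI audio constants

Move the 24 kHz sample rate and the "gpt-realtime-whisper" model name
into pipecat/services/openai/_constants.py so the realtime LLM, STT and
TTS services share a single definition.

This removes events.GPT_REALTIME_WHISPER_MODEL and the
OpenAITTSService.OPENAI_SAMPLE_RATE class attribute; code that
referenced those names must be updated.
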
---
 src/pipecat/services/openai/_constants.py      | 10 ++++++++++
 src/pipecat/services/openai/realtime/events.py |  9 ++++-----
 src/pipecat/services/openai/realtime/llm.py    |  9 +++++----
 src/pipecat/services/openai/stt.py             | 15 ++++++---------
 src/pipecat/services/openai/tts.py             | 11 +++++------
 tests/test_settings.py                         |  3 ++-
 6 files changed, 32 insertions(+), 25 deletions(-)
 create mode 100644 src/pipecat/services/openai/_constants.py

diff --git a/src/pipecat/services/openai/_constants.py b/src/pipecat/services/openai/_constants.py
new file mode 100644
index 000000000..110c95cdb
--- /dev/null
+++ b/src/pipecat/services/openai/_constants.py
@@ -0,0 +1,18 @@
+#
+# Copyright (c) 2024-2026, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Internal constants for OpenAI service integrations."""
+
+from typing import Final
+
+# Declared ``Final`` so type checkers infer literal types; this keeps
+# ``rate: Literal[24000] = OPENAI_SAMPLE_RATE`` in realtime/events.py valid.
+
+# Audio sample rate in Hz used by the OpenAI realtime, STT and TTS services.
+OPENAI_SAMPLE_RATE: Final = 24000
+
+# Default transcription model for OpenAI realtime audio input.
+OPENAI_REALTIME_WHISPER_MODEL: Final = "gpt-realtime-whisper"
diff --git a/src/pipecat/services/openai/realtime/events.py b/src/pipecat/services/openai/realtime/events.py
index 42d3b7e4d..215ab1b23 100644
--- a/src/pipecat/services/openai/realtime/events.py
+++ b/src/pipecat/services/openai/realtime/events.py
@@ -13,13 +13,12 @@ from typing import Any, Literal
 from pydantic import BaseModel, ConfigDict, Field
 
 from pipecat.adapters.schemas.tools_schema import ToolsSchema
+from pipecat.services.openai._constants import OPENAI_REALTIME_WHISPER_MODEL, OPENAI_SAMPLE_RATE
 
 #
 # session properties
 #
 
-GPT_REALTIME_WHISPER_MODEL = "gpt-realtime-whisper"
-
 
 class AudioFormat(BaseModel):
     """Base class for audio format configuration."""
@@ -36,7 +35,7 @@ class PCMAudioFormat(AudioFormat):
     """
 
     type: Literal["audio/pcm"] = "audio/pcm"
-    rate: Literal[24000] = 24000
+    rate: Literal[24000] = OPENAI_SAMPLE_RATE
 
 
 class PCMUAudioFormat(AudioFormat):
@@ -62,13 +61,13 @@ class PCMAAudioFormat(AudioFormat):
 class InputAudioTranscription(BaseModel):
     """Configuration for audio transcription settings."""
 
-    model: str = GPT_REALTIME_WHISPER_MODEL
+    model: str = OPENAI_REALTIME_WHISPER_MODEL
     language: str | None
     prompt: str | None
 
     def __init__(
         self,
-        model: str | None = GPT_REALTIME_WHISPER_MODEL,
+        model: str | None = OPENAI_REALTIME_WHISPER_MODEL,
         language: str | None = None,
         prompt: str | None = None,
     ):
diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py
index e39299dc0..a95ceccfd 100644
--- a/src/pipecat/services/openai/realtime/llm.py
+++ b/src/pipecat/services/openai/realtime/llm.py
@@ -51,6 +51,7 @@ from pipecat.metrics.metrics import LLMTokenUsage
 from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.llm_service import FunctionCallFromLLM, LLMService
+from pipecat.services.openai._constants import OPENAI_REALTIME_WHISPER_MODEL, OPENAI_SAMPLE_RATE
 from pipecat.services.settings import (
     NOT_GIVEN,
     LLMSettings,
@@ -337,11 +338,11 @@ class OpenAIRealtimeLLMService(LLMService[OpenAIRealtimeLLMAdapter]):
             and session_properties.audio.input.transcription
             else None
         )
-        if transcription and transcription.model == events.GPT_REALTIME_WHISPER_MODEL:
+        if transcription and transcription.model == OPENAI_REALTIME_WHISPER_MODEL:
             if transcription.prompt:
                 transcription.prompt = None
                 logger.warning(
-                    f"{events.GPT_REALTIME_WHISPER_MODEL} does not support the prompt "
+                    f"{OPENAI_REALTIME_WHISPER_MODEL} does not support the prompt "
                     "parameter; omitting prompt from OpenAI Realtime input audio "
                     "transcription settings."
                 )
@@ -505,7 +506,7 @@ class OpenAIRealtimeLLMService(LLMService[OpenAIRealtimeLLMAdapter]):
         self._current_audio_response = None
 
     def _calculate_audio_duration_ms(
-        self, total_bytes: int, sample_rate: int = 24000, bytes_per_sample: int = 2
+        self, total_bytes: int, sample_rate: int = OPENAI_SAMPLE_RATE, bytes_per_sample: int = 2
     ) -> int:
         """Calculate audio duration in milliseconds based on PCM audio parameters."""
         samples = total_bytes / bytes_per_sample
@@ -797,7 +798,7 @@ class OpenAIRealtimeLLMService(LLMService[OpenAIRealtimeLLMAdapter]):
         self._current_audio_response.total_size += len(audio)
         frame = TTSAudioRawFrame(
             audio=audio,
-            sample_rate=24000,
+            sample_rate=OPENAI_SAMPLE_RATE,
             num_channels=1,
         )
         await self.push_frame(frame)
diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py
index 8fb82b8df..862c0f3a6 100644
--- a/src/pipecat/services/openai/stt.py
+++ b/src/pipecat/services/openai/stt.py
@@ -36,6 +36,7 @@ from pipecat.frames.frames import (
     VADUserStoppedSpeakingFrame,
 )
 from pipecat.processors.frame_processor import FrameDirection
+from pipecat.services.openai._constants import OPENAI_REALTIME_WHISPER_MODEL, OPENAI_SAMPLE_RATE
 from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, assert_given
 from pipecat.services.stt_latency import OPENAI_REALTIME_TTFS_P99, OPENAI_TTFS_P99
 from pipecat.services.stt_service import WebsocketSTTService
@@ -178,10 +179,6 @@ class OpenAISTTService(BaseWhisperSTTService):
         return await self._client.audio.transcriptions.create(**kwargs)
 
 
-_OPENAI_SAMPLE_RATE = 24000
-_OPENAI_REALTIME_WHISPER_MODEL = "gpt-realtime-whisper"
-
-
 @dataclass
 class OpenAIRealtimeSTTSettings(STTSettings):
     """Settings for OpenAIRealtimeSTTService.
@@ -308,7 +305,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService):
 
         # --- 1. Hardcoded defaults ---
         default_settings = self.Settings(
-            model=_OPENAI_REALTIME_WHISPER_MODEL,
+            model=OPENAI_REALTIME_WHISPER_MODEL,
             language=Language.EN,
             prompt=None,
             noise_reduction=None,
@@ -359,11 +356,11 @@ class OpenAIRealtimeSTTService(WebsocketSTTService):
     @staticmethod
     def _omit_unsupported_prompt(settings: OpenAIRealtimeSTTSettings) -> dict[str, Any]:
         """Drop prompt settings that are not accepted by the selected model."""
-        if settings.model == _OPENAI_REALTIME_WHISPER_MODEL and settings.prompt:
+        if settings.model == OPENAI_REALTIME_WHISPER_MODEL and settings.prompt:
             old_prompt = settings.prompt
             settings.prompt = None
             logger.warning(
-                f"{_OPENAI_REALTIME_WHISPER_MODEL} does not support the prompt parameter; "
+                f"{OPENAI_REALTIME_WHISPER_MODEL} does not support the prompt parameter; "
                 "omitting prompt from OpenAI Realtime transcription session."
             )
             return {"prompt": old_prompt}
@@ -572,7 +569,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService):
         input_audio: dict = {
             "format": {
                 "type": "audio/pcm",
-                "rate": _OPENAI_SAMPLE_RATE,
+                "rate": OPENAI_SAMPLE_RATE,
             },
             "transcription": transcription,
         }
@@ -609,7 +606,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService):
         Args:
             audio: Raw audio bytes at the pipeline sample rate.
         """
-        audio = await self._resampler.resample(audio, self.sample_rate, _OPENAI_SAMPLE_RATE)
+        audio = await self._resampler.resample(audio, self.sample_rate, OPENAI_SAMPLE_RATE)
         if not audio:
             return
         payload = base64.b64encode(audio).decode("utf-8")
diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py
index a6528f59e..e6ebb4dd6 100644
--- a/src/pipecat/services/openai/tts.py
+++ b/src/pipecat/services/openai/tts.py
@@ -24,6 +24,7 @@ from pipecat.frames.frames import (
     StartFrame,
     TTSAudioRawFrame,
 )
+from pipecat.services.openai._constants import OPENAI_SAMPLE_RATE
 from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, assert_given
 from pipecat.services.tts_service import TTSService
 from pipecat.utils.tracing.service_decorators import traced_tts
@@ -85,8 +86,6 @@ class OpenAITTSService(TTSService):
     Settings = OpenAITTSSettings
     _settings: Settings
 
-    OPENAI_SAMPLE_RATE = 24000  # OpenAI TTS always outputs at 24kHz
-
     class InputParams(BaseModel):
         """Input parameters for OpenAI TTS configuration.
 
@@ -150,9 +149,9 @@ class OpenAITTSService(TTSService):
                 parameters, ``settings`` values take precedence.
             **kwargs: Additional keyword arguments passed to TTSService.
         """
-        if sample_rate and sample_rate != self.OPENAI_SAMPLE_RATE:
+        if sample_rate and sample_rate != OPENAI_SAMPLE_RATE:
             logger.warning(
-                f"OpenAI TTS only supports {self.OPENAI_SAMPLE_RATE}Hz sample rate. "
+                f"OpenAI TTS only supports {OPENAI_SAMPLE_RATE}Hz sample rate. "
                 f"Current rate of {sample_rate}Hz may cause issues."
             )
 
@@ -217,9 +216,9 @@ class OpenAITTSService(TTSService):
             frame: The start frame containing initialization parameters.
         """
         await super().start(frame)
-        if self.sample_rate != self.OPENAI_SAMPLE_RATE:
+        if self.sample_rate != OPENAI_SAMPLE_RATE:
             logger.warning(
-                f"OpenAI TTS requires {self.OPENAI_SAMPLE_RATE}Hz sample rate. "
+                f"OpenAI TTS requires {OPENAI_SAMPLE_RATE}Hz sample rate. "
                 f"Current rate of {self.sample_rate}Hz may cause issues."
             )
 
diff --git a/tests/test_settings.py b/tests/test_settings.py
index 78c1f30b8..ab76a8c6c 100644
--- a/tests/test_settings.py
+++ b/tests/test_settings.py
@@ -12,6 +12,7 @@ from pipecat.services.deepgram.sagemaker.stt import DeepgramSageMakerSTTSettings
 from pipecat.services.deepgram.stt import DeepgramSTTService, DeepgramSTTSettings
 from pipecat.services.inworld.realtime import events as inworld_events
 from pipecat.services.inworld.realtime.llm import InworldRealtimeLLMSettings
+from pipecat.services.openai._constants import OPENAI_REALTIME_WHISPER_MODEL
 from pipecat.services.openai.realtime import events
 from pipecat.services.openai.realtime.llm import (
     OpenAIRealtimeLLMService,
@@ -757,7 +758,7 @@ class TestOpenAIRealtimeSessionProperties:
             audio=events.AudioConfiguration(
                 input=events.AudioInput(
                     transcription=events.InputAudioTranscription(
-                        model=events.GPT_REALTIME_WHISPER_MODEL,
+                        model=OPENAI_REALTIME_WHISPER_MODEL,
                         prompt="Keywords: metoprolol",
                     )
                 )