Enhance session management and logging configuration
- Updated .env.example to clarify audio frame size validation and default codec settings.
- Refactored logging setup in main.py to support JSON serialization based on log format configuration.
- Improved session.py to dynamically compute audio frame bytes and include protocol version in session events.
- Added tests to validate session start events and audio frame handling based on chunk size settings.
This commit is contained in:
@@ -54,7 +54,7 @@ class Session:
|
||||
TRACK_AUDIO_IN = "audio_in"
|
||||
TRACK_AUDIO_OUT = "audio_out"
|
||||
TRACK_CONTROL = "control"
|
||||
AUDIO_FRAME_BYTES = 640 # 16k mono pcm_s16le, 20ms
|
||||
AUDIO_FRAME_BYTES = 640 # Legacy fallback: 16k mono pcm_s16le, 20ms
|
||||
_METADATA_ALLOWED_TOP_LEVEL_KEYS = {
|
||||
"overrides",
|
||||
"dynamicVariables",
|
||||
@@ -111,6 +111,7 @@ class Session:
|
||||
self.id = session_id
|
||||
self.transport = transport
|
||||
self.use_duplex = use_duplex if use_duplex is not None else settings.duplex_enabled
|
||||
self.audio_frame_bytes = self._compute_audio_frame_bytes()
|
||||
self._assistant_id = str(assistant_id or "").strip() or None
|
||||
self._backend_gateway = backend_gateway or build_backend_adapter_from_settings()
|
||||
self._history_bridge = SessionHistoryBridge(
|
||||
@@ -210,11 +211,14 @@ class Session:
|
||||
)
|
||||
return
|
||||
|
||||
frame_bytes = self.AUDIO_FRAME_BYTES
|
||||
frame_bytes = getattr(self, "audio_frame_bytes", self._compute_audio_frame_bytes())
|
||||
if len(audio_bytes) % frame_bytes != 0:
|
||||
await self._send_error(
|
||||
"client",
|
||||
f"Audio frame size must be a multiple of {frame_bytes} bytes (20ms PCM)",
|
||||
(
|
||||
f"Audio frame size must be a multiple of {frame_bytes} bytes "
|
||||
f"({settings.chunk_size_ms}ms PCM @ {settings.sample_rate}Hz)"
|
||||
),
|
||||
"audio.frame_size_mismatch",
|
||||
stage="audio",
|
||||
retryable=False,
|
||||
@@ -384,6 +388,7 @@ class Session:
|
||||
ev(
|
||||
"session.started",
|
||||
trackId=self.current_track_id,
|
||||
protocolVersion=self._public_ws_protocol_version(),
|
||||
tracks={
|
||||
"audio_in": self.TRACK_AUDIO_IN,
|
||||
"audio_out": self.TRACK_AUDIO_OUT,
|
||||
@@ -1137,6 +1142,7 @@ class Session:
|
||||
output_mode = str(runtime_output.get("mode") or "").strip().lower() if isinstance(runtime_output, dict) else ""
|
||||
if output_mode not in {"audio", "text"}:
|
||||
output_mode = "audio"
|
||||
output_codec = str(runtime_output.get("codec") or settings.default_codec or "pcm").strip().lower() or "pcm"
|
||||
|
||||
tools_allowlist: List[str] = []
|
||||
runtime_tools = runtime.get("tools", {}) if isinstance(runtime, dict) else {}
|
||||
@@ -1146,7 +1152,11 @@ class Session:
|
||||
tools_allowlist = [str(item) for item in allowlist if item is not None and str(item).strip()]
|
||||
|
||||
resolved: Dict[str, Any] = {
|
||||
"output": {"mode": output_mode},
|
||||
"protocolVersion": self._public_ws_protocol_version(),
|
||||
"output": {
|
||||
"mode": output_mode,
|
||||
"codec": output_codec,
|
||||
},
|
||||
"tools": {
|
||||
"enabled": bool(tools_allowlist),
|
||||
"count": len(tools_allowlist),
|
||||
@@ -1162,6 +1172,24 @@ class Session:
|
||||
|
||||
return resolved
|
||||
|
||||
@staticmethod
def _compute_audio_frame_bytes() -> int:
    """Return the expected size in bytes of one PCM audio frame.

    The size is derived from ``settings.sample_rate`` (default 16000) and
    ``settings.chunk_size_ms`` (default 20), each clamped to at least 1.
    The result is always an even number and never smaller than 2.
    """
    rate_hz = max(1, int(getattr(settings, "sample_rate", 16000)))
    frame_ms = max(1, int(getattr(settings, "chunk_size_ms", 20)))

    # The factor 2 is presumably bytes per sample (16-bit mono PCM) -- confirm
    # against the codec configuration.
    raw_size = int(round(rate_hz * 2 * (frame_ms / 1000.0)))

    # Enforce the 2-byte minimum, then round odd sizes up to the next even value.
    clamped = max(raw_size, 2)
    return clamped + (clamped % 2)
|
||||
|
||||
@staticmethod
def _public_ws_protocol_version() -> str:
    """Return the public protocol version label announced to clients.

    Reads ``settings.ws_protocol_version``; a missing, falsy, or
    whitespace-only value yields the default label ``"v1"``.
    """
    configured = getattr(settings, "ws_protocol_version", "v1")
    label = str(configured if configured else "v1").strip()
    if not label:
        # Stripping left nothing usable; fall back to the default label.
        return "v1"
    return label
|
||||
|
||||
def _extract_json_obj(self, text: str) -> Optional[Dict[str, Any]]:
|
||||
"""Best-effort extraction of a JSON object from freeform text."""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user