Enhance session management and logging configuration

- Updated .env.example to clarify audio frame size validation and default codec settings.
- Refactored logging setup in main.py to support JSON serialization based on log format configuration.
- Improved session.py to dynamically compute audio frame bytes and include protocol version in session events.
- Added tests to validate session start events and audio frame handling based on chunk size settings.
2026-03-05 21:44:23 +08:00
parent 1cecbaa172
commit 6b589a1b7c
4 changed files with 105 additions and 17 deletions
--- a/engine/.env.example
+++ b/engine/.env.example
@@ -26,22 +26,27 @@ HISTORY_FINALIZE_DRAIN_TIMEOUT_SEC=1.5
 SAMPLE_RATE=16000
 # 20ms is recommended for VAD stability and latency.
 # 100ms works but usually worsens start-of-speech accuracy.
+# WS binary audio frame size validation is derived from SAMPLE_RATE and CHUNK_SIZE_MS.
+# Client frame payloads must be a multiple of SAMPLE_RATE * 2 * (CHUNK_SIZE_MS / 1000) bytes (e.g. 16000 * 2 * 0.02 = 640 bytes with the defaults here).
 CHUNK_SIZE_MS=20
+# Public default output codec exposed in config.resolved (overridable by runtime metadata).
 DEFAULT_CODEC=pcm
 MAX_AUDIO_BUFFER_SECONDS=30

 # Local assistant/agent YAML directory. In local mode the runtime resolves:
 #   ASSISTANT_LOCAL_CONFIG_DIR/<assistant_id>.yaml
-ASSISTANT_LOCAL_CONFIG_DIR=engine/config/agents
+ASSISTANT_LOCAL_CONFIG_DIR=config/agents

 # Logging
 LOG_LEVEL=INFO
 # json is better for production/observability; text is easier locally.
+# Controls both console and file log serialization/format.
 LOG_FORMAT=json

 # WebSocket behavior
 INACTIVITY_TIMEOUT_SEC=60
 HEARTBEAT_INTERVAL_SEC=50
+# Public protocol label emitted in session.started/config.resolved payloads.
 WS_PROTOCOL_VERSION=v1

 # CORS / ICE (JSON strings)