Add output.audio.played message handling and update documentation

- Introduced `output.audio.played` message type for client acknowledgment of audio playback completion.
- Updated `DuplexPipeline` to track client playback state and handle playback completion events.
- Enhanced session handling to route `output.audio.played` messages to the pipeline.
- Revised API documentation to include details about the new message type and its fields.
- Updated schema documentation to reflect the addition of `output.audio.played` in the message flow.
This commit is contained in:
Xin Wang
2026-03-04 10:01:34 +08:00
parent 80fff09b76
commit 7d4af18815
8 changed files with 275 additions and 19 deletions

View File

@@ -1,7 +1,7 @@
import pytest
from core.session import Session, WsSessionState
from models.ws_v1 import SessionStartMessage, parse_client_message
from models.ws_v1 import OutputAudioPlayedMessage, SessionStartMessage, parse_client_message
def _session() -> Session:
@@ -16,6 +16,17 @@ def test_parse_client_message_rejects_hello_message():
parse_client_message({"type": "hello", "version": "v1"})
def test_parse_client_message_accepts_output_audio_played():
message = parse_client_message({"type": "output.audio.played", "tts_id": "tts_001"})
assert isinstance(message, OutputAudioPlayedMessage)
assert message.tts_id == "tts_001"
def test_parse_client_message_rejects_output_audio_played_without_tts_id():
with pytest.raises(ValueError, match="tts_id"):
parse_client_message({"type": "output.audio.played", "tts_id": ""})
@pytest.mark.asyncio
async def test_handle_text_reports_invalid_message_for_hello():
session = Session.__new__(Session)
@@ -42,6 +53,45 @@ async def test_handle_text_reports_invalid_message_for_hello():
assert "Unknown client message type: hello" in message
@pytest.mark.asyncio
async def test_handle_v1_message_routes_output_audio_played_to_pipeline():
session = Session.__new__(Session)
session.id = "sess_output_audio_played"
session.ws_state = WsSessionState.ACTIVE
received = {}
class _Pipeline:
async def handle_output_audio_played(self, **payload):
received.update(payload)
session.pipeline = _Pipeline()
async def _send_error(sender, message, code, **kwargs):
raise AssertionError(f"Unexpected error: sender={sender} code={code} message={message} kwargs={kwargs}")
session._send_error = _send_error
await session._handle_v1_message(
OutputAudioPlayedMessage(
type="output.audio.played",
tts_id="tts_001",
response_id="resp_001",
turn_id="turn_001",
played_at_ms=1730000018450,
played_ms=2520,
)
)
assert received == {
"tts_id": "tts_001",
"response_id": "resp_001",
"turn_id": "turn_001",
"played_at_ms": 1730000018450,
"played_ms": 2520,
}
def test_validate_metadata_rejects_services_payload():
session = _session()
sanitized, error = session._validate_and_sanitize_client_metadata({"services": {"llm": {"provider": "openai"}}})