feat(rtvi): type UI accessibility snapshots

This commit is contained in:
Mark Backman
2026-05-06 11:29:19 -04:00
parent 41124dc494
commit d18fe7c39c
2 changed files with 167 additions and 7 deletions

View File

@@ -20,7 +20,7 @@ from typing import (
Literal,
)
from pydantic import BaseModel
from pydantic import BaseModel, ConfigDict
from pipecat.frames.frames import (
AggregationType,
@@ -617,18 +617,91 @@ class UICommandData(BaseModel):
payload: Any | None = None
class A11yNode(BaseModel):
    """A single element of the UI accessibility snapshot tree.

    Follows the client-side ``A11yNode`` wire shape. Unknown fields are
    accepted (``extra="allow"``) so clients can attach platform-specific
    or future metadata without breaking older servers.

    Parameters:
        ref: Stable element reference assigned by the client.
        role: ARIA-style role of the node.
        name: Accessible name, if any.
        value: Current value for inputs/progress/etc., if any.
        state: Short state tags such as ``"focused"``, ``"disabled"``
            or ``"offscreen"``, if any.
        level: Heading level, if any.
        colcount: Column count for grid-like containers, if any.
        rowcount: Row count for grid-like containers, if any.
        children: Nested child nodes, if any.
    """

    # Keep unknown keys instead of rejecting or dropping them.
    model_config = ConfigDict(extra="allow")

    # Required identity of the node.
    ref: str
    role: str

    # Optional descriptive attributes.
    name: str | None = None
    value: str | None = None
    state: list[str] | None = None
    level: int | None = None
    colcount: int | None = None
    rowcount: int | None = None

    # Recursive part of the tree; the forward reference names this class.
    children: list["A11yNode"] | None = None
class A11ySelection(BaseModel):
    """Text currently selected by the user, as reported in the UI snapshot.

    Unknown fields are accepted (``extra="allow"``) to stay forward
    compatible with additions to the client snapshot.

    Parameters:
        ref: Ref of the element carrying the selection.
        text: The selected text.
        start_offset: Selection start offset, if provided.
        end_offset: Selection end offset, if provided.
    """

    # Keep unknown keys instead of rejecting or dropping them.
    model_config = ConfigDict(extra="allow")

    # Required: where the selection lives and what it contains.
    ref: str
    text: str

    # Optional offsets of the selection.
    start_offset: int | None = None
    end_offset: int | None = None
class A11ySnapshot(BaseModel):
    """Accessibility snapshot a client sends in a ``ui-snapshot`` message.

    Follows the client-side ``A11ySnapshot`` wire shape. Unknown fields
    are accepted (``extra="allow"``) so clients can add compatible
    metadata over time.

    Parameters:
        root: Root node of the accessibility tree.
        captured_at: Client-side capture time in epoch milliseconds.
        selection: Current text selection, if any.
    """

    # Keep unknown keys instead of rejecting or dropping them.
    model_config = ConfigDict(extra="allow")

    root: A11yNode
    captured_at: int
    selection: A11ySelection | None = None
class UISnapshotData(BaseModel):
    """Inner ``data`` for a ``ui-snapshot`` message.

    The accessibility snapshot tree mirrors the client-side
    ``A11ySnapshot`` wire shape and is kept forward-compatible by
    allowing extra fields on the snapshot models.

    Parameters:
        tree: The typed accessibility snapshot. Declared without a
            default, so it must be supplied.
    """

    # Single declaration only: a leftover ``tree: Any | None = None`` ahead of
    # this line would leave a ``None`` default on a non-optional field.
    tree: A11ySnapshot
class UICancelTaskData(BaseModel):

View File

@@ -19,6 +19,9 @@ from pipecat.processors.frameworks.rtvi.frames import (
RTVIUIEventFrame,
)
from pipecat.processors.frameworks.rtvi.models import (
A11yNode,
A11ySelection,
A11ySnapshot,
Click,
Focus,
Highlight,
@@ -72,17 +75,101 @@ class TestEnvelopeMessages(unittest.TestCase):
)
def test_ui_snapshot_envelope(self):
    """Check that a typed ``ui-snapshot`` message dumps to the full envelope.

    Builds a snapshot with a root node, one child button and a selection,
    then compares ``model_dump()`` against the complete nested dict,
    including the ``None`` defaults of every unset optional field.
    """
    msg = UISnapshotMessage(
        id="m2",
        data=UISnapshotData(
            tree=A11ySnapshot(
                root=A11yNode(
                    ref="e1",
                    role="main",
                    children=[A11yNode(ref="e2", role="button", name="Save")],
                ),
                captured_at=42,
                selection=A11ySelection(ref="e2", text="Save", start_offset=0, end_offset=4),
            )
        ),
    )
    self.assertEqual(
        msg.model_dump(),
        {
            "label": "rtvi-ai",
            "type": "ui-snapshot",
            "id": "m2",
            # Exactly one "data" key: a duplicate key in a dict literal is
            # silently discarded, hiding a stale expectation.
            "data": {
                "tree": {
                    "root": {
                        "ref": "e1",
                        "role": "main",
                        "name": None,
                        "value": None,
                        "state": None,
                        "level": None,
                        "colcount": None,
                        "rowcount": None,
                        "children": [
                            {
                                "ref": "e2",
                                "role": "button",
                                "name": "Save",
                                "value": None,
                                "state": None,
                                "level": None,
                                "colcount": None,
                                "rowcount": None,
                                "children": None,
                            }
                        ],
                    },
                    "captured_at": 42,
                    "selection": {
                        "ref": "e2",
                        "text": "Save",
                        "start_offset": 0,
                        "end_offset": 4,
                    },
                }
            },
        },
    )
def test_ui_snapshot_allows_future_client_fields(self):
    """Check that unknown client fields survive validation and dumping.

    The payload carries one extra key at each level of the snapshot
    (root node, child node, selection, and the snapshot itself); each
    must come back unchanged from ``model_dump()``.
    """
    # Raw wire payload with extra keys the models do not declare.
    payload = {
        "id": "m2",
        "data": {
            "tree": {
                "root": {
                    "ref": "e1",
                    "role": "main",
                    "bounds": {"x": 1, "y": 2},
                    "children": [
                        {
                            "ref": "e2",
                            "role": "button",
                            "name": "Save",
                            "platform_state": {"pressed": False},
                        }
                    ],
                },
                "captured_at": 42,
                "selection": {
                    "ref": "e2",
                    "text": "Save",
                    "direction": "forward",
                },
                "viewport": {"width": 1024, "height": 768},
            }
        },
    }

    parsed = UISnapshotMessage.model_validate(payload)
    tree = parsed.model_dump()["data"]["tree"]
    root = tree["root"]
    first_child = root["children"][0]

    self.assertEqual(root["bounds"], {"x": 1, "y": 2})
    self.assertEqual(first_child["platform_state"], {"pressed": False})
    self.assertEqual(tree["selection"]["direction"], "forward")
    self.assertEqual(tree["viewport"], {"width": 1024, "height": 768})
def test_ui_cancel_task_envelope(self):
msg = UICancelTaskMessage(id="m3", data=UICancelTaskData(task_id="t-99", reason="user"))
self.assertEqual(