# Listing metadata: 945 lines, 35 KiB, Python
"""Session management for active calls."""
|
|
|
|
import asyncio
|
|
import hashlib
|
|
import json
|
|
import re
|
|
import time
|
|
from enum import Enum
|
|
from typing import Optional, Dict, Any, List
|
|
from loguru import logger
|
|
|
|
from app.backend_adapters import build_backend_adapter_from_settings
|
|
from core.transports import BaseTransport
|
|
from core.duplex_pipeline import DuplexPipeline
|
|
from core.conversation import ConversationTurn
|
|
from core.history_bridge import SessionHistoryBridge
|
|
from core.workflow_runner import WorkflowRunner, WorkflowTransition, WorkflowNodeDef, WorkflowEdgeDef
|
|
from app.config import settings
|
|
from services.base import LLMMessage
|
|
from models.ws_v1 import (
|
|
parse_client_message,
|
|
ev,
|
|
HelloMessage,
|
|
SessionStartMessage,
|
|
SessionStopMessage,
|
|
InputTextMessage,
|
|
ResponseCancelMessage,
|
|
ToolCallResultsMessage,
|
|
)
|
|
|
|
|
|
class WsSessionState(str, Enum):
    """States of the WS v1 protocol handshake for one session.

    Members are plain strings (the enum mixes in ``str``), so they compare
    equal to their wire values.
    """

    # Initial state: no hello message has been accepted yet.
    WAIT_HELLO = "wait_hello"
    # Hello accepted; waiting for session.start.
    WAIT_START = "wait_start"
    # session.start processed; audio and control messages are accepted.
    ACTIVE = "active"
    # Session stopped or handshake rejected.
    STOPPED = "stopped"
|
|
|
|
|
|
class Session:
|
|
"""
|
|
Manages a single call session.
|
|
|
|
Handles command routing, audio processing, and session lifecycle.
|
|
Uses full duplex voice conversation pipeline.
|
|
"""
|
|
|
|
# Logical track identifiers attached to outgoing events.
TRACK_AUDIO_IN = "audio_in"
TRACK_AUDIO_OUT = "audio_out"
TRACK_CONTROL = "control"

# One 20 ms frame of 16 kHz mono pcm_s16le:
# 16000 samples/s * 20 ms / 1000 * 2 bytes per sample = 640 bytes.
AUDIO_FRAME_BYTES = 16000 * 20 // 1000 * 2

# Metadata keys an untrusted source may override.
_CLIENT_METADATA_OVERRIDES = {
    "assistantId",
    "bargeIn",
    "firstTurnMode",
    "generatedOpenerEnabled",
    "greeting",
    "history",
    "knowledge",
    "knowledgeBaseId",
    "output",
    "source",
    "systemPrompt",
    "userId",
}

# Identifier keys that are always carried through sanitization.
_CLIENT_METADATA_ID_KEYS = {
    "appId",
    "app_id",
    "channel",
    "configVersionId",
    "config_version_id",
}
|
|
|
|
def __init__(
    self,
    session_id: str,
    transport: BaseTransport,
    use_duplex: Optional[bool] = None,
    backend_gateway: Optional[Any] = None,
) -> None:
    """
    Initialize session.

    Args:
        session_id: Unique session identifier
        transport: Transport instance for communication
        use_duplex: Whether to use duplex pipeline (defaults to settings.duplex_enabled)
        backend_gateway: Backend adapter used for history writes, knowledge
            search and tool resources; when falsy, one is built via
            ``build_backend_adapter_from_settings()``.
    """
    self.id = session_id
    self.transport = transport
    self.use_duplex = settings.duplex_enabled if use_duplex is None else use_duplex
    self._backend_gateway = backend_gateway or build_backend_adapter_from_settings()

    # History persistence is delegated to the bridge; the gateway is its writer.
    self._history_bridge = SessionHistoryBridge(
        history_writer=self._backend_gateway,
        enabled=settings.history_enabled,
        queue_max_size=settings.history_queue_max_size,
        retry_max_attempts=settings.history_retry_max_attempts,
        retry_backoff_sec=settings.history_retry_backoff_sec,
        finalize_drain_timeout_sec=settings.history_finalize_drain_timeout_sec,
    )

    # Optional gateway capabilities are looked up defensively: a gateway
    # without these attributes simply passes None to the pipeline.
    self.pipeline = DuplexPipeline(
        transport=transport,
        session_id=session_id,
        system_prompt=settings.duplex_system_prompt,
        greeting=settings.duplex_greeting,
        knowledge_searcher=getattr(self._backend_gateway, "search_knowledge_context", None),
        tool_resource_resolver=getattr(self._backend_gateway, "fetch_tool_resource", None),
    )

    # Session state
    self.created_at = None
    self.state = "created"  # Legacy call state for /call/lists
    self.ws_state = WsSessionState.WAIT_HELLO
    self._pipeline_started = False
    self.protocol_version: Optional[str] = None
    self.authenticated: bool = False

    # Track IDs
    self.current_track_id: str = self.TRACK_CONTROL
    self._event_seq: int = 0
    self._cleanup_lock = asyncio.Lock()
    self._cleaned_up = False
    self.workflow_runner: Optional[WorkflowRunner] = None
    self._workflow_last_user_text: str = ""
    self._workflow_initial_node: Optional[WorkflowNodeDef] = None

    # Pipeline events share this session's sequence counter, and completed
    # turns are routed back to the session for workflow/history handling.
    self.pipeline.set_event_sequence_provider(self._next_event_seq)
    self.pipeline.conversation.on_turn_complete(self._on_turn_complete)

    logger.info(f"Session {self.id} created (duplex={self.use_duplex})")
|
|
|
|
async def handle_text(self, text_data: str) -> None:
    """
    Handle incoming text data (WS v1 JSON control messages).

    The payload is decoded as JSON, parsed with ``parse_client_message`` and
    dispatched to ``_handle_v1_message``. Failures are reported to the
    client as ``error`` events instead of propagating to the caller.

    Args:
        text_data: JSON text data
    """
    try:
        data = json.loads(text_data)
        message = parse_client_message(data)
        await self._handle_v1_message(message)

    except json.JSONDecodeError as e:
        # Must come before ValueError: JSONDecodeError is a ValueError subclass.
        logger.error("Session {} JSON decode error: {}", self.id, e)
        await self._send_error("client", f"Invalid JSON: {e}", "protocol.invalid_json")

    except ValueError as e:
        logger.error("Session {} command parse error: {}", self.id, e)
        await self._send_error("client", f"Invalid message: {e}", "protocol.invalid_message")

    except Exception as e:
        # loguru has no ``exc_info`` flag: passing it as a kwarg makes loguru
        # run ``str.format`` on the already-interpolated message (which can
        # raise if the exception text contains braces) and logs no traceback.
        # ``logger.exception`` attaches the active traceback instead.
        logger.exception("Session {} handle_text error: {}", self.id, e)
        await self._send_error("server", f"Internal error: {e}", "server.internal")
|
|
|
|
async def handle_audio(self, audio_bytes: bytes) -> None:
    """
    Handle incoming audio data.

    Audio is only accepted while the session is ACTIVE. The payload must be
    a whole number of ``AUDIO_FRAME_BYTES``-sized frames; each frame is fed
    to the pipeline in order. Problems are reported to the client as
    ``error`` events rather than raised.

    Args:
        audio_bytes: PCM audio data
    """
    if self.ws_state != WsSessionState.ACTIVE:
        await self._send_error(
            "client",
            "Audio received before session.start",
            "protocol.order",
            stage="protocol",
            retryable=False,
        )
        return

    try:
        if not audio_bytes:
            return

        # 16-bit samples: an odd byte count can never be valid PCM. Checked
        # separately so the client gets a more specific error code.
        if len(audio_bytes) % 2 != 0:
            await self._send_error(
                "client",
                "Invalid PCM payload: odd number of bytes",
                "audio.invalid_pcm",
                stage="audio",
                retryable=False,
            )
            return

        frame_bytes = self.AUDIO_FRAME_BYTES
        if len(audio_bytes) % frame_bytes != 0:
            await self._send_error(
                "client",
                f"Audio frame size must be a multiple of {frame_bytes} bytes (20ms PCM)",
                "audio.frame_size_mismatch",
                stage="audio",
                retryable=False,
            )
            return

        for offset in range(0, len(audio_bytes), frame_bytes):
            await self.pipeline.process_audio(audio_bytes[offset : offset + frame_bytes])
    except Exception as e:
        # loguru ignores ``exc_info=True`` and would str.format the message
        # with it; ``logger.exception`` records the traceback properly.
        logger.exception("Session {} handle_audio error: {}", self.id, e)
        await self._send_error(
            "server",
            f"Audio processing failed: {e}",
            "audio.processing_failed",
            stage="audio",
            retryable=True,
        )
|
|
|
|
async def _handle_v1_message(self, message: Any) -> None:
    """Dispatch a parsed WS v1 message according to the protocol state.

    Order enforced here: ``hello`` is always routed to its handler; every
    other message is rejected until hello has been accepted; ``session.start``
    is routed next; the remaining message types require an ACTIVE session.
    """
    msg_type = message.type
    logger.info(f"Session {self.id} received message: {msg_type}")

    if isinstance(message, HelloMessage):
        await self._handle_hello(message)
        return

    # Everything past this point needs a completed hello handshake.
    if self.ws_state == WsSessionState.WAIT_HELLO:
        await self._send_error("client", "Expected hello message first", "protocol.order")
        return

    if isinstance(message, SessionStartMessage):
        await self._handle_session_start(message)
        return

    # Everything past this point needs an active session.
    if self.ws_state != WsSessionState.ACTIVE:
        await self._send_error(
            "client",
            f"Message '{msg_type}' requires active session",
            "protocol.order",
        )
        return

    if isinstance(message, InputTextMessage):
        await self.pipeline.process_text(message.text)
        return

    if isinstance(message, ResponseCancelMessage):
        # A graceful cancel is only logged; a hard cancel interrupts the pipeline.
        if not message.graceful:
            await self.pipeline.interrupt()
        else:
            logger.info(f"Session {self.id} graceful response.cancel")
        return

    if isinstance(message, ToolCallResultsMessage):
        results = [item.model_dump() for item in message.results]
        await self.pipeline.handle_tool_call_results(results)
        return

    if isinstance(message, SessionStopMessage):
        await self._handle_session_stop(message.reason)
        return

    await self._send_error("client", f"Unsupported message type: {msg_type}", "protocol.unsupported")
|
|
|
|
async def _handle_hello(self, message: HelloMessage) -> None:
    """Handle initial hello/auth/version negotiation.

    On success the session is marked authenticated, moves to WAIT_START and
    a ``hello.ack`` event is sent. A version mismatch or failed
    authentication sends an error, closes the transport and stops the
    session. A hello received in any state other than WAIT_HELLO is
    rejected as a duplicate without closing the connection.

    Args:
        message: Parsed hello message carrying ``version`` and optional ``auth``.
    """
    if self.ws_state != WsSessionState.WAIT_HELLO:
        await self._send_error("client", "Duplicate hello", "protocol.order")
        return

    if message.version != settings.ws_protocol_version:
        await self._reject_handshake(
            "client",
            f"Unsupported protocol version '{message.version}'",
            "protocol.version_unsupported",
        )
        return

    auth_payload = message.auth
    api_key = auth_payload.apiKey if auth_payload else None
    jwt = auth_payload.jwt if auth_payload else None

    if settings.ws_api_key:
        # Local import keeps this block self-contained.
        import hmac

        # Constant-time comparison so response timing does not reveal how
        # much of a guessed key matched. Bytes are compared because
        # compare_digest rejects non-ASCII str arguments.
        key_matches = isinstance(api_key, str) and hmac.compare_digest(
            api_key.encode("utf-8"),
            str(settings.ws_api_key).encode("utf-8"),
        )
        if not key_matches:
            await self._reject_handshake("auth", "Invalid API key", "auth.invalid_api_key")
            return
    elif settings.ws_require_auth and not (api_key or jwt):
        await self._reject_handshake("auth", "Authentication required", "auth.required")
        return
    # NOTE(review): when no ws_api_key is configured, any non-empty apiKey or
    # jwt satisfies ws_require_auth; the token itself is not verified in this
    # method. Confirm it is validated elsewhere.

    self.authenticated = True
    self.protocol_version = message.version
    self.ws_state = WsSessionState.WAIT_START
    await self._send_event(
        ev(
            "hello.ack",
            version=self.protocol_version,
        )
    )


async def _reject_handshake(self, sender: str, error_message: str, code: str) -> None:
    """Send a handshake error, close the transport and mark the session stopped."""
    await self._send_error(sender, error_message, code)
    await self.transport.close()
    self.ws_state = WsSessionState.STOPPED
|
|
|
|
async def _handle_session_start(self, message: SessionStartMessage) -> None:
    """Process ``session.start``: resolve runtime config and activate the session.

    Runtime metadata is layered in this order (later wins): backend config
    loaded for the requested assistant, sanitized workflow start-node
    overrides, then sanitized client metadata. After the history record is
    opened and the pipeline started, the session becomes ACTIVE and emits
    ``session.started``, ``config.resolved`` and, when a workflow was
    bootstrapped, the initial workflow events.

    Args:
        message: Parsed session.start message (``metadata`` and ``audio`` are read).
    """
    if self.ws_state != WsSessionState.WAIT_START:
        await self._send_error("client", "Duplicate session.start", "protocol.order")
        return

    raw_metadata = message.metadata or {}
    workflow_runtime = self._bootstrap_workflow(raw_metadata)
    server_runtime = await self._load_server_runtime_metadata(raw_metadata, workflow_runtime)
    client_runtime = self._sanitize_client_metadata(raw_metadata)

    # Only used for logging which assistant the client asked for.
    requested_assistant_id = (
        workflow_runtime.get("assistantId")
        or raw_metadata.get("assistantId")
        or raw_metadata.get("appId")
        or raw_metadata.get("app_id")
    )
    if not server_runtime:
        logger.warning(
            "Session {} missing trusted backend runtime config "
            "(requested_assistant_id={}); falling back to engine defaults + safe client overrides",
            self.id,
            requested_assistant_id,
        )
    else:
        logger.info(
            "Session {} loaded trusted runtime config from backend "
            "(requested_assistant_id={}, resolved_assistant_id={}, configVersionId={}, has_services={})",
            self.id,
            requested_assistant_id,
            server_runtime.get("assistantId"),
            server_runtime.get("configVersionId") or server_runtime.get("config_version_id"),
            isinstance(server_runtime.get("services"), dict),
        )

    workflow_overrides = self._sanitize_untrusted_runtime_metadata(workflow_runtime)
    metadata = self._merge_runtime_metadata(server_runtime, workflow_overrides)
    metadata = self._merge_runtime_metadata(metadata, client_runtime)

    # Create history call record early so later turn callbacks can append transcripts.
    await self._start_history_bridge(metadata)

    # Apply runtime service/prompt overrides, then read back what the
    # pipeline actually resolved so the log reflects the effective config.
    self.pipeline.apply_runtime_overrides(metadata)
    preview = self.pipeline.resolved_runtime_config()
    preview_is_dict = isinstance(preview, dict)
    services = preview.get("services", {}) if preview_is_dict else {}
    if isinstance(services, dict):
        llm_cfg = services.get("llm", {})
        asr_cfg = services.get("asr", {})
        tts_cfg = services.get("tts", {})
    else:
        llm_cfg, asr_cfg, tts_cfg = {}, {}, {}
    logger.info(
        "Session {} effective runtime services "
        "(assistantId={}, configVersionId={}, output_mode={}, "
        "llm={}/{}, asr={}/{}, tts={}/{}, tts_enabled={})",
        self.id,
        metadata.get("assistantId") or metadata.get("appId") or metadata.get("app_id"),
        metadata.get("configVersionId") or metadata.get("config_version_id"),
        (preview.get("output") or {}).get("mode") if preview_is_dict else None,
        llm_cfg.get("provider"),
        llm_cfg.get("model"),
        asr_cfg.get("provider"),
        asr_cfg.get("model"),
        tts_cfg.get("provider"),
        tts_cfg.get("model"),
        tts_cfg.get("enabled"),
    )

    # Start duplex pipeline
    if not self._pipeline_started:
        await self.pipeline.start()
        self._pipeline_started = True
        logger.info(f"Session {self.id} duplex pipeline started")

    self.state = "accepted"
    self.ws_state = WsSessionState.ACTIVE

    track_map = {
        "audio_in": self.TRACK_AUDIO_IN,
        "audio_out": self.TRACK_AUDIO_OUT,
        "control": self.TRACK_CONTROL,
    }
    await self._send_event(
        ev(
            "session.started",
            trackId=self.current_track_id,
            tracks=track_map,
            audio=message.audio.model_dump() if message.audio else {},
        )
    )
    await self._send_event(
        ev(
            "config.resolved",
            trackId=self.TRACK_CONTROL,
            config=self._build_config_resolved(metadata),
        )
    )

    if not (self.workflow_runner and self._workflow_initial_node):
        return

    await self._send_event(
        ev(
            "workflow.started",
            workflowId=self.workflow_runner.workflow_id,
            workflowName=self.workflow_runner.name,
            nodeId=self._workflow_initial_node.id,
        )
    )
    await self._send_event(
        ev(
            "workflow.node.entered",
            workflowId=self.workflow_runner.workflow_id,
            nodeId=self._workflow_initial_node.id,
            nodeName=self._workflow_initial_node.name,
            nodeType=self._workflow_initial_node.node_type,
        )
    )
|
|
|
|
async def _handle_session_stop(self, reason: Optional[str]) -> None:
    """Stop the session once: emit ``session.stopped``, finalize history, close transport.

    Calling this on an already-stopped session is a no-op.

    Args:
        reason: Stop reason reported to the client; ``"client_requested"`` when falsy.
    """
    already_stopped = self.ws_state == WsSessionState.STOPPED
    if already_stopped:
        return

    # State is flipped before any await so re-entrant calls see STOPPED.
    self.state = "hungup"
    self.ws_state = WsSessionState.STOPPED

    stopped_event = ev("session.stopped", reason=reason or "client_requested")
    await self._send_event(stopped_event)
    await self._finalize_history(status="connected")
    await self.transport.close()
|
|
|
|
async def _send_error(
    self,
    sender: str,
    error_message: str,
    code: str,
    stage: Optional[str] = None,
    retryable: Optional[bool] = None,
    track_id: Optional[str] = None,
) -> None:
    """
    Send error event to client.

    The error details are emitted both as top-level event fields and nested
    under ``data["error"]``.

    Args:
        sender: Component that generated the error
        error_message: Error message
        code: Machine-readable error code
        stage: Stage the error belongs to; inferred from ``code`` when falsy
        retryable: Whether the client may retry; when None, True only for the
            asr/llm/tts/tool/audio stages
        track_id: Track to attribute the error to; derived from stage/code when falsy
    """
    effective_stage = stage or self._infer_error_stage(code)
    if retryable is None:
        effective_retryable = effective_stage in {"asr", "llm", "tts", "tool", "audio"}
    else:
        effective_retryable = retryable
    effective_track = track_id or self._error_track_id(effective_stage, code)

    error_detail = {
        "stage": effective_stage,
        "code": code,
        "message": error_message,
        "retryable": effective_retryable,
    }
    await self._send_event(
        ev(
            "error",
            sender=sender,
            code=code,
            message=error_message,
            stage=effective_stage,
            retryable=effective_retryable,
            trackId=effective_track,
            data={"error": error_detail},
        )
    )
|
|
|
|
def _get_timestamp_ms(self) -> int:
|
|
"""Get current timestamp in milliseconds."""
|
|
import time
|
|
return int(time.time() * 1000)
|
|
|
|
async def cleanup(self) -> None:
    """Cleanup session resources.

    Safe to call multiple times and concurrently: the work runs once under
    ``_cleanup_lock``. Every teardown step is attempted even if an earlier
    one raises, so a failing history finalize cannot leave the pipeline or
    transport open. The most recent exception still propagates to the caller.
    """
    async with self._cleanup_lock:
        if self._cleaned_up:
            return

        # Marked before the awaits, so a failed attempt is never retried;
        # that is why the steps below are chained with ``finally``.
        self._cleaned_up = True
        logger.info(f"Session {self.id} cleaning up")
        try:
            await self._finalize_history(status="connected")
        finally:
            try:
                await self.pipeline.cleanup()
            finally:
                try:
                    await self._history_bridge.shutdown()
                finally:
                    await self.transport.close()
|
|
|
|
async def _start_history_bridge(self, metadata: Dict[str, Any]) -> None:
    """Open the backend history call record for this session, if not already open.

    ``userId``, ``assistantId`` and ``source`` are read from
    ``metadata["history"]`` first (when it is a dict), then from the top
    level of ``metadata``, then from defaults.

    Args:
        metadata: Merged runtime metadata for the session.
    """
    if self._history_bridge.call_id:
        return

    nested = metadata.get("history")
    history_meta: Dict[str, Any] = metadata["history"] if isinstance(nested, dict) else {}

    raw_user_id = history_meta.get("userId", metadata.get("userId", settings.history_default_user_id))
    try:
        user_id = int(raw_user_id)
    except (TypeError, ValueError):
        # Non-numeric or missing user id: fall back to the configured default.
        user_id = settings.history_default_user_id

    assistant_id = history_meta.get("assistantId", metadata.get("assistantId"))
    assistant_ref = str(assistant_id) if assistant_id else None
    source = str(history_meta.get("source", metadata.get("source", "debug")))

    call_id = await self._history_bridge.start_call(
        user_id=user_id,
        assistant_id=assistant_ref,
        source=source,
    )
    if call_id:
        logger.info(f"Session {self.id} history bridge enabled (call_id={call_id}, source={source})")
|
|
|
|
async def _on_turn_complete(self, turn: ConversationTurn) -> None:
|
|
"""Process workflow transitions and persist completed turns to history."""
|
|
if turn.text and turn.text.strip():
|
|
role = (turn.role or "").lower()
|
|
if role == "user":
|
|
self._workflow_last_user_text = turn.text.strip()
|
|
elif role == "assistant":
|
|
await self._maybe_advance_workflow(turn.text.strip())
|
|
|
|
self._history_bridge.enqueue_turn(role=turn.role or "", text=turn.text or "")
|
|
|
|
async def _finalize_history(self, status: str) -> None:
|
|
"""Finalize history call record once."""
|
|
await self._history_bridge.finalize(status=status)
|
|
|
|
def _bootstrap_workflow(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Build the workflow runner from ``metadata["workflow"]`` and enter its start node.

    Args:
        metadata: Raw session.start metadata.

    Returns:
        Runtime metadata for the start node, or ``{}`` when there is no
        workflow payload or no start node could be resolved (in which case
        the runner is discarded).
    """
    self.workflow_runner = WorkflowRunner.from_payload(metadata.get("workflow"))
    self._workflow_initial_node = None

    runner = self.workflow_runner
    if not runner:
        return {}

    start_node = runner.bootstrap()
    if not start_node:
        logger.warning(f"Session {self.id} workflow payload had no resolvable start node")
        self.workflow_runner = None
        return {}

    self._workflow_initial_node = start_node
    logger.info(
        "Session {} workflow enabled: workflow={} start_node={}",
        self.id,
        runner.workflow_id,
        start_node.id,
    )
    return runner.build_runtime_metadata(start_node)
|
|
|
|
async def _maybe_advance_workflow(self, assistant_text: str) -> None:
    """Route the workflow after an assistant turn and follow automatic hops.

    First asks the runner for a transition based on the last user text and
    this assistant text. If one is taken, default edges out of ``start`` and
    ``tool`` nodes are then followed automatically, up to a fixed hop limit.

    Args:
        assistant_text: Stripped text of the assistant turn that just completed.
    """
    if not self.workflow_runner or self.ws_state == WsSessionState.STOPPED:
        return

    transition = await self.workflow_runner.route(
        user_text=self._workflow_last_user_text,
        assistant_text=assistant_text,
        llm_router=self._workflow_llm_route,
    )
    if not transition:
        return

    await self._apply_workflow_transition(transition, reason="rule_match")

    # Auto-advance through utility nodes when default edges are present.
    max_auto_hops = 6
    for hop in range(1, max_auto_hops + 1):
        # Re-checked every hop: applying a transition may stop the session.
        if not self.workflow_runner or self.ws_state == WsSessionState.STOPPED:
            break

        current = self.workflow_runner.current_node
        if not current or current.node_type not in {"start", "tool"}:
            break

        next_default = self.workflow_runner.next_default_transition()
        if not next_default:
            break

        await self._apply_workflow_transition(next_default, reason="auto")
        if hop == max_auto_hops:
            logger.warning(
                "Session {} workflow auto-advance reached hop limit (possible cycle)",
                self.id,
            )
|
|
|
|
async def _apply_workflow_transition(self, transition: WorkflowTransition, reason: str) -> None:
    """Commit a workflow transition and emit the matching lifecycle events.

    Emits ``workflow.edge.taken`` and ``workflow.node.entered``, applies the
    new node's runtime overrides to the pipeline, then handles node types
    with side effects: ``tool`` requests the tool, while ``human_transfer``
    and ``end`` stop the session.

    Args:
        transition: Transition to apply (its ``node`` and ``edge`` are read).
        reason: Why the edge was taken; echoed in ``workflow.edge.taken``.
    """
    runner = self.workflow_runner
    if not runner:
        return

    runner.apply_transition(transition)
    node, edge = transition.node, transition.edge
    workflow_id = runner.workflow_id

    await self._send_event(
        ev(
            "workflow.edge.taken",
            workflowId=workflow_id,
            edgeId=edge.id,
            fromNodeId=edge.from_node_id,
            toNodeId=edge.to_node_id,
            reason=reason,
        )
    )
    await self._send_event(
        ev(
            "workflow.node.entered",
            workflowId=workflow_id,
            nodeId=node.id,
            nodeName=node.name,
            nodeType=node.node_type,
        )
    )

    node_runtime = runner.build_runtime_metadata(node)
    if node_runtime:
        self.pipeline.apply_runtime_overrides(node_runtime)

    node_type = node.node_type
    if node_type == "tool":
        await self._send_event(
            ev(
                "workflow.tool.requested",
                workflowId=workflow_id,
                nodeId=node.id,
                tool=node.tool or {},
            )
        )
    elif node_type == "human_transfer":
        await self._send_event(
            ev(
                "workflow.human_transfer",
                workflowId=workflow_id,
                nodeId=node.id,
            )
        )
        await self._handle_session_stop("workflow_human_transfer")
    elif node_type == "end":
        await self._send_event(
            ev(
                "workflow.ended",
                workflowId=workflow_id,
                nodeId=node.id,
            )
        )
        await self._handle_session_stop("workflow_end")
|
|
|
|
def _next_event_seq(self) -> int:
|
|
self._event_seq += 1
|
|
return self._event_seq
|
|
|
|
def _event_source(self, event_type: str) -> str:
|
|
if event_type.startswith("workflow."):
|
|
return "system"
|
|
if event_type.startswith("session.") or event_type.startswith("hello.") or event_type == "heartbeat":
|
|
return "system"
|
|
if event_type == "error":
|
|
return "system"
|
|
return "system"
|
|
|
|
def _infer_error_stage(self, code: str) -> str:
|
|
normalized = str(code or "").strip().lower()
|
|
if normalized.startswith("audio."):
|
|
return "audio"
|
|
if normalized.startswith("tool."):
|
|
return "tool"
|
|
if normalized.startswith("asr."):
|
|
return "asr"
|
|
if normalized.startswith("llm."):
|
|
return "llm"
|
|
if normalized.startswith("tts."):
|
|
return "tts"
|
|
return "protocol"
|
|
|
|
def _error_track_id(self, stage: str, code: str) -> str:
|
|
if stage in {"audio", "asr"}:
|
|
return self.TRACK_AUDIO_IN
|
|
if stage in {"llm", "tts", "tool"}:
|
|
return self.TRACK_AUDIO_OUT
|
|
if str(code or "").strip().lower().startswith("auth."):
|
|
return self.TRACK_CONTROL
|
|
return self.TRACK_CONTROL
|
|
|
|
def _envelope_event(self, event: Dict[str, Any]) -> Dict[str, Any]:
|
|
event_type = str(event.get("type") or "")
|
|
source = str(event.get("source") or self._event_source(event_type))
|
|
track_id = event.get("trackId") or self.TRACK_CONTROL
|
|
|
|
data = event.get("data")
|
|
if not isinstance(data, dict):
|
|
data = {}
|
|
for k, v in event.items():
|
|
if k in {"type", "timestamp", "sessionId", "seq", "source", "trackId", "data"}:
|
|
continue
|
|
data.setdefault(k, v)
|
|
|
|
event["sessionId"] = self.id
|
|
event["seq"] = self._next_event_seq()
|
|
event["source"] = source
|
|
event["trackId"] = track_id
|
|
event["data"] = data
|
|
return event
|
|
|
|
async def _send_event(self, event: Dict[str, Any]) -> None:
|
|
await self.transport.send_event(self._envelope_event(event))
|
|
|
|
async def send_heartbeat(self) -> None:
    """Emit a ``heartbeat`` event on the control track."""
    heartbeat = ev("heartbeat", trackId=self.TRACK_CONTROL)
    await self._send_event(heartbeat)
|
|
|
|
async def _workflow_llm_route(
    self,
    node: WorkflowNodeDef,
    candidates: List[WorkflowEdgeDef],
    context: Dict[str, str],
) -> Optional[str]:
    """Ask the pipeline's LLM to choose one of the candidate edges.

    Args:
        node: Node currently being routed from.
        candidates: Candidate outgoing edges.
        context: Routing context; ``userText`` and ``assistantText`` are read.

    Returns:
        A candidate edge id or target node id named by the model, or None
        when no LLM service is available, the call fails, the reply is
        empty, or the reply names no candidate.
    """
    llm_service = self.pipeline.llm_service
    if not llm_service:
        return None

    candidate_rows = []
    for edge in candidates:
        condition = edge.condition
        candidate_rows.append(
            {
                "edgeId": edge.id,
                "toNodeId": edge.to_node_id,
                "label": edge.label,
                "hint": condition.get("prompt") if isinstance(condition, dict) else None,
            }
        )

    system_prompt = (
        "You are a workflow router. Pick exactly one edge. "
        "Return JSON only: {\"edgeId\":\"...\"}."
    )
    user_prompt = json.dumps(
        {
            "nodeId": node.id,
            "nodeName": node.name,
            "userText": context.get("userText", ""),
            "assistantText": context.get("assistantText", ""),
            "candidates": candidate_rows,
        },
        ensure_ascii=False,
    )
    prompt_messages = [
        LLMMessage(role="system", content=system_prompt),
        LLMMessage(role="user", content=user_prompt),
    ]

    try:
        reply = await llm_service.generate(
            prompt_messages,
            temperature=0.0,
            max_tokens=64,
        )
    except Exception as exc:
        # Routing is best-effort: a failed LLM call just means "no decision".
        logger.warning(f"Session {self.id} workflow llm routing failed: {exc}")
        return None

    if not reply:
        return None

    edge_ids = {edge.id for edge in candidates}
    node_ids = {edge.to_node_id for edge in candidates}

    # Preferred path: the model answered with a JSON object naming an id.
    parsed = self._extract_json_obj(reply)
    if isinstance(parsed, dict):
        edge_id = parsed.get("edgeId") or parsed.get("id")
        if isinstance(edge_id, str) and edge_id in edge_ids:
            return edge_id
        node_id = parsed.get("toNodeId") or parsed.get("nodeId")
        if isinstance(node_id, str) and node_id in node_ids:
            return node_id

    # Fallback: look for any known id mentioned in the raw reply. Longest
    # ids are tried first so an id is not shadowed by a shorter one it contains.
    lowered_reply = reply.lower()
    tokens_by_length = sorted(edge_ids | node_ids, key=len, reverse=True)
    return next((token for token in tokens_by_length if token.lower() in lowered_reply), None)
|
|
|
|
def _merge_runtime_metadata(self, base: Dict[str, Any], overrides: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Merge node-level metadata overrides into session.start metadata."""
|
|
merged = dict(base or {})
|
|
if not overrides:
|
|
return merged
|
|
for key, value in overrides.items():
|
|
if key == "services" and isinstance(value, dict):
|
|
existing = merged.get("services")
|
|
merged_services = dict(existing) if isinstance(existing, dict) else {}
|
|
merged_services.update(value)
|
|
merged["services"] = merged_services
|
|
else:
|
|
merged[key] = value
|
|
return merged
|
|
|
|
async def _load_server_runtime_metadata(
|
|
self,
|
|
client_metadata: Dict[str, Any],
|
|
workflow_runtime: Dict[str, Any],
|
|
) -> Dict[str, Any]:
|
|
"""Load trusted runtime metadata from backend assistant config."""
|
|
assistant_id = (
|
|
workflow_runtime.get("assistantId")
|
|
or client_metadata.get("assistantId")
|
|
or client_metadata.get("appId")
|
|
or client_metadata.get("app_id")
|
|
)
|
|
if assistant_id is None:
|
|
return {}
|
|
|
|
provider = getattr(self._backend_gateway, "fetch_assistant_config", None)
|
|
if not callable(provider):
|
|
return {}
|
|
|
|
payload = await provider(str(assistant_id).strip())
|
|
if not isinstance(payload, dict):
|
|
return {}
|
|
|
|
assistant_cfg: Dict[str, Any] = {}
|
|
session_start_cfg = payload.get("sessionStartMetadata")
|
|
if isinstance(session_start_cfg, dict):
|
|
assistant_cfg.update(session_start_cfg)
|
|
if isinstance(payload.get("assistant"), dict):
|
|
assistant_cfg.update(payload.get("assistant"))
|
|
elif not assistant_cfg:
|
|
assistant_cfg = payload
|
|
|
|
if not isinstance(assistant_cfg, dict):
|
|
return {}
|
|
|
|
runtime: Dict[str, Any] = {}
|
|
passthrough_keys = {
|
|
"firstTurnMode",
|
|
"generatedOpenerEnabled",
|
|
"output",
|
|
"bargeIn",
|
|
"knowledgeBaseId",
|
|
"knowledge",
|
|
"history",
|
|
"userId",
|
|
"source",
|
|
"tools",
|
|
"services",
|
|
"configVersionId",
|
|
"config_version_id",
|
|
}
|
|
for key in passthrough_keys:
|
|
if key in assistant_cfg:
|
|
runtime[key] = assistant_cfg[key]
|
|
|
|
if assistant_cfg.get("systemPrompt") is not None:
|
|
runtime["systemPrompt"] = str(assistant_cfg.get("systemPrompt") or "")
|
|
elif assistant_cfg.get("prompt") is not None:
|
|
runtime["systemPrompt"] = str(assistant_cfg.get("prompt") or "")
|
|
|
|
if assistant_cfg.get("greeting") is not None:
|
|
runtime["greeting"] = assistant_cfg.get("greeting")
|
|
elif assistant_cfg.get("opener") is not None:
|
|
runtime["greeting"] = assistant_cfg.get("opener")
|
|
|
|
resolved_assistant_id = (
|
|
assistant_cfg.get("assistantId")
|
|
or payload.get("assistantId")
|
|
or assistant_id
|
|
)
|
|
runtime["assistantId"] = str(resolved_assistant_id)
|
|
|
|
if runtime.get("configVersionId") is None and payload.get("configVersionId") is not None:
|
|
runtime["configVersionId"] = payload.get("configVersionId")
|
|
if runtime.get("configVersionId") is None and payload.get("config_version_id") is not None:
|
|
runtime["configVersionId"] = payload.get("config_version_id")
|
|
|
|
if runtime.get("config_version_id") is not None and runtime.get("configVersionId") is None:
|
|
runtime["configVersionId"] = runtime.get("config_version_id")
|
|
return runtime
|
|
|
|
def _sanitize_untrusted_runtime_metadata(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Sanitize untrusted metadata sources.
|
|
|
|
This keeps only a small override whitelist and stable config ID fields.
|
|
"""
|
|
if not isinstance(metadata, dict):
|
|
return {}
|
|
|
|
sanitized: Dict[str, Any] = {}
|
|
for key in self._CLIENT_METADATA_ID_KEYS:
|
|
if key in metadata:
|
|
sanitized[key] = metadata[key]
|
|
for key in self._CLIENT_METADATA_OVERRIDES:
|
|
if key in metadata:
|
|
sanitized[key] = metadata[key]
|
|
|
|
return sanitized
|
|
|
|
def _sanitize_client_metadata(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Apply client metadata whitelist and remove forbidden secrets."""
|
|
sanitized = self._sanitize_untrusted_runtime_metadata(metadata)
|
|
if isinstance(metadata.get("services"), dict):
|
|
logger.warning(
|
|
"Session {} provided metadata.services from client; client-side service config is ignored",
|
|
self.id,
|
|
)
|
|
return sanitized
|
|
|
|
def _build_config_resolved(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Build public resolved config payload (secrets removed)."""
|
|
system_prompt = str(metadata.get("systemPrompt") or self.pipeline.conversation.system_prompt or "")
|
|
prompt_hash = hashlib.sha256(system_prompt.encode("utf-8")).hexdigest() if system_prompt else None
|
|
runtime = self.pipeline.resolved_runtime_config()
|
|
|
|
return {
|
|
"appId": metadata.get("appId") or metadata.get("app_id") or metadata.get("assistantId"),
|
|
"channel": metadata.get("channel"),
|
|
"configVersionId": metadata.get("configVersionId") or metadata.get("config_version_id"),
|
|
"prompt": {"sha256": prompt_hash},
|
|
"output": runtime.get("output", {}),
|
|
"services": runtime.get("services", {}),
|
|
"tools": runtime.get("tools", {}),
|
|
"tracks": {
|
|
"audio_in": self.TRACK_AUDIO_IN,
|
|
"audio_out": self.TRACK_AUDIO_OUT,
|
|
"control": self.TRACK_CONTROL,
|
|
},
|
|
}
|
|
|
|
def _extract_json_obj(self, text: str) -> Optional[Dict[str, Any]]:
|
|
"""Best-effort extraction of a JSON object from freeform text."""
|
|
try:
|
|
parsed = json.loads(text)
|
|
if isinstance(parsed, dict):
|
|
return parsed
|
|
except Exception:
|
|
pass
|
|
|
|
match = re.search(r"\{.*\}", text, re.DOTALL)
|
|
if not match:
|
|
return None
|
|
try:
|
|
parsed = json.loads(match.group(0))
|
|
return parsed if isinstance(parsed, dict) else None
|
|
except Exception:
|
|
return None
|