Refactor project structure and enhance backend integration

- Expanded package inclusion in `pyproject.toml` to support new modules.
- Introduced new `adapters` and `protocol` packages for better organization.
- Added backend adapter implementations for control plane integration.
- Updated main application imports to reflect new package structure.
- Removed deprecated core components and adjusted documentation accordingly.
- Enhanced architecture documentation to clarify the new runtime and integration layers.
This commit is contained in:
Xin Wang
2026-03-06 09:51:56 +08:00
parent 4e2450e800
commit 7e0b777923
75 changed files with 274 additions and 688 deletions

View File

@@ -0,0 +1 @@
"""Adapters package."""

View File

@@ -0,0 +1 @@
"""Control-plane adapters package."""

View File

@@ -20,11 +20,11 @@ except ImportError:
logger.warning("aiortc not available - WebRTC endpoint will be disabled")
from app.config import settings
from app.backend_adapters import build_backend_adapter_from_settings
from core.transports import SocketTransport, WebRtcTransport, BaseTransport
from core.session import Session
from adapters.control_plane.backend import build_backend_adapter_from_settings
from runtime.transports import SocketTransport, WebRtcTransport, BaseTransport
from runtime.session.manager import Session
from processors.tracks import Resampled16kTrack
from core.events import get_event_bus, reset_event_bus
from runtime.events import get_event_bus, reset_event_bus
# Check interval for heartbeat/timeout (seconds)
_HEARTBEAT_CHECK_INTERVAL_SEC = 5

View File

@@ -1,20 +0,0 @@
"""Core Components Package"""
from core.events import EventBus, get_event_bus
from core.transports import BaseTransport, SocketTransport, WebRtcTransport
from core.session import Session
from core.conversation import ConversationManager, ConversationState, ConversationTurn
from core.duplex_pipeline import DuplexPipeline
__all__ = [
"EventBus",
"get_event_bus",
"BaseTransport",
"SocketTransport",
"WebRtcTransport",
"Session",
"ConversationManager",
"ConversationState",
"ConversationTurn",
"DuplexPipeline",
]

View File

@@ -27,15 +27,15 @@ Assistant config source behavior:
## Architecture
- Ports: `core/ports/control_plane.py`
- Adapters: `app/backend_adapters.py`
- Ports: `runtime/ports/control_plane.py`
- Adapters: `adapters/control_plane/backend.py`
`Session` and `DuplexPipeline` receive backend capabilities via injected adapter
methods instead of hard-coding backend client imports.
## Async History Writes
Session history persistence is handled by `core/history_bridge.py`.
Session history persistence is handled by `runtime/history/bridge.py`.
Design:

View File

@@ -4,31 +4,31 @@ This document defines the draft port set used to keep core runtime extensible.
## Port Modules
- `core/ports/control_plane.py`
- `runtime/ports/control_plane.py`
- `AssistantRuntimeConfigProvider`
- `ConversationHistoryStore`
- `KnowledgeRetriever`
- `ToolCatalog`
- `ControlPlaneGateway`
- `core/ports/llm.py`
- `runtime/ports/llm.py`
- `LLMServiceSpec`
- `LLMPort`
- optional extensions: `LLMCancellable`, `LLMRuntimeConfigurable`
- `core/ports/tts.py`
- `runtime/ports/tts.py`
- `TTSServiceSpec`
- `TTSPort`
- `core/ports/asr.py`
- `runtime/ports/asr.py`
- `ASRServiceSpec`
- `ASRPort`
- optional extensions: `ASRInterimControl`, `ASRBufferControl`
- `core/ports/service_factory.py`
- `runtime/ports/service_factory.py`
- `RealtimeServiceFactory`
## Adapter Layer
- `app/service_factory.py` provides `DefaultRealtimeServiceFactory`.
- `providers/factory/default.py` provides `DefaultRealtimeServiceFactory`.
- It maps resolved provider specs to concrete adapters.
- Core orchestration (`core/duplex_pipeline.py`) depends on the factory port/specs, not concrete provider classes.
- Runtime orchestration (`runtime/pipeline/duplex.py`) depends on the factory port/specs, not concrete provider classes.
## Provider Behavior (Current)

View File

@@ -14,19 +14,19 @@ This document describes the runtime architecture of `engine` for realtime voice/
```mermaid
flowchart LR
C[Client\nWeb / Mobile / Device] <-- WS v1 + PCM --> A[FastAPI App\napp/main.py]
A --> S[Session\ncore/session.py]
S --> D[Duplex Pipeline\ncore/duplex_pipeline.py]
A --> S[Session\nruntime/session/manager.py]
S --> D[Duplex Pipeline\nruntime/pipeline/duplex.py]
D --> P[Processors\nVAD / EOU / Tracks]
D --> R[Workflow Runner\ncore/workflow_runner.py]
D --> E[Event Bus + Models\ncore/events.py + models/*]
D --> R[Workflow Runner\nworkflow/runner.py]
D --> E[Event Bus + Models\nruntime/events.py + protocol/ws_v1/*]
R --> SV[Service Layer\nservices/asr.py\nservices/llm.py\nservices/tts.py]
R --> TE[Tool Executor\ncore/tool_executor.py]
R --> SV[Service Layer\nproviders/asr/*\nproviders/llm/*\nproviders/tts/*]
R --> TE[Tool Executor\ntools/executor.py]
S --> HB[History Bridge\ncore/history_bridge.py]
S --> BA[Control Plane Port\ncore/ports/control_plane.py]
BA --> AD[Adapters\napp/backend_adapters.py]
S --> HB[History Bridge\nruntime/history/bridge.py]
S --> BA[Control Plane Port\nruntime/ports/control_plane.py]
BA --> AD[Adapters\nadapters/control_plane/backend.py]
AD --> B[(External Backend API\noptional)]
SV --> M[(ASR/LLM/TTS Providers)]
@@ -58,7 +58,7 @@ flowchart LR
### 2) Session + Orchestration Layer
- Core: `core/session.py`, `core/duplex_pipeline.py`, `core/conversation.py`
- Core: `runtime/session/manager.py`, `runtime/pipeline/duplex.py`, `runtime/conversation.py`
- Responsibilities:
- Per-session state machine
- Turn boundaries and interruption/cancel handling
@@ -75,7 +75,7 @@ flowchart LR
### 4) Workflow + Tooling Layer
- Modules: `core/workflow_runner.py`, `core/tool_executor.py`
- Modules: `workflow/runner.py`, `tools/executor.py`
- Responsibilities:
- Assistant workflow execution
- Tool call planning/execution and timeout handling
@@ -83,7 +83,7 @@ flowchart LR
### 5) Service Integration Layer
- Modules: `services/*`
- Modules: `providers/*`
- Responsibilities:
- Abstracting ASR/LLM/TTS provider differences
- Streaming token/audio adaptation
@@ -91,8 +91,8 @@ flowchart LR
### 6) Backend Integration Layer (Optional)
- Port: `core/ports/control_plane.py`
- Adapters: `app/backend_adapters.py`
- Port: `runtime/ports/control_plane.py`
- Adapters: `adapters/control_plane/backend.py`
- Responsibilities:
- Fetching assistant runtime config
- Persisting call/session metadata and history
@@ -100,7 +100,7 @@ flowchart LR
### 7) Persistence / Reliability Layer
- Module: `core/history_bridge.py`
- Module: `runtime/history/bridge.py`
- Responsibilities:
- Non-blocking queue-based history writes
- Retry with backoff on backend failures

View File

@@ -0,0 +1,21 @@
# Canonical Module Layout
This MVP uses a single canonical module layout without legacy import shims.
## Runtime and protocol
- `protocol.ws_v1.schema`
- `runtime.session.manager`
- `runtime.pipeline.duplex`
- `runtime.history.bridge`
- `runtime.events`
- `runtime.transports`
- `runtime.conversation`
- `runtime.ports.*`
## Integrations and orchestration
- `providers.*`
- `adapters.control_plane.backend`
- `workflow.runner`
- `tools.executor`

View File

@@ -7,9 +7,9 @@
- 握手顺序、状态机、错误语义与实现细节。
实现对照来源:
- `models/ws_v1.py`
- `core/session.py`
- `core/duplex_pipeline.py`
- `protocol/ws_v1/schema.py`
- `runtime/session/manager.py`
- `runtime/pipeline/duplex.py`
- `app/main.py`
---

View File

@@ -1 +0,0 @@
"""Data Models Package"""

View File

@@ -1,143 +0,0 @@
"""Protocol command models matching the original active-call API."""
from typing import Optional, Dict, Any
from pydantic import BaseModel, Field
class InviteCommand(BaseModel):
"""Invite command to initiate a call."""
command: str = Field(default="invite", description="Command type")
option: Optional[Dict[str, Any]] = Field(default=None, description="Call configuration options")
class AcceptCommand(BaseModel):
"""Accept command to accept an incoming call."""
command: str = Field(default="accept", description="Command type")
option: Optional[Dict[str, Any]] = Field(default=None, description="Call configuration options")
class RejectCommand(BaseModel):
"""Reject command to reject an incoming call."""
command: str = Field(default="reject", description="Command type")
reason: str = Field(default="", description="Reason for rejection")
code: Optional[int] = Field(default=None, description="SIP response code")
class RingingCommand(BaseModel):
"""Ringing command to send ringing response."""
command: str = Field(default="ringing", description="Command type")
recorder: Optional[Dict[str, Any]] = Field(default=None, description="Call recording configuration")
early_media: bool = Field(default=False, description="Enable early media")
ringtone: Optional[str] = Field(default=None, description="Custom ringtone URL")
class TTSCommand(BaseModel):
"""TTS command to convert text to speech."""
command: str = Field(default="tts", description="Command type")
text: str = Field(..., description="Text to synthesize")
speaker: Optional[str] = Field(default=None, description="Speaker voice name")
play_id: Optional[str] = Field(default=None, description="Unique identifier for this TTS session")
auto_hangup: bool = Field(default=False, description="Auto hangup after TTS completion")
streaming: bool = Field(default=False, description="Streaming text input")
end_of_stream: bool = Field(default=False, description="End of streaming input")
wait_input_timeout: Optional[int] = Field(default=None, description="Max time to wait for input (seconds)")
option: Optional[Dict[str, Any]] = Field(default=None, description="TTS provider specific options")
class PlayCommand(BaseModel):
"""Play command to play audio from URL."""
command: str = Field(default="play", description="Command type")
url: str = Field(..., description="URL of audio file to play")
auto_hangup: bool = Field(default=False, description="Auto hangup after playback")
wait_input_timeout: Optional[int] = Field(default=None, description="Max time to wait for input (seconds)")
class InterruptCommand(BaseModel):
"""Interrupt command to interrupt current playback."""
command: str = Field(default="interrupt", description="Command type")
graceful: bool = Field(default=False, description="Wait for current TTS to complete")
class PauseCommand(BaseModel):
"""Pause command to pause current playback."""
command: str = Field(default="pause", description="Command type")
class ResumeCommand(BaseModel):
"""Resume command to resume paused playback."""
command: str = Field(default="resume", description="Command type")
class HangupCommand(BaseModel):
"""Hangup command to end the call."""
command: str = Field(default="hangup", description="Command type")
reason: Optional[str] = Field(default=None, description="Reason for hangup")
initiator: Optional[str] = Field(default=None, description="Who initiated the hangup")
class HistoryCommand(BaseModel):
"""History command to add conversation history."""
command: str = Field(default="history", description="Command type")
speaker: str = Field(..., description="Speaker identifier")
text: str = Field(..., description="Conversation text")
class ChatCommand(BaseModel):
"""Chat command for text-based conversation."""
command: str = Field(default="chat", description="Command type")
text: str = Field(..., description="Chat text message")
# Command type mapping
COMMAND_TYPES = {
"invite": InviteCommand,
"accept": AcceptCommand,
"reject": RejectCommand,
"ringing": RingingCommand,
"tts": TTSCommand,
"play": PlayCommand,
"interrupt": InterruptCommand,
"pause": PauseCommand,
"resume": ResumeCommand,
"hangup": HangupCommand,
"history": HistoryCommand,
"chat": ChatCommand,
}
def parse_command(data: Dict[str, Any]) -> BaseModel:
"""
Parse a command from JSON data.
Args:
data: JSON data as dictionary
Returns:
Parsed command model
Raises:
ValueError: If command type is unknown
"""
command_type = data.get("command")
if not command_type:
raise ValueError("Missing 'command' field")
command_class = COMMAND_TYPES.get(command_type)
if not command_class:
raise ValueError(f"Unknown command type: {command_type}")
return command_class(**data)

View File

@@ -1,126 +0,0 @@
"""Configuration models for call options."""
from typing import Optional, Dict, Any, List
from pydantic import BaseModel, Field
class VADOption(BaseModel):
"""Voice Activity Detection configuration."""
type: str = Field(default="silero", description="VAD algorithm type (silero, webrtc)")
samplerate: int = Field(default=16000, description="Audio sample rate for VAD")
speech_padding: int = Field(default=250, description="Speech padding in milliseconds")
silence_padding: int = Field(default=100, description="Silence padding in milliseconds")
ratio: float = Field(default=0.5, description="Voice detection ratio threshold")
voice_threshold: float = Field(default=0.5, description="Voice energy threshold")
max_buffer_duration_secs: int = Field(default=50, description="Maximum buffer duration in seconds")
silence_timeout: Optional[int] = Field(default=None, description="Silence timeout in milliseconds")
endpoint: Optional[str] = Field(default=None, description="Custom VAD service endpoint")
secret_key: Optional[str] = Field(default=None, description="VAD service secret key")
secret_id: Optional[str] = Field(default=None, description="VAD service secret ID")
class ASROption(BaseModel):
"""Automatic Speech Recognition configuration."""
provider: str = Field(..., description="ASR provider (tencent, aliyun, openai, etc.)")
language: Optional[str] = Field(default=None, description="Language code (zh-CN, en-US)")
app_id: Optional[str] = Field(default=None, description="Application ID")
secret_id: Optional[str] = Field(default=None, description="Secret ID for authentication")
secret_key: Optional[str] = Field(default=None, description="Secret key for authentication")
model_type: Optional[str] = Field(default=None, description="ASR model type (16k_zh, 8k_en)")
buffer_size: Optional[int] = Field(default=None, description="Audio buffer size in bytes")
samplerate: Optional[int] = Field(default=None, description="Audio sample rate")
endpoint: Optional[str] = Field(default=None, description="Custom ASR service endpoint")
extra: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters")
start_when_answer: bool = Field(default=False, description="Start ASR when call is answered")
class TTSOption(BaseModel):
"""Text-to-Speech configuration."""
samplerate: Optional[int] = Field(default=None, description="TTS output sample rate")
provider: str = Field(default="msedge", description="TTS provider (tencent, aliyun, deepgram, msedge)")
speed: float = Field(default=1.0, description="Speech speed multiplier")
app_id: Optional[str] = Field(default=None, description="Application ID")
secret_id: Optional[str] = Field(default=None, description="Secret ID for authentication")
secret_key: Optional[str] = Field(default=None, description="Secret key for authentication")
volume: Optional[int] = Field(default=None, description="Speech volume level (1-10)")
speaker: Optional[str] = Field(default=None, description="Voice speaker name")
codec: Optional[str] = Field(default=None, description="Audio codec")
subtitle: bool = Field(default=False, description="Enable subtitle generation")
emotion: Optional[str] = Field(default=None, description="Speech emotion")
endpoint: Optional[str] = Field(default=None, description="Custom TTS service endpoint")
extra: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters")
max_concurrent_tasks: Optional[int] = Field(default=None, description="Max concurrent tasks")
class RecorderOption(BaseModel):
"""Call recording configuration."""
recorder_file: str = Field(..., description="Path to recording file")
samplerate: int = Field(default=16000, description="Recording sample rate")
ptime: int = Field(default=200, description="Packet time in milliseconds")
class MediaPassOption(BaseModel):
"""Media pass-through configuration for external audio processing."""
url: str = Field(..., description="WebSocket URL for media streaming")
input_sample_rate: int = Field(default=16000, description="Sample rate of audio received from WebSocket")
output_sample_rate: int = Field(default=16000, description="Sample rate of audio sent to WebSocket")
packet_size: int = Field(default=2560, description="Packet size in bytes")
ptime: Optional[int] = Field(default=None, description="Buffered playback period in milliseconds")
class SipOption(BaseModel):
"""SIP protocol configuration."""
username: Optional[str] = Field(default=None, description="SIP username")
password: Optional[str] = Field(default=None, description="SIP password")
realm: Optional[str] = Field(default=None, description="SIP realm/domain")
headers: Optional[Dict[str, str]] = Field(default=None, description="Additional SIP headers")
class HandlerRule(BaseModel):
"""Handler routing rule."""
caller: Optional[str] = Field(default=None, description="Caller pattern (regex)")
callee: Optional[str] = Field(default=None, description="Callee pattern (regex)")
playbook: Optional[str] = Field(default=None, description="Playbook file path")
webhook: Optional[str] = Field(default=None, description="Webhook URL")
class CallOption(BaseModel):
"""Comprehensive call configuration options."""
# Basic options
denoise: bool = Field(default=False, description="Enable noise reduction")
offer: Optional[str] = Field(default=None, description="SDP offer string")
callee: Optional[str] = Field(default=None, description="Callee SIP URI or phone number")
caller: Optional[str] = Field(default=None, description="Caller SIP URI or phone number")
# Audio codec
codec: str = Field(default="pcm", description="Audio codec (pcm, pcma, pcmu, g722)")
# Component configurations
recorder: Optional[RecorderOption] = Field(default=None, description="Call recording config")
asr: Optional[ASROption] = Field(default=None, description="ASR configuration")
vad: Optional[VADOption] = Field(default=None, description="VAD configuration")
tts: Optional[TTSOption] = Field(default=None, description="TTS configuration")
media_pass: Optional[MediaPassOption] = Field(default=None, description="Media pass-through config")
sip: Optional[SipOption] = Field(default=None, description="SIP configuration")
# Timeouts and networking
handshake_timeout: Optional[int] = Field(default=None, description="Handshake timeout in seconds")
enable_ipv6: bool = Field(default=False, description="Enable IPv6 support")
inactivity_timeout: Optional[int] = Field(default=None, description="Inactivity timeout in seconds")
# EOU configuration
eou: Optional[Dict[str, Any]] = Field(default=None, description="End of utterance detection config")
# Extra parameters
extra: Optional[Dict[str, Any]] = Field(default=None, description="Additional custom parameters")
class Config:
populate_by_name = True

View File

@@ -1,231 +0,0 @@
"""Protocol event models matching the original active-call API."""
from typing import Optional, Dict, Any
from pydantic import BaseModel, Field
from datetime import datetime
def current_timestamp_ms() -> int:
"""Get current timestamp in milliseconds."""
return int(datetime.now().timestamp() * 1000)
# Base Event Model
class BaseEvent(BaseModel):
"""Base event model."""
event: str = Field(..., description="Event type")
track_id: str = Field(..., description="Unique track identifier")
timestamp: int = Field(default_factory=current_timestamp_ms, description="Event timestamp in milliseconds")
# Lifecycle Events
class IncomingEvent(BaseEvent):
"""Incoming call event (SIP only)."""
event: str = Field(default="incoming", description="Event type")
caller: Optional[str] = Field(default=None, description="Caller's SIP URI")
callee: Optional[str] = Field(default=None, description="Callee's SIP URI")
sdp: Optional[str] = Field(default=None, description="SDP offer from caller")
class AnswerEvent(BaseEvent):
"""Call answered event."""
event: str = Field(default="answer", description="Event type")
sdp: Optional[str] = Field(default=None, description="SDP answer from server")
class RejectEvent(BaseEvent):
"""Call rejected event."""
event: str = Field(default="reject", description="Event type")
reason: Optional[str] = Field(default=None, description="Rejection reason")
code: Optional[int] = Field(default=None, description="SIP response code")
class RingingEvent(BaseEvent):
"""Call ringing event."""
event: str = Field(default="ringing", description="Event type")
early_media: bool = Field(default=False, description="Early media available")
class HangupEvent(BaseModel):
"""Call hangup event."""
event: str = Field(default="hangup", description="Event type")
timestamp: int = Field(default_factory=current_timestamp_ms, description="Event timestamp")
reason: Optional[str] = Field(default=None, description="Hangup reason")
initiator: Optional[str] = Field(default=None, description="Who initiated hangup")
start_time: Optional[str] = Field(default=None, description="Call start time (ISO 8601)")
hangup_time: Optional[str] = Field(default=None, description="Hangup time (ISO 8601)")
answer_time: Optional[str] = Field(default=None, description="Answer time (ISO 8601)")
ringing_time: Optional[str] = Field(default=None, description="Ringing time (ISO 8601)")
from_: Optional[Dict[str, Any]] = Field(default=None, alias="from", description="Caller info")
to: Optional[Dict[str, Any]] = Field(default=None, description="Callee info")
extra: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
class Config:
populate_by_name = True
# VAD Events
class SpeakingEvent(BaseEvent):
"""Speech detected event."""
event: str = Field(default="speaking", description="Event type")
start_time: int = Field(default_factory=current_timestamp_ms, description="Speech start time")
class SilenceEvent(BaseEvent):
"""Silence detected event."""
event: str = Field(default="silence", description="Event type")
start_time: int = Field(default_factory=current_timestamp_ms, description="Silence start time")
duration: int = Field(default=0, description="Silence duration in milliseconds")
# AI/ASR Events
class AsrFinalEvent(BaseEvent):
"""ASR final transcription event."""
event: str = Field(default="asrFinal", description="Event type")
index: int = Field(..., description="ASR result sequence number")
start_time: Optional[int] = Field(default=None, description="Speech start time")
end_time: Optional[int] = Field(default=None, description="Speech end time")
text: str = Field(..., description="Transcribed text")
class AsrDeltaEvent(BaseEvent):
"""ASR partial transcription event (streaming)."""
event: str = Field(default="asrDelta", description="Event type")
index: int = Field(..., description="ASR result sequence number")
start_time: Optional[int] = Field(default=None, description="Speech start time")
end_time: Optional[int] = Field(default=None, description="Speech end time")
text: str = Field(..., description="Partial transcribed text")
class EouEvent(BaseEvent):
"""End of utterance detection event."""
event: str = Field(default="eou", description="Event type")
completed: bool = Field(default=True, description="Whether utterance was completed")
# Audio Track Events
class TrackStartEvent(BaseEvent):
"""Audio track start event."""
event: str = Field(default="trackStart", description="Event type")
play_id: Optional[str] = Field(default=None, description="Play ID from TTS/Play command")
class TrackEndEvent(BaseEvent):
"""Audio track end event."""
event: str = Field(default="trackEnd", description="Event type")
duration: int = Field(..., description="Track duration in milliseconds")
ssrc: int = Field(..., description="RTP SSRC identifier")
play_id: Optional[str] = Field(default=None, description="Play ID from TTS/Play command")
class InterruptionEvent(BaseEvent):
"""Playback interruption event."""
event: str = Field(default="interruption", description="Event type")
play_id: Optional[str] = Field(default=None, description="Play ID that was interrupted")
subtitle: Optional[str] = Field(default=None, description="TTS text being played")
position: Optional[int] = Field(default=None, description="Word index position")
total_duration: Optional[int] = Field(default=None, description="Total TTS duration")
current: Optional[int] = Field(default=None, description="Elapsed time when interrupted")
# System Events
class ErrorEvent(BaseEvent):
"""Error event."""
event: str = Field(default="error", description="Event type")
sender: str = Field(..., description="Component that generated the error")
error: str = Field(..., description="Error message")
code: Optional[int] = Field(default=None, description="Error code")
class MetricsEvent(BaseModel):
"""Performance metrics event."""
event: str = Field(default="metrics", description="Event type")
timestamp: int = Field(default_factory=current_timestamp_ms, description="Event timestamp")
key: str = Field(..., description="Metric key")
duration: int = Field(..., description="Duration in milliseconds")
data: Optional[Dict[str, Any]] = Field(default=None, description="Additional metric data")
class AddHistoryEvent(BaseModel):
"""Conversation history entry added event."""
event: str = Field(default="addHistory", description="Event type")
timestamp: int = Field(default_factory=current_timestamp_ms, description="Event timestamp")
sender: Optional[str] = Field(default=None, description="Component that added history")
speaker: str = Field(..., description="Speaker identifier")
text: str = Field(..., description="Conversation text")
class DTMFEvent(BaseEvent):
"""DTMF tone detected event."""
event: str = Field(default="dtmf", description="Event type")
digit: str = Field(..., description="DTMF digit (0-9, *, #, A-D)")
class HeartBeatEvent(BaseModel):
"""Server-to-client heartbeat to keep connection alive."""
event: str = Field(default="heartBeat", description="Event type")
timestamp: int = Field(default_factory=current_timestamp_ms, description="Event timestamp in milliseconds")
# Event type mapping
EVENT_TYPES = {
"incoming": IncomingEvent,
"answer": AnswerEvent,
"reject": RejectEvent,
"ringing": RingingEvent,
"hangup": HangupEvent,
"speaking": SpeakingEvent,
"silence": SilenceEvent,
"asrFinal": AsrFinalEvent,
"asrDelta": AsrDeltaEvent,
"eou": EouEvent,
"trackStart": TrackStartEvent,
"trackEnd": TrackEndEvent,
"interruption": InterruptionEvent,
"error": ErrorEvent,
"metrics": MetricsEvent,
"addHistory": AddHistoryEvent,
"dtmf": DTMFEvent,
"heartBeat": HeartBeatEvent,
}
def create_event(event_type: str, **kwargs) -> BaseModel:
"""
Create an event model.
Args:
event_type: Type of event to create
**kwargs: Event fields
Returns:
Event model instance
Raises:
ValueError: If event type is unknown
"""
event_class = EVENT_TYPES.get(event_type)
if not event_class:
raise ValueError(f"Unknown event type: {event_type}")
return event_class(event=event_type, **kwargs)

View File

@@ -0,0 +1 @@
"""Protocol package."""

View File

@@ -0,0 +1 @@
"""WS v1 protocol package."""

View File

@@ -0,0 +1 @@
"""Providers package."""

View File

@@ -0,0 +1 @@
"""ASR providers."""

View File

@@ -9,7 +9,7 @@ import json
from typing import AsyncIterator, Optional
from loguru import logger
from services.base import BaseASRService, ASRResult, ServiceState
from providers.common.base import BaseASRService, ASRResult, ServiceState
# Try to import websockets for streaming ASR
try:

View File

@@ -19,7 +19,7 @@ except ImportError:
AIOHTTP_AVAILABLE = False
logger.warning("aiohttp not available - OpenAICompatibleASRService will not work")
from services.base import BaseASRService, ASRResult, ServiceState
from providers.common.base import BaseASRService, ASRResult, ServiceState
class OpenAICompatibleASRService(BaseASRService):

View File

@@ -1,6 +1,6 @@
"""Backward-compatible imports for legacy siliconflow_asr module."""
from services.openai_compatible_asr import OpenAICompatibleASRService
from providers.asr.openai_compatible import OpenAICompatibleASRService
# Backward-compatible alias
SiliconFlowASRService = OpenAICompatibleASRService

View File

@@ -0,0 +1 @@
"""Common provider types."""

View File

@@ -0,0 +1 @@
"""Provider factories."""

View File

@@ -6,7 +6,7 @@ from typing import Any
from loguru import logger
from core.ports import (
from runtime.ports import (
ASRPort,
ASRServiceSpec,
LLMPort,
@@ -15,12 +15,12 @@ from core.ports import (
TTSPort,
TTSServiceSpec,
)
from services.asr import BufferedASRService
from services.dashscope_tts import DashScopeTTSService
from services.llm import MockLLMService, OpenAILLMService
from services.openai_compatible_asr import OpenAICompatibleASRService
from services.openai_compatible_tts import OpenAICompatibleTTSService
from services.tts import MockTTSService
from providers.asr.buffered import BufferedASRService
from providers.tts.dashscope import DashScopeTTSService
from providers.llm.openai import MockLLMService, OpenAILLMService
from providers.asr.openai_compatible import OpenAICompatibleASRService
from providers.tts.openai_compatible import OpenAICompatibleTTSService
from providers.tts.mock import MockTTSService
_OPENAI_COMPATIBLE_PROVIDERS = {"openai_compatible", "openai-compatible", "siliconflow"}
_SUPPORTED_LLM_PROVIDERS = {"openai", *_OPENAI_COMPATIBLE_PROVIDERS}

View File

@@ -0,0 +1 @@
"""LLM providers."""

View File

@@ -10,8 +10,8 @@ import uuid
from typing import AsyncIterator, Optional, List, Dict, Any, Callable, Awaitable
from loguru import logger
from app.backend_adapters import build_backend_adapter_from_settings
from services.base import BaseLLMService, LLMMessage, LLMStreamEvent, ServiceState
from adapters.control_plane.backend import build_backend_adapter_from_settings
from providers.common.base import BaseLLMService, LLMMessage, LLMStreamEvent, ServiceState
# Try to import openai
try:

View File

@@ -0,0 +1 @@
"""Realtime providers."""

View File

@@ -0,0 +1 @@
"""TTS providers."""

View File

@@ -12,7 +12,7 @@ from typing import Any, AsyncIterator, Dict, Optional, Tuple
from loguru import logger
from services.base import BaseTTSService, ServiceState, TTSChunk
from providers.common.base import BaseTTSService, ServiceState, TTSChunk
try:
import dashscope

View File

@@ -5,7 +5,7 @@ from typing import AsyncIterator
from loguru import logger
from services.base import BaseTTSService, TTSChunk, ServiceState
from providers.common.base import BaseTTSService, TTSChunk, ServiceState
class MockTTSService(BaseTTSService):

View File

@@ -13,8 +13,8 @@ from typing import AsyncIterator, Optional
from urllib.parse import urlparse, urlunparse
from loguru import logger
from services.base import BaseTTSService, TTSChunk, ServiceState
from services.streaming_tts_adapter import StreamingTTSAdapter # backward-compatible re-export
from providers.common.base import BaseTTSService, TTSChunk, ServiceState
from providers.tts.streaming_adapter import StreamingTTSAdapter # backward-compatible re-export
class OpenAICompatibleTTSService(BaseTTSService):

View File

@@ -1,6 +1,6 @@
"""Backward-compatible imports for legacy siliconflow_tts module."""
from services.openai_compatible_tts import OpenAICompatibleTTSService, StreamingTTSAdapter
from providers.tts.openai_compatible import OpenAICompatibleTTSService, StreamingTTSAdapter
# Backward-compatible alias
SiliconFlowTTSService = OpenAICompatibleTTSService

View File

@@ -4,8 +4,8 @@ import asyncio
from loguru import logger
from services.base import BaseTTSService
from services.streaming_text import extract_tts_sentence, has_spoken_content
from providers.common.base import BaseTTSService
from providers.common.streaming_text import extract_tts_sentence, has_spoken_content
class StreamingTTSAdapter:

View File

@@ -31,7 +31,17 @@ Issues = "https://github.com/yourusername/py-active-call-cc/issues"
[tool.setuptools.packages.find]
where = ["."]
include = ["app*"]
include = [
"app*",
"adapters*",
"protocol*",
"providers*",
"processors*",
"runtime*",
"tools*",
"utils*",
"workflow*",
]
exclude = ["tests*", "scripts*", "reference*"]
[tool.black]

View File

@@ -0,0 +1 @@
"""Runtime package."""

View File

@@ -10,7 +10,7 @@ from dataclasses import dataclass, field
from enum import Enum
from loguru import logger
from services.base import LLMMessage
from providers.common.base import LLMMessage
class ConversationState(Enum):

View File

@@ -0,0 +1 @@
"""Runtime history package."""

View File

@@ -9,7 +9,7 @@ from typing import Optional
from loguru import logger
from core.ports import ConversationHistoryStore
from runtime.ports import ConversationHistoryStore
@dataclass

View File

@@ -0,0 +1 @@
"""Runtime pipeline package."""

View File

@@ -0,0 +1,13 @@
"""ASR flow helpers extracted from the duplex pipeline.
This module is intentionally lightweight for phase-wise migration.
"""
from __future__ import annotations
from providers.common.base import ASRResult
def is_final_result(result: ASRResult) -> bool:
"""Return whether an ASR result is final."""
return bool(result.is_final)

View File

@@ -0,0 +1,6 @@
"""Shared constants for the runtime duplex pipeline."""
TRACK_AUDIO_IN = "audio_in"
TRACK_AUDIO_OUT = "audio_out"
TRACK_CONTROL = "control"
PCM_FRAME_BYTES = 640 # 16k mono pcm_s16le, 20ms

View File

@@ -26,10 +26,10 @@ import aiohttp
from loguru import logger
from app.config import settings
from app.service_factory import DefaultRealtimeServiceFactory
from core.conversation import ConversationManager, ConversationState
from core.events import get_event_bus
from core.ports import (
from providers.factory.default import DefaultRealtimeServiceFactory
from runtime.conversation import ConversationManager, ConversationState
from runtime.events import get_event_bus
from runtime.ports import (
ASRPort,
ASRServiceSpec,
LLMPort,
@@ -38,13 +38,13 @@ from core.ports import (
TTSPort,
TTSServiceSpec,
)
from core.tool_executor import execute_server_tool
from core.transports import BaseTransport
from models.ws_v1 import ev
from tools.executor import execute_server_tool
from runtime.transports import BaseTransport
from protocol.ws_v1.schema import ev
from processors.eou import EouDetector
from processors.vad import SileroVAD, VADProcessor
from services.base import LLMMessage, LLMStreamEvent
from services.streaming_text import extract_tts_sentence, has_spoken_content
from providers.common.base import LLMMessage, LLMStreamEvent
from providers.common.streaming_text import extract_tts_sentence, has_spoken_content
class DuplexPipeline:

View File

@@ -0,0 +1,12 @@
"""Output-event shaping helpers for the runtime pipeline."""
from __future__ import annotations
from typing import Any, Dict
def assistant_text_delta_event(text: str, **extra: Any) -> Dict[str, Any]:
"""Build a normalized assistant text delta payload."""
payload: Dict[str, Any] = {"type": "assistant.text.delta", "text": str(text)}
payload.update(extra)
return payload

View File

@@ -0,0 +1,8 @@
"""Interruption-related helpers extracted from the duplex pipeline."""
from __future__ import annotations
def should_interrupt(min_duration_ms: int, detected_ms: int) -> bool:
"""Decide whether interruption conditions are met."""
return int(detected_ms) >= max(0, int(min_duration_ms))

View File

@@ -0,0 +1,13 @@
"""LLM flow helpers extracted from the duplex pipeline.
This module is intentionally lightweight for phase-wise migration.
"""
from __future__ import annotations
from providers.common.base import LLMStreamEvent
def is_done_event(event: LLMStreamEvent) -> bool:
"""Return whether an LLM stream event signals completion."""
return str(event.type) == "done"

View File

@@ -0,0 +1,13 @@
"""Tooling helpers extracted from the duplex pipeline."""
from __future__ import annotations
from typing import Any
def normalize_tool_name(name: Any, aliases: dict[str, str]) -> str:
"""Normalize tool name with alias mapping."""
normalized = str(name or "").strip()
if not normalized:
return ""
return aliases.get(normalized, normalized)

View File

@@ -0,0 +1,15 @@
"""TTS flow helpers extracted from the duplex pipeline.
This module is intentionally lightweight for phase-wise migration.
"""
from __future__ import annotations
from providers.common.base import TTSChunk
def chunk_duration_ms(chunk: TTSChunk) -> float:
"""Estimate chunk duration in milliseconds for pcm16 mono."""
if chunk.sample_rate <= 0:
return 0.0
return (len(chunk.audio) / 2.0 / float(chunk.sample_rate)) * 1000.0

View File

@@ -1,16 +1,16 @@
"""Port interfaces for engine-side integration boundaries."""
"""Port interfaces for runtime integration boundaries."""
from core.ports.asr import ASRBufferControl, ASRInterimControl, ASRPort, ASRServiceSpec
from core.ports.control_plane import (
from runtime.ports.asr import ASRBufferControl, ASRInterimControl, ASRPort, ASRServiceSpec
from runtime.ports.control_plane import (
AssistantRuntimeConfigProvider,
ControlPlaneGateway,
ConversationHistoryStore,
KnowledgeRetriever,
ToolCatalog,
)
from core.ports.llm import LLMCancellable, LLMPort, LLMRuntimeConfigurable, LLMServiceSpec
from core.ports.service_factory import RealtimeServiceFactory
from core.ports.tts import TTSPort, TTSServiceSpec
from runtime.ports.llm import LLMCancellable, LLMPort, LLMRuntimeConfigurable, LLMServiceSpec
from runtime.ports.service_factory import RealtimeServiceFactory
from runtime.ports.tts import TTSPort, TTSServiceSpec
__all__ = [
"ASRPort",

View File

@@ -5,7 +5,7 @@ from __future__ import annotations
from dataclasses import dataclass
from typing import AsyncIterator, Awaitable, Callable, Optional, Protocol
from services.base import ASRResult
from providers.common.base import ASRResult
TranscriptCallback = Callable[[str, bool], Awaitable[None]]

View File

@@ -5,7 +5,7 @@ from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, AsyncIterator, Awaitable, Callable, Dict, List, Optional, Protocol
from services.base import LLMMessage, LLMStreamEvent
from providers.common.base import LLMMessage, LLMStreamEvent
KnowledgeRetrieverFn = Callable[..., Awaitable[List[Dict[str, Any]]]]

View File

@@ -4,9 +4,9 @@ from __future__ import annotations
from typing import Protocol
from core.ports.asr import ASRPort, ASRServiceSpec
from core.ports.llm import LLMPort, LLMServiceSpec
from core.ports.tts import TTSPort, TTSServiceSpec
from runtime.ports.asr import ASRPort, ASRServiceSpec
from runtime.ports.llm import LLMPort, LLMServiceSpec
from runtime.ports.tts import TTSPort, TTSServiceSpec
class RealtimeServiceFactory(Protocol):

View File

@@ -5,7 +5,7 @@ from __future__ import annotations
from dataclasses import dataclass
from typing import AsyncIterator, Optional, Protocol
from services.base import TTSChunk
from providers.common.base import TTSChunk
@dataclass(frozen=True)

View File

@@ -0,0 +1 @@
"""Runtime session package."""

View File

@@ -0,0 +1,10 @@
"""Lifecycle helper utilities for runtime sessions."""
from __future__ import annotations
from datetime import datetime, timezone
def utc_now_iso() -> str:
"""Return current UTC timestamp in ISO 8601 format."""
return datetime.now(timezone.utc).isoformat()

View File

@@ -9,22 +9,22 @@ from enum import Enum
from typing import Optional, Dict, Any, List
from loguru import logger
from app.backend_adapters import build_backend_adapter_from_settings
from core.transports import BaseTransport
from core.ports import (
from adapters.control_plane.backend import build_backend_adapter_from_settings
from runtime.transports import BaseTransport
from runtime.ports import (
AssistantRuntimeConfigProvider,
ControlPlaneGateway,
ConversationHistoryStore,
KnowledgeRetriever,
ToolCatalog,
)
from core.duplex_pipeline import DuplexPipeline
from core.conversation import ConversationTurn
from core.history_bridge import SessionHistoryBridge
from core.workflow_runner import WorkflowRunner, WorkflowTransition, WorkflowNodeDef, WorkflowEdgeDef
from runtime.pipeline.duplex import DuplexPipeline
from runtime.conversation import ConversationTurn
from runtime.history.bridge import SessionHistoryBridge
from workflow.runner import WorkflowRunner, WorkflowTransition, WorkflowNodeDef, WorkflowEdgeDef
from app.config import settings
from services.base import LLMMessage
from models.ws_v1 import (
from providers.common.base import LLMMessage
from protocol.ws_v1.schema import (
parse_client_message,
ev,
SessionStartMessage,

View File

@@ -0,0 +1,9 @@
"""Metadata helpers extracted from session manager."""
from __future__ import annotations
import re
from typing import Pattern
DYNAMIC_VARIABLE_KEY_RE: Pattern[str] = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]{0,63}$")
DYNAMIC_VARIABLE_PLACEHOLDER_RE: Pattern[str] = re.compile(r"\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*\}\}")

View File

@@ -0,0 +1,12 @@
"""Workflow bridge helpers for runtime session orchestration."""
from __future__ import annotations
from typing import Optional
from workflow.runner import WorkflowRunner
def has_active_workflow(workflow_runner: Optional[WorkflowRunner]) -> bool:
"""Return whether a workflow runner exists and has a current node."""
return bool(workflow_runner and workflow_runner.current_node is not None)

View File

@@ -1,52 +0,0 @@
"""AI Services package.
Provides ASR, LLM, TTS, and Realtime API services for voice conversation.
"""
from services.base import (
ServiceState,
ASRResult,
LLMMessage,
TTSChunk,
BaseASRService,
BaseLLMService,
BaseTTSService,
)
from services.llm import OpenAILLMService, MockLLMService
from services.dashscope_tts import DashScopeTTSService
from services.tts import MockTTSService
from services.asr import BufferedASRService, MockASRService
from services.openai_compatible_asr import OpenAICompatibleASRService, SiliconFlowASRService
from services.openai_compatible_tts import OpenAICompatibleTTSService, SiliconFlowTTSService
from services.streaming_tts_adapter import StreamingTTSAdapter
from services.realtime import RealtimeService, RealtimeConfig, RealtimePipeline
__all__ = [
# Base classes
"ServiceState",
"ASRResult",
"LLMMessage",
"TTSChunk",
"BaseASRService",
"BaseLLMService",
"BaseTTSService",
# LLM
"OpenAILLMService",
"MockLLMService",
# TTS
"DashScopeTTSService",
"MockTTSService",
# ASR
"BufferedASRService",
"MockASRService",
"OpenAICompatibleASRService",
"SiliconFlowASRService",
# TTS (SiliconFlow)
"OpenAICompatibleTTSService",
"SiliconFlowTTSService",
"StreamingTTSAdapter",
# Realtime
"RealtimeService",
"RealtimeConfig",
"RealtimePipeline",
]

View File

@@ -1,7 +1,7 @@
import aiohttp
import pytest
from app.backend_adapters import (
from adapters.control_plane.backend import (
AssistantConfigSourceAdapter,
LocalYamlAssistantConfigAdapter,
build_backend_adapter,
@@ -120,7 +120,7 @@ async def test_http_backend_adapter_create_call_record_posts_expected_payload(mo
},
)
monkeypatch.setattr("app.backend_adapters.aiohttp.ClientSession", _FakeClientSession)
monkeypatch.setattr("adapters.control_plane.backend.aiohttp.ClientSession", _FakeClientSession)
config_dir = tmp_path / "assistants"
config_dir.mkdir(parents=True, exist_ok=True)
@@ -198,7 +198,7 @@ async def test_with_backend_url_uses_backend_for_assistant_config(monkeypatch, t
_ = (url, json)
return _FakeResponse(status=200, payload={"id": "call_1"})
monkeypatch.setattr("app.backend_adapters.aiohttp.ClientSession", _FakeClientSession)
monkeypatch.setattr("adapters.control_plane.backend.aiohttp.ClientSession", _FakeClientSession)
config_dir = tmp_path / "assistants"
config_dir.mkdir(parents=True, exist_ok=True)
@@ -234,7 +234,7 @@ async def test_backend_mode_disabled_uses_local_assistant_config_even_with_url(m
_ = timeout
raise AssertionError("HTTP client should not be created when backend_mode=disabled")
monkeypatch.setattr("app.backend_adapters.aiohttp.ClientSession", _FailIfCalledClientSession)
monkeypatch.setattr("adapters.control_plane.backend.aiohttp.ClientSession", _FailIfCalledClientSession)
config_dir = tmp_path / "assistants"
config_dir.mkdir(parents=True, exist_ok=True)

View File

@@ -1,4 +1,4 @@
from core.session import Session
from runtime.session.manager import Session
def _session() -> Session:

View File

@@ -3,7 +3,7 @@ import time
import pytest
from core.history_bridge import SessionHistoryBridge
from runtime.history.bridge import SessionHistoryBridge
class _FakeHistoryWriter:

View File

@@ -4,10 +4,10 @@ from typing import Any, Dict, List
import pytest
from core.conversation import ConversationState
from core.duplex_pipeline import DuplexPipeline
from models.ws_v1 import OutputAudioPlayedMessage, ToolCallResultsMessage, parse_client_message
from services.base import LLMStreamEvent
from runtime.conversation import ConversationState
from runtime.pipeline.duplex import DuplexPipeline
from protocol.ws_v1.schema import OutputAudioPlayedMessage, ToolCallResultsMessage, parse_client_message
from providers.common.base import LLMStreamEvent
class _DummySileroVAD:
@@ -86,9 +86,9 @@ class _CaptureGenerateLLM:
def _build_pipeline(monkeypatch, llm_rounds: List[List[LLMStreamEvent]]) -> tuple[DuplexPipeline, List[Dict[str, Any]]]:
monkeypatch.setattr("core.duplex_pipeline.SileroVAD", _DummySileroVAD)
monkeypatch.setattr("core.duplex_pipeline.VADProcessor", _DummyVADProcessor)
monkeypatch.setattr("core.duplex_pipeline.EouDetector", _DummyEouDetector)
monkeypatch.setattr("runtime.pipeline.duplex.SileroVAD", _DummySileroVAD)
monkeypatch.setattr("runtime.pipeline.duplex.VADProcessor", _DummyVADProcessor)
monkeypatch.setattr("runtime.pipeline.duplex.EouDetector", _DummyEouDetector)
pipeline = DuplexPipeline(
transport=_FakeTransport(),
@@ -112,7 +112,7 @@ def _build_pipeline(monkeypatch, llm_rounds: List[List[LLMStreamEvent]]) -> tupl
def test_pipeline_uses_default_tools_from_settings(monkeypatch):
monkeypatch.setattr(
"core.duplex_pipeline.settings.tools",
"runtime.pipeline.duplex.settings.tools",
[
"current_time",
"calculator",
@@ -141,7 +141,7 @@ def test_pipeline_uses_default_tools_from_settings(monkeypatch):
def test_pipeline_exposes_unknown_string_tools_with_fallback_schema(monkeypatch):
monkeypatch.setattr("core.duplex_pipeline.settings.tools", ["custom_system_cmd"])
monkeypatch.setattr("runtime.pipeline.duplex.settings.tools", ["custom_system_cmd"])
pipeline, _events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])
schemas = pipeline._resolved_tool_schemas()
@@ -151,7 +151,7 @@ def test_pipeline_exposes_unknown_string_tools_with_fallback_schema(monkeypatch)
def test_pipeline_assigns_default_client_executor_for_system_string_tools(monkeypatch):
monkeypatch.setattr("core.duplex_pipeline.settings.tools", ["increase_volume"])
monkeypatch.setattr("runtime.pipeline.duplex.settings.tools", ["increase_volume"])
pipeline, _events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])
tool_call = {
@@ -221,9 +221,9 @@ async def test_pipeline_applies_default_args_to_tool_call(monkeypatch):
@pytest.mark.asyncio
async def test_generated_opener_prompt_uses_system_prompt_only(monkeypatch):
monkeypatch.setattr("core.duplex_pipeline.SileroVAD", _DummySileroVAD)
monkeypatch.setattr("core.duplex_pipeline.VADProcessor", _DummyVADProcessor)
monkeypatch.setattr("core.duplex_pipeline.EouDetector", _DummyEouDetector)
monkeypatch.setattr("runtime.pipeline.duplex.SileroVAD", _DummySileroVAD)
monkeypatch.setattr("runtime.pipeline.duplex.VADProcessor", _DummyVADProcessor)
monkeypatch.setattr("runtime.pipeline.duplex.EouDetector", _DummyEouDetector)
llm = _CaptureGenerateLLM("你好")
pipeline = DuplexPipeline(
@@ -662,7 +662,7 @@ async def test_server_tool_timeout_emits_504_and_continues(monkeypatch):
"status": {"code": 200, "message": "ok"},
}
monkeypatch.setattr("core.duplex_pipeline.execute_server_tool", _slow_execute)
monkeypatch.setattr("runtime.pipeline.duplex.execute_server_tool", _slow_execute)
pipeline, events = _build_pipeline(
monkeypatch,

View File

@@ -1,6 +1,6 @@
import pytest
from core.tool_executor import execute_server_tool
from tools.executor import execute_server_tool
@pytest.mark.asyncio
@@ -38,7 +38,7 @@ async def test_current_time_uses_local_system_clock(monkeypatch):
async def _should_not_be_called(_tool_id):
raise AssertionError("fetch_tool_resource should not be called for current_time")
monkeypatch.setattr("core.tool_executor.fetch_tool_resource", _should_not_be_called)
monkeypatch.setattr("tools.executor.fetch_tool_resource", _should_not_be_called)
result = await execute_server_tool(
{

View File

@@ -1,7 +1,7 @@
import pytest
from core.session import Session, WsSessionState
from models.ws_v1 import OutputAudioPlayedMessage, SessionStartMessage, parse_client_message
from runtime.session.manager import Session, WsSessionState
from protocol.ws_v1.schema import OutputAudioPlayedMessage, SessionStartMessage, parse_client_message
def _session() -> Session:
@@ -194,7 +194,7 @@ async def test_handle_session_start_requires_assistant_id_and_closes_transport()
@pytest.mark.asyncio
async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_workflow(monkeypatch):
monkeypatch.setattr("core.session.settings.ws_emit_config_resolved", False)
monkeypatch.setattr("runtime.session.manager.settings.ws_emit_config_resolved", False)
session = Session.__new__(Session)
session.id = "sess_start_ok"
@@ -289,9 +289,9 @@ async def test_handle_session_start_applies_whitelisted_overrides_and_ignores_wo
@pytest.mark.asyncio
async def test_handle_session_start_emits_config_resolved_when_enabled(monkeypatch):
monkeypatch.setattr("core.session.settings.ws_emit_config_resolved", True)
monkeypatch.setattr("core.session.settings.ws_protocol_version", "v1-custom")
monkeypatch.setattr("core.session.settings.default_codec", "pcmu")
monkeypatch.setattr("runtime.session.manager.settings.ws_emit_config_resolved", True)
monkeypatch.setattr("runtime.session.manager.settings.ws_protocol_version", "v1-custom")
monkeypatch.setattr("runtime.session.manager.settings.default_codec", "pcmu")
session = Session.__new__(Session)
session.id = "sess_start_emit_config"
@@ -385,8 +385,8 @@ async def test_handle_session_start_emits_config_resolved_when_enabled(monkeypat
@pytest.mark.asyncio
async def test_handle_audio_uses_chunk_size_for_frame_validation(monkeypatch):
monkeypatch.setattr("core.session.settings.sample_rate", 16000)
monkeypatch.setattr("core.session.settings.chunk_size_ms", 10)
monkeypatch.setattr("runtime.session.manager.settings.sample_rate", 16000)
monkeypatch.setattr("runtime.session.manager.settings.chunk_size_ms", 10)
session = Session.__new__(Session)
session.id = "sess_chunk_frame"

1
engine/tools/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Tools package."""

View File

@@ -8,7 +8,7 @@ from typing import Any, Awaitable, Callable, Dict, Optional
import aiohttp
from app.backend_adapters import build_backend_adapter_from_settings
from adapters.control_plane.backend import build_backend_adapter_from_settings
ToolResourceFetcher = Callable[[str], Awaitable[Optional[Dict[str, Any]]]]

View File

@@ -0,0 +1 @@
"""Workflow package."""