Integrate eou and vad

2026-01-29 13:57:12 +08:00
parent 4cb267a288
commit cd90b4fb37
25 changed files with 2592 additions and 297 deletions
--- a/models/commands.py
+++ b/models/commands.py
@@ -0,0 +1,143 @@
+"""Protocol command models matching the original active-call API."""
+
+from typing import Optional, Dict, Any
+from pydantic import BaseModel, Field
+
+
+class InviteCommand(BaseModel):
+    """Payload of the ``invite`` command, which initiates a call.
+
+    Attributes:
+        command: Command type tag; defaults to ``"invite"``.
+        option: Optional free-form mapping of call configuration options.
+    """
+
+    command: str = Field("invite", description="Command type")
+    option: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Call configuration options",
+    )
+
+
+class AcceptCommand(BaseModel):
+    """Payload of the ``accept`` command, which accepts an incoming call.
+
+    Attributes:
+        command: Command type tag; defaults to ``"accept"``.
+        option: Optional free-form mapping of call configuration options.
+    """
+
+    command: str = Field("accept", description="Command type")
+    option: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Call configuration options",
+    )
+
+
+class RejectCommand(BaseModel):
+    """Payload of the ``reject`` command, which rejects an incoming call.
+
+    Attributes:
+        command: Command type tag; defaults to ``"reject"``.
+        reason: Reason for the rejection; defaults to an empty string.
+        code: Optional SIP response code.
+    """
+
+    command: str = Field("reject", description="Command type")
+    reason: str = Field("", description="Reason for rejection")
+    code: Optional[int] = Field(None, description="SIP response code")
+
+
+class RingingCommand(BaseModel):
+    """Payload of the ``ringing`` command, which sends a ringing response.
+
+    Attributes:
+        command: Command type tag; defaults to ``"ringing"``.
+        recorder: Optional free-form call recording configuration.
+        early_media: Whether early media is enabled; defaults to ``False``.
+        ringtone: Optional URL of a custom ringtone.
+    """
+
+    command: str = Field("ringing", description="Command type")
+    recorder: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Call recording configuration",
+    )
+    early_media: bool = Field(False, description="Enable early media")
+    ringtone: Optional[str] = Field(None, description="Custom ringtone URL")
+
+
+class TTSCommand(BaseModel):
+    """Payload of the ``tts`` command, which converts text to speech.
+
+    Attributes:
+        command: Command type tag; defaults to ``"tts"``.
+        text: Text to synthesize (required).
+        speaker: Optional speaker voice name.
+        play_id: Optional unique identifier for this TTS session.
+        auto_hangup: Hang up automatically after TTS completion.
+        streaming: Whether text input is streamed.
+        end_of_stream: Marks the end of streamed input.
+        wait_input_timeout: Optional maximum time to wait for input, in seconds.
+        option: Optional free-form, TTS-provider-specific options.
+    """
+
+    command: str = Field("tts", description="Command type")
+    text: str = Field(..., description="Text to synthesize")
+    speaker: Optional[str] = Field(None, description="Speaker voice name")
+    play_id: Optional[str] = Field(
+        None,
+        description="Unique identifier for this TTS session",
+    )
+    auto_hangup: bool = Field(
+        False,
+        description="Auto hangup after TTS completion",
+    )
+    streaming: bool = Field(False, description="Streaming text input")
+    end_of_stream: bool = Field(False, description="End of streaming input")
+    wait_input_timeout: Optional[int] = Field(
+        None,
+        description="Max time to wait for input (seconds)",
+    )
+    option: Optional[Dict[str, Any]] = Field(
+        None,
+        description="TTS provider specific options",
+    )
+
+
+class PlayCommand(BaseModel):
+    """Payload of the ``play`` command, which plays audio from a URL.
+
+    Attributes:
+        command: Command type tag; defaults to ``"play"``.
+        url: URL of the audio file to play (required).
+        auto_hangup: Hang up automatically after playback.
+        wait_input_timeout: Optional maximum time to wait for input, in seconds.
+    """
+
+    command: str = Field("play", description="Command type")
+    url: str = Field(..., description="URL of audio file to play")
+    auto_hangup: bool = Field(False, description="Auto hangup after playback")
+    wait_input_timeout: Optional[int] = Field(
+        None,
+        description="Max time to wait for input (seconds)",
+    )
+
+
+class InterruptCommand(BaseModel):
+    """Payload of the ``interrupt`` command, which interrupts current playback.
+
+    Attributes:
+        command: Command type tag; defaults to ``"interrupt"``.
+        graceful: When true, wait for the current TTS to complete;
+            defaults to ``False``.
+    """
+
+    command: str = Field("interrupt", description="Command type")
+    graceful: bool = Field(
+        False,
+        description="Wait for current TTS to complete",
+    )
+
+
+class PauseCommand(BaseModel):
+    """Payload of the ``pause`` command, which pauses current playback.
+
+    Attributes:
+        command: Command type tag; defaults to ``"pause"``.
+    """
+
+    command: str = Field("pause", description="Command type")
+
+
+class ResumeCommand(BaseModel):
+    """Payload of the ``resume`` command, which resumes paused playback.
+
+    Attributes:
+        command: Command type tag; defaults to ``"resume"``.
+    """
+
+    command: str = Field("resume", description="Command type")
+
+
+class HangupCommand(BaseModel):
+    """Payload of the ``hangup`` command, which ends the call.
+
+    Attributes:
+        command: Command type tag; defaults to ``"hangup"``.
+        reason: Optional reason for the hangup.
+        initiator: Optional indication of who initiated the hangup.
+    """
+
+    command: str = Field("hangup", description="Command type")
+    reason: Optional[str] = Field(None, description="Reason for hangup")
+    initiator: Optional[str] = Field(
+        None,
+        description="Who initiated the hangup",
+    )
+
+
+class HistoryCommand(BaseModel):
+    """Payload of the ``history`` command, which adds conversation history.
+
+    Attributes:
+        command: Command type tag; defaults to ``"history"``.
+        speaker: Speaker identifier (required).
+        text: Conversation text (required).
+    """
+
+    command: str = Field("history", description="Command type")
+    speaker: str = Field(default=..., description="Speaker identifier")
+    text: str = Field(default=..., description="Conversation text")
+
+
+class ChatCommand(BaseModel):
+    """Payload of the ``chat`` command, used for text-based conversation.
+
+    Attributes:
+        command: Command type tag; defaults to ``"chat"``.
+        text: Chat text message (required).
+    """
+
+    command: str = Field("chat", description="Command type")
+    text: str = Field(default=..., description="Chat text message")
+
+
+# Registry used by parse_command: maps each accepted value of the "command"
+# field to the model class that is instantiated for that command's payload.
+COMMAND_TYPES = {
+    "invite": InviteCommand,
+    "accept": AcceptCommand,
+    "reject": RejectCommand,
+    "ringing": RingingCommand,
+    "tts": TTSCommand,
+    "play": PlayCommand,
+    "interrupt": InterruptCommand,
+    "pause": PauseCommand,
+    "resume": ResumeCommand,
+    "hangup": HangupCommand,
+    "history": HistoryCommand,
+    "chat": ChatCommand,
+}
+
+
+def parse_command(data: Dict[str, Any]) -> BaseModel:
+    """
+    Build the command model selected by the payload's ``command`` field.
+
+    The ``command`` value is looked up in ``COMMAND_TYPES`` and the whole
+    payload is passed to the matching model class as keyword arguments.
+
+    Args:
+        data: JSON data as dictionary; must contain a ``command`` key.
+
+    Returns:
+        Parsed command model (an instance of the class registered for the
+        command).
+
+    Raises:
+        ValueError: If ``command`` is missing or empty, or if it does not
+            name a registered command (this includes non-string values
+            such as lists). Errors raised by the model constructor for
+            invalid field values propagate unchanged.
+    """
+    command_type = data.get("command")
+
+    if not command_type:
+        raise ValueError("Missing 'command' field")
+
+    # JSON allows arrays/objects in this position; those are unhashable, so
+    # looking them up directly would raise TypeError instead of the
+    # documented ValueError. Only string values can match a registry key.
+    if isinstance(command_type, str):
+        command_class = COMMAND_TYPES.get(command_type)
+    else:
+        command_class = None
+
+    if command_class is None:
+        raise ValueError(f"Unknown command type: {command_type}")
+
+    return command_class(**data)