Support text input with audio responses and barge-in (interruption)

This commit is contained in:
Xin Wang
2026-01-29 16:25:53 +08:00
parent cd90b4fb37
commit ac0c76e6e8
16 changed files with 3394 additions and 119 deletions

View File

@@ -8,6 +8,7 @@ from loguru import logger
from core.transports import BaseTransport
from core.pipeline import AudioPipeline
from models.commands import parse_command, TTSCommand, ChatCommand, InterruptCommand, HangupCommand
from app.config import settings
class Session:
@@ -15,28 +16,44 @@ class Session:
Manages a single call session.
Handles command routing, audio processing, and session lifecycle.
Supports both basic audio pipeline and full duplex voice conversation.
"""
def __init__(self, session_id: str, transport: BaseTransport, use_duplex: Optional[bool] = None):
    """
    Initialize session.

    Builds exactly one pipeline for the session: a DuplexPipeline when
    duplex mode is selected, otherwise the basic AudioPipeline.

    Args:
        session_id: Unique session identifier
        transport: Transport instance for communication
        use_duplex: Whether to use duplex pipeline. ``None`` (the default)
            defers to ``settings.duplex_enabled``; an explicit ``True`` or
            ``False`` overrides the setting.
    """
    self.id = session_id
    self.transport = transport

    # Determine pipeline mode. Compare against None (not truthiness) so an
    # explicit False from the caller is honoured over the configured default.
    self.use_duplex = use_duplex if use_duplex is not None else settings.duplex_enabled

    if self.use_duplex:
        # Imported locally, so core.duplex_pipeline is only loaded when
        # duplex mode is actually selected.
        from core.duplex_pipeline import DuplexPipeline

        self.pipeline = DuplexPipeline(
            transport=transport,
            session_id=session_id,
            system_prompt=settings.duplex_system_prompt,
            greeting=settings.duplex_greeting,
        )
    else:
        self.pipeline = AudioPipeline(transport, session_id)

    # Session state
    self.created_at = None
    self.state = "created"  # created, invited, accepted, ringing, hungup
    # Set to True once the duplex pipeline's start() has succeeded.
    self._pipeline_started = False

    # Track IDs
    self.current_track_id: Optional[str] = str(uuid.uuid4())

    logger.info(f"Session {self.id} created (duplex={self.use_duplex})")
async def handle_text(self, text_data: str) -> None:
"""
@@ -112,7 +129,10 @@ class Session:
audio_bytes: PCM audio data
"""
try:
await self.pipeline.process_input(audio_bytes)
if self.use_duplex:
await self.pipeline.process_audio(audio_bytes)
else:
await self.pipeline.process_input(audio_bytes)
except Exception as e:
logger.error(f"Session {self.id} handle_audio error: {e}", exc_info=True)
@@ -128,6 +148,15 @@ class Session:
"timestamp": self._get_timestamp_ms()
})
# Start duplex pipeline if enabled
if self.use_duplex and not self._pipeline_started:
try:
await self.pipeline.start()
self._pipeline_started = True
logger.info(f"Session {self.id} duplex pipeline started")
except Exception as e:
logger.error(f"Failed to start duplex pipeline: {e}")
logger.info(f"Session {self.id} invited with codec: {option.get('codec', 'pcm')}")
async def _handle_accept(self, data: Dict[str, Any]) -> None:
@@ -199,7 +228,10 @@ class Session:
logger.info(f"Session {self.id} graceful interrupt")
else:
logger.info(f"Session {self.id} immediate interrupt")
await self.pipeline.interrupt()
if self.use_duplex:
await self.pipeline.interrupt()
else:
await self.pipeline.interrupt()
async def _handle_pause(self) -> None:
"""Handle pause command."""
@@ -236,7 +268,10 @@ class Session:
"""Handle chat command."""
logger.info(f"Session {self.id} chat: {command.text[:50]}...")
# Process text input through pipeline
await self.pipeline.process_text_input(command.text)
if self.use_duplex:
await self.pipeline.process_text(command.text)
else:
await self.pipeline.process_text_input(command.text)
async def _send_error(self, sender: str, error_message: str) -> None:
"""