Text input now produces an audio response and supports barge-in (interruption)

2026-01-29 16:25:53 +08:00
parent cd90b4fb37
commit ac0c76e6e8
16 changed files with 3394 additions and 119 deletions
--- a/core/session.py
+++ b/core/session.py
@@ -8,6 +8,7 @@ from loguru import logger
 from core.transports import BaseTransport
 from core.pipeline import AudioPipeline
 from models.commands import parse_command, TTSCommand, ChatCommand, InterruptCommand, HangupCommand
+from app.config import settings


 class Session:
@@ -15,28 +16,44 @@ class Session:
    Manages a single call session.

    Handles command routing, audio processing, and session lifecycle.
+    Supports both basic audio pipeline and full duplex voice conversation.
    """

-    def __init__(self, session_id: str, transport: BaseTransport):
+    def __init__(self, session_id: str, transport: BaseTransport, use_duplex: Optional[bool] = None):
        """
        Initialize session.

        Args:
            session_id: Unique session identifier
            transport: Transport instance for communication
+            use_duplex: True/False picks the duplex/basic pipeline; None (default) uses settings.duplex_enabled
        """
        self.id = session_id
        self.transport = transport
-        self.pipeline = AudioPipeline(transport, session_id)
+
+        # An explicit use_duplex argument wins; None falls back to settings.duplex_enabled
+        self.use_duplex = use_duplex if use_duplex is not None else settings.duplex_enabled
+
+        if self.use_duplex:
+            from core.duplex_pipeline import DuplexPipeline  # imported only when duplex mode is selected
+            self.pipeline = DuplexPipeline(
+                transport=transport,
+                session_id=session_id,
+                system_prompt=settings.duplex_system_prompt,
+                greeting=settings.duplex_greeting
+            )
+        else:
+            self.pipeline = AudioPipeline(transport, session_id)

        # Session state
        self.created_at = None
        self.state = "created"  # created, invited, accepted, ringing, hungup
+        self._pipeline_started = False  # flipped to True once pipeline.start() has succeeded

        # Track IDs
        self.current_track_id: Optional[str] = str(uuid.uuid4())

-        logger.info(f"Session {self.id} created")
+        logger.info(f"Session {self.id} created (duplex={self.use_duplex})")

    async def handle_text(self, text_data: str) -> None:
        """
@@ -112,7 +129,10 @@ class Session:
            audio_bytes: PCM audio data
        """
        try:
-            await self.pipeline.process_input(audio_bytes)
+            if self.use_duplex:
+                await self.pipeline.process_audio(audio_bytes)
+            else:
+                await self.pipeline.process_input(audio_bytes)
        except Exception as e:
            logger.error(f"Session {self.id} handle_audio error: {e}", exc_info=True)

@@ -128,6 +148,15 @@ class Session:
            "timestamp": self._get_timestamp_ms()
        })

+        # Start duplex pipeline if enabled
+        if self.use_duplex and not self._pipeline_started:
+            try:
+                await self.pipeline.start()
+                self._pipeline_started = True
+                logger.info(f"Session {self.id} duplex pipeline started")
+            except Exception as e:
+                logger.error(f"Failed to start duplex pipeline: {e}")
+
        logger.info(f"Session {self.id} invited with codec: {option.get('codec', 'pcm')}")

    async def _handle_accept(self, data: Dict[str, Any]) -> None:
@@ -199,7 +228,10 @@ class Session:
            logger.info(f"Session {self.id} graceful interrupt")
        else:
            logger.info(f"Session {self.id} immediate interrupt")
-            await self.pipeline.interrupt()
+            if self.use_duplex:
+                await self.pipeline.interrupt()
+            else:
+                await self.pipeline.interrupt()

    async def _handle_pause(self) -> None:
        """Handle pause command."""
@@ -236,7 +268,10 @@ class Session:
        """Handle chat command."""
        logger.info(f"Session {self.id} chat: {command.text[:50]}...")
        # Process text input through pipeline
-        await self.pipeline.process_text_input(command.text)
+        if self.use_duplex:
+            await self.pipeline.process_text(command.text)
+        else:
+            await self.pipeline.process_text_input(command.text)

    async def _send_error(self, sender: str, error_message: str) -> None:
        """