Add bot non-interrupt option and generated opener

This commit is contained in:
Xin Wang
2026-02-12 13:51:27 +08:00
parent 6179053388
commit d41db6418c
9 changed files with 215 additions and 12 deletions

View File

@@ -268,6 +268,9 @@ class DuplexPipeline:
self._runtime_output: Dict[str, Any] = {}
self._runtime_system_prompt: Optional[str] = None
self._runtime_greeting: Optional[str] = None
self._runtime_generated_opener_enabled: Optional[bool] = None
self._runtime_barge_in_enabled: Optional[bool] = None
self._runtime_barge_in_min_duration_ms: Optional[int] = None
self._runtime_knowledge: Dict[str, Any] = {}
self._runtime_knowledge_base_id: Optional[str] = None
self._runtime_tools: List[Any] = []
@@ -301,8 +304,18 @@ class DuplexPipeline:
if self._runtime_system_prompt:
self.conversation.system_prompt = self._runtime_system_prompt
if "greeting" in metadata:
self._runtime_greeting = str(metadata.get("greeting") or "")
greeting_payload = metadata.get("greeting")
if isinstance(greeting_payload, dict):
self._runtime_greeting = str(greeting_payload.get("text") or "")
generated_flag = self._coerce_bool(greeting_payload.get("generated"))
if generated_flag is not None:
self._runtime_generated_opener_enabled = generated_flag
else:
self._runtime_greeting = str(greeting_payload or "")
self.conversation.greeting = self._runtime_greeting or None
generated_opener_flag = self._coerce_bool(metadata.get("generatedOpenerEnabled"))
if generated_opener_flag is not None:
self._runtime_generated_opener_enabled = generated_opener_flag
services = metadata.get("services") or {}
if isinstance(services, dict):
@@ -315,6 +328,17 @@ class DuplexPipeline:
output = metadata.get("output") or {}
if isinstance(output, dict):
self._runtime_output = output
barge_in = metadata.get("bargeIn")
if isinstance(barge_in, dict):
barge_in_enabled = self._coerce_bool(barge_in.get("enabled"))
if barge_in_enabled is not None:
self._runtime_barge_in_enabled = barge_in_enabled
min_duration = barge_in.get("minDurationMs")
if isinstance(min_duration, (int, float, str)):
try:
self._runtime_barge_in_min_duration_ms = max(0, int(min_duration))
except (TypeError, ValueError):
self._runtime_barge_in_min_duration_ms = None
knowledge_base_id = metadata.get("knowledgeBaseId")
if knowledge_base_id is not None:
@@ -366,6 +390,50 @@ class DuplexPipeline:
return True
def _generated_opener_enabled(self) -> bool:
return self._runtime_generated_opener_enabled is True
def _barge_in_enabled(self) -> bool:
if self._runtime_barge_in_enabled is not None:
return self._runtime_barge_in_enabled
return True
def _resolved_barge_in_min_duration_ms(self) -> int:
if self._runtime_barge_in_min_duration_ms is not None:
return self._runtime_barge_in_min_duration_ms
return self._barge_in_min_duration_ms
async def _generate_runtime_greeting(self) -> Optional[str]:
if not self.llm_service:
return None
prompt_hint = (self._runtime_greeting or "").strip()
system_prompt = (
"You generate one concise opener for a live voice call assistant. "
"Return plain text only, no quotes, no markdown, one sentence."
)
user_prompt = "Generate a friendly opening line (max 25 words)."
if prompt_hint:
user_prompt += f" Style hint: {prompt_hint}"
try:
generated = await self.llm_service.generate(
[
LLMMessage(role="system", content=system_prompt),
LLMMessage(role="user", content=user_prompt),
],
temperature=0.7,
max_tokens=64,
)
except Exception as exc:
logger.warning(f"Failed to generate runtime greeting: {exc}")
return None
text = (generated or "").strip()
if not text:
return None
return text.strip().strip('"').strip("'")
async def start(self) -> None:
"""Start the pipeline and connect services."""
try:
@@ -464,8 +532,15 @@ class DuplexPipeline:
self._outbound_task = asyncio.create_task(self._outbound_loop())
# Speak greeting if configured
if self.conversation.greeting and tts_output_enabled:
await self._speak(self.conversation.greeting)
if tts_output_enabled:
greeting_to_speak = self.conversation.greeting
if self._generated_opener_enabled():
generated_greeting = await self._generate_runtime_greeting()
if generated_greeting:
greeting_to_speak = generated_greeting
self.conversation.greeting = generated_greeting
if greeting_to_speak:
await self._speak(greeting_to_speak)
except Exception as e:
logger.error(f"Failed to start pipeline: {e}")
@@ -552,7 +627,7 @@ class DuplexPipeline:
# 2. Check for barge-in (user speaking while bot speaking)
# Filter false interruptions by requiring minimum speech duration
if self._is_bot_speaking:
if self._is_bot_speaking and self._barge_in_enabled():
if vad_status == "Speech":
# User is speaking while bot is speaking
self._barge_in_silence_frames = 0 # Reset silence counter
@@ -566,7 +641,7 @@ class DuplexPipeline:
self._barge_in_speech_frames += 1
# Check if speech duration exceeds threshold
speech_duration_ms = (time.time() - self._barge_in_speech_start_time) * 1000
if speech_duration_ms >= self._barge_in_min_duration_ms:
if speech_duration_ms >= self._resolved_barge_in_min_duration_ms():
logger.info(f"Barge-in confirmed after {speech_duration_ms:.0f}ms of speech ({self._barge_in_speech_frames} frames)")
await self._handle_barge_in()
else:
@@ -580,6 +655,10 @@ class DuplexPipeline:
self._barge_in_speech_start_time = None
self._barge_in_speech_frames = 0
self._barge_in_silence_frames = 0
elif self._is_bot_speaking and not self._barge_in_enabled():
self._barge_in_speech_start_time = None
self._barge_in_speech_frames = 0
self._barge_in_silence_frames = 0
# 3. Buffer audio for ASR
if vad_status == "Speech" or self.conversation.state == ConversationState.LISTENING: