Add "bot cannot be interrupted" and generated-opener options

This commit is contained in:
Xin Wang
2026-02-12 13:51:27 +08:00
parent 6179053388
commit d41db6418c
9 changed files with 215 additions and 12 deletions

View File

@@ -2,15 +2,42 @@ from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
import os
from sqlalchemy import inspect, text
from .db import Base, engine
from .routers import assistants, voices, workflows, history, knowledge, llm, asr, tools
def _ensure_assistant_columns() -> None:
    """Best-effort SQLite schema evolution for assistant flags.

    Adds the boolean columns backing the generated-opener and
    no-interrupt features when they are missing from an existing
    ``assistants`` table. Intended to run once at startup.
    """
    inspector = inspect(engine)
    if "assistants" not in inspector.get_table_names():
        # Fresh database: table creation elsewhere will include all columns.
        return
    existing = {column["name"] for column in inspector.get_columns("assistants")}
    wanted = {
        "generated_opener_enabled": (
            "ALTER TABLE assistants ADD COLUMN generated_opener_enabled BOOLEAN DEFAULT 0"
        ),
        "bot_cannot_be_interrupted": (
            "ALTER TABLE assistants ADD COLUMN bot_cannot_be_interrupted BOOLEAN DEFAULT 0"
        ),
    }
    pending = [ddl for name, ddl in wanted.items() if name not in existing]
    if not pending:
        return
    # Apply all missing columns inside a single transaction.
    with engine.begin() as conn:
        for ddl in pending:
            conn.execute(text(ddl))
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: prepare the database before serving requests."""
    # Create tables on startup.
    Base.metadata.create_all(bind=engine)
    # Best-effort migration for assistant flag columns added after initial release.
    _ensure_assistant_columns()
    yield

View File

@@ -113,6 +113,7 @@ class Assistant(Base):
name: Mapped[str] = mapped_column(String(255), nullable=False)
call_count: Mapped[int] = mapped_column(Integer, default=0)
opener: Mapped[str] = mapped_column(Text, default="")
generated_opener_enabled: Mapped[bool] = mapped_column(default=False)
prompt: Mapped[str] = mapped_column(Text, default="")
knowledge_base_id: Mapped[Optional[str]] = mapped_column(String(64), nullable=True)
language: Mapped[str] = mapped_column(String(16), default="zh")
@@ -121,6 +122,7 @@ class Assistant(Base):
speed: Mapped[float] = mapped_column(Float, default=1.0)
hotwords: Mapped[dict] = mapped_column(JSON, default=list)
tools: Mapped[dict] = mapped_column(JSON, default=list)
bot_cannot_be_interrupted: Mapped[bool] = mapped_column(default=False)
interruption_sensitivity: Mapped[int] = mapped_column(Integer, default=500)
config_mode: Mapped[str] = mapped_column(String(32), default="platform")
api_url: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)

View File

@@ -21,7 +21,12 @@ def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> dict:
metadata = {
"systemPrompt": assistant.prompt or "",
"greeting": assistant.opener or "",
"generatedOpenerEnabled": bool(assistant.generated_opener_enabled),
"output": {"mode": "audio" if assistant.voice_output_enabled else "text"},
"bargeIn": {
"enabled": not bool(assistant.bot_cannot_be_interrupted),
"minDurationMs": int(assistant.interruption_sensitivity or 500),
},
"services": {},
}
warnings = []
@@ -100,6 +105,7 @@ def assistant_to_dict(assistant: Assistant) -> dict:
"name": assistant.name,
"callCount": assistant.call_count,
"opener": assistant.opener or "",
"generatedOpenerEnabled": bool(assistant.generated_opener_enabled),
"prompt": assistant.prompt or "",
"knowledgeBaseId": assistant.knowledge_base_id,
"language": assistant.language,
@@ -108,6 +114,7 @@ def assistant_to_dict(assistant: Assistant) -> dict:
"speed": assistant.speed,
"hotwords": assistant.hotwords or [],
"tools": assistant.tools or [],
"botCannotBeInterrupted": bool(assistant.bot_cannot_be_interrupted),
"interruptionSensitivity": assistant.interruption_sensitivity,
"configMode": assistant.config_mode,
"apiUrl": assistant.api_url,
@@ -125,8 +132,10 @@ def _apply_assistant_update(assistant: Assistant, update_data: dict) -> None:
field_map = {
"knowledgeBaseId": "knowledge_base_id",
"interruptionSensitivity": "interruption_sensitivity",
"botCannotBeInterrupted": "bot_cannot_be_interrupted",
"configMode": "config_mode",
"voiceOutputEnabled": "voice_output_enabled",
"generatedOpenerEnabled": "generated_opener_enabled",
"apiUrl": "api_url",
"apiKey": "api_key",
"llmModelId": "llm_model_id",
@@ -184,6 +193,7 @@ def create_assistant(data: AssistantCreate, db: Session = Depends(get_db)):
user_id=1, # 默认用户,后续添加认证
name=data.name,
opener=data.opener,
generated_opener_enabled=data.generatedOpenerEnabled,
prompt=data.prompt,
knowledge_base_id=data.knowledgeBaseId,
language=data.language,
@@ -192,6 +202,7 @@ def create_assistant(data: AssistantCreate, db: Session = Depends(get_db)):
speed=data.speed,
hotwords=data.hotwords,
tools=data.tools,
bot_cannot_be_interrupted=data.botCannotBeInterrupted,
interruption_sensitivity=data.interruptionSensitivity,
config_mode=data.configMode,
api_url=data.apiUrl,

View File

@@ -273,6 +273,7 @@ class ToolResourceOut(ToolResourceBase):
class AssistantBase(BaseModel):
name: str
opener: str = ""
generatedOpenerEnabled: bool = False
prompt: str = ""
knowledgeBaseId: Optional[str] = None
language: str = "zh"
@@ -281,6 +282,7 @@ class AssistantBase(BaseModel):
speed: float = 1.0
hotwords: List[str] = []
tools: List[str] = []
botCannotBeInterrupted: bool = False
interruptionSensitivity: int = 500
configMode: str = "platform"
apiUrl: Optional[str] = None
@@ -299,6 +301,7 @@ class AssistantCreate(AssistantBase):
class AssistantUpdate(BaseModel):
name: Optional[str] = None
opener: Optional[str] = None
generatedOpenerEnabled: Optional[bool] = None
prompt: Optional[str] = None
knowledgeBaseId: Optional[str] = None
language: Optional[str] = None
@@ -307,6 +310,7 @@ class AssistantUpdate(BaseModel):
speed: Optional[float] = None
hotwords: Optional[List[str]] = None
tools: Optional[List[str]] = None
botCannotBeInterrupted: Optional[bool] = None
interruptionSensitivity: Optional[int] = None
configMode: Optional[str] = None
apiUrl: Optional[str] = None

View File

@@ -24,6 +24,8 @@ class TestAssistantAPI:
assert data["prompt"] == sample_assistant_data["prompt"]
assert data["language"] == sample_assistant_data["language"]
assert data["voiceOutputEnabled"] is True
assert data["generatedOpenerEnabled"] is False
assert data["botCannotBeInterrupted"] is False
assert "id" in data
assert data["callCount"] == 0
@@ -225,3 +227,27 @@ class TestAssistantAPI:
metadata = runtime_resp.json()["sessionStartMetadata"]
assert metadata["output"]["mode"] == "text"
assert metadata["services"]["tts"]["enabled"] is False
def test_assistant_interrupt_and_generated_opener_flags(self, client, sample_assistant_data):
    """Round-trip the interrupt/opener flags through create, read, and runtime config."""
    # Enable both new flags and pick a non-default sensitivity value.
    sample_assistant_data["generatedOpenerEnabled"] = True
    sample_assistant_data["botCannotBeInterrupted"] = True
    sample_assistant_data["interruptionSensitivity"] = 900

    created = client.post("/api/assistants", json=sample_assistant_data)
    assert created.status_code == 200
    assistant_id = created.json()["id"]

    fetched = client.get(f"/api/assistants/{assistant_id}")
    assert fetched.status_code == 200
    body = fetched.json()
    assert body["generatedOpenerEnabled"] is True
    assert body["botCannotBeInterrupted"] is True
    assert body["interruptionSensitivity"] == 900

    runtime = client.get(f"/api/assistants/{assistant_id}/runtime-config")
    assert runtime.status_code == 200
    session_meta = runtime.json()["sessionStartMetadata"]
    assert session_meta["generatedOpenerEnabled"] is True
    # Uninterruptible bot means barge-in disabled, sensitivity forwarded as min duration.
    assert session_meta["bargeIn"]["enabled"] is False
    assert session_meta["bargeIn"]["minDurationMs"] == 900

View File

@@ -268,6 +268,9 @@ class DuplexPipeline:
self._runtime_output: Dict[str, Any] = {}
self._runtime_system_prompt: Optional[str] = None
self._runtime_greeting: Optional[str] = None
self._runtime_generated_opener_enabled: Optional[bool] = None
self._runtime_barge_in_enabled: Optional[bool] = None
self._runtime_barge_in_min_duration_ms: Optional[int] = None
self._runtime_knowledge: Dict[str, Any] = {}
self._runtime_knowledge_base_id: Optional[str] = None
self._runtime_tools: List[Any] = []
@@ -301,8 +304,18 @@ class DuplexPipeline:
if self._runtime_system_prompt:
self.conversation.system_prompt = self._runtime_system_prompt
if "greeting" in metadata:
self._runtime_greeting = str(metadata.get("greeting") or "")
greeting_payload = metadata.get("greeting")
if isinstance(greeting_payload, dict):
self._runtime_greeting = str(greeting_payload.get("text") or "")
generated_flag = self._coerce_bool(greeting_payload.get("generated"))
if generated_flag is not None:
self._runtime_generated_opener_enabled = generated_flag
else:
self._runtime_greeting = str(greeting_payload or "")
self.conversation.greeting = self._runtime_greeting or None
generated_opener_flag = self._coerce_bool(metadata.get("generatedOpenerEnabled"))
if generated_opener_flag is not None:
self._runtime_generated_opener_enabled = generated_opener_flag
services = metadata.get("services") or {}
if isinstance(services, dict):
@@ -315,6 +328,17 @@ class DuplexPipeline:
output = metadata.get("output") or {}
if isinstance(output, dict):
self._runtime_output = output
barge_in = metadata.get("bargeIn")
if isinstance(barge_in, dict):
barge_in_enabled = self._coerce_bool(barge_in.get("enabled"))
if barge_in_enabled is not None:
self._runtime_barge_in_enabled = barge_in_enabled
min_duration = barge_in.get("minDurationMs")
if isinstance(min_duration, (int, float, str)):
try:
self._runtime_barge_in_min_duration_ms = max(0, int(min_duration))
except (TypeError, ValueError):
self._runtime_barge_in_min_duration_ms = None
knowledge_base_id = metadata.get("knowledgeBaseId")
if knowledge_base_id is not None:
@@ -366,6 +390,50 @@ class DuplexPipeline:
return True
def _generated_opener_enabled(self) -> bool:
return self._runtime_generated_opener_enabled is True
def _barge_in_enabled(self) -> bool:
if self._runtime_barge_in_enabled is not None:
return self._runtime_barge_in_enabled
return True
def _resolved_barge_in_min_duration_ms(self) -> int:
if self._runtime_barge_in_min_duration_ms is not None:
return self._runtime_barge_in_min_duration_ms
return self._barge_in_min_duration_ms
async def _generate_runtime_greeting(self) -> Optional[str]:
    """Ask the LLM for a one-line opener; return None on failure or empty output."""
    if not self.llm_service:
        return None
    hint = (self._runtime_greeting or "").strip()
    system_prompt = (
        "You generate one concise opener for a live voice call assistant. "
        "Return plain text only, no quotes, no markdown, one sentence."
    )
    user_prompt = "Generate a friendly opening line (max 25 words)."
    if hint:
        # Reuse the configured opener text as a style hint for the model.
        user_prompt += f" Style hint: {hint}"
    messages = [
        LLMMessage(role="system", content=system_prompt),
        LLMMessage(role="user", content=user_prompt),
    ]
    try:
        raw = await self.llm_service.generate(
            messages,
            temperature=0.7,
            max_tokens=64,
        )
    except Exception as exc:
        # Best effort: caller falls back to the static opener instead of failing.
        logger.warning(f"Failed to generate runtime greeting: {exc}")
        return None
    cleaned = (raw or "").strip()
    if not cleaned:
        return None
    # Models sometimes wrap the line in quotes despite the instructions.
    return cleaned.strip('"').strip("'")
async def start(self) -> None:
"""Start the pipeline and connect services."""
try:
@@ -464,8 +532,15 @@ class DuplexPipeline:
self._outbound_task = asyncio.create_task(self._outbound_loop())
# Speak greeting if configured
if self.conversation.greeting and tts_output_enabled:
await self._speak(self.conversation.greeting)
if tts_output_enabled:
greeting_to_speak = self.conversation.greeting
if self._generated_opener_enabled():
generated_greeting = await self._generate_runtime_greeting()
if generated_greeting:
greeting_to_speak = generated_greeting
self.conversation.greeting = generated_greeting
if greeting_to_speak:
await self._speak(greeting_to_speak)
except Exception as e:
logger.error(f"Failed to start pipeline: {e}")
@@ -552,7 +627,7 @@ class DuplexPipeline:
# 2. Check for barge-in (user speaking while bot speaking)
# Filter false interruptions by requiring minimum speech duration
if self._is_bot_speaking:
if self._is_bot_speaking and self._barge_in_enabled():
if vad_status == "Speech":
# User is speaking while bot is speaking
self._barge_in_silence_frames = 0 # Reset silence counter
@@ -566,7 +641,7 @@ class DuplexPipeline:
self._barge_in_speech_frames += 1
# Check if speech duration exceeds threshold
speech_duration_ms = (time.time() - self._barge_in_speech_start_time) * 1000
if speech_duration_ms >= self._barge_in_min_duration_ms:
if speech_duration_ms >= self._resolved_barge_in_min_duration_ms():
logger.info(f"Barge-in confirmed after {speech_duration_ms:.0f}ms of speech ({self._barge_in_speech_frames} frames)")
await self._handle_barge_in()
else:
@@ -580,6 +655,10 @@ class DuplexPipeline:
self._barge_in_speech_start_time = None
self._barge_in_speech_frames = 0
self._barge_in_silence_frames = 0
elif self._is_bot_speaking and not self._barge_in_enabled():
self._barge_in_speech_start_time = None
self._barge_in_speech_frames = 0
self._barge_in_silence_frames = 0
# 3. Buffer audio for ASR
if vad_status == "Speech" or self.conversation.state == ConversationState.LISTENING:

View File

@@ -118,6 +118,7 @@ export const AssistantsPage: React.FC = () => {
const newAssistantPayload: Partial<Assistant> = {
name: 'New Assistant',
opener: '',
generatedOpenerEnabled: false,
prompt: '',
knowledgeBaseId: '',
language: 'zh',
@@ -126,6 +127,7 @@ export const AssistantsPage: React.FC = () => {
speed: 1,
hotwords: [],
tools: [],
botCannotBeInterrupted: false,
interruptionSensitivity: 500,
configMode: 'platform',
};
@@ -244,6 +246,7 @@ export const AssistantsPage: React.FC = () => {
const isExternalConfig = selectedAssistant?.configMode === 'dify' || selectedAssistant?.configMode === 'fastgpt';
const isNoneConfig = selectedAssistant?.configMode === 'none' || !selectedAssistant?.configMode;
const canAdjustInterruptionSensitivity = selectedAssistant?.botCannotBeInterrupted !== true;
return (
<div className="flex h-full min-h-0 gap-6 animate-in fade-in">
@@ -524,11 +527,30 @@ export const AssistantsPage: React.FC = () => {
value={selectedAssistant.opener}
onChange={(e) => updateAssistant('opener', e.target.value)}
placeholder="例如您好我是您的专属AI助手..."
className="bg-white/5 border-white/10 focus:border-primary/50"
disabled={selectedAssistant.generatedOpenerEnabled === true}
className="bg-white/5 border-white/10 focus:border-primary/50 disabled:opacity-50 disabled:cursor-not-allowed"
/>
<p className="text-xs text-muted-foreground"></p>
</div>
<div className="space-y-2">
<label className="text-sm font-medium text-white flex items-center">
<Sparkles className="w-4 h-4 mr-2 text-primary" /> Generated Opener
</label>
<label className="flex h-12 items-center justify-between rounded-xl border border-white/10 bg-white/5 px-4 text-sm">
<span className="text-foreground"></span>
<input
type="checkbox"
checked={selectedAssistant.generatedOpenerEnabled === true}
onChange={(e) => updateAssistant('generatedOpenerEnabled', e.target.checked)}
className="accent-primary"
/>
</label>
<p className="text-xs text-muted-foreground">
</p>
</div>
<div className="space-y-2">
<label className="text-sm font-medium text-white flex items-center">
<BotIcon className="w-4 h-4 mr-2 text-primary"/> (Prompt)
@@ -679,8 +701,23 @@ export const AssistantsPage: React.FC = () => {
</div>
<div className="space-y-4 pt-2">
<div className="flex justify-between items-center mb-1">
<div className="space-y-2">
<label className="text-sm font-medium text-white flex items-center">
<ArrowLeftRight className="w-4 h-4 mr-2 text-primary" /> Bot cannot be interrupted
</label>
<label className="flex h-12 items-center justify-between rounded-xl border border-white/10 bg-white/5 px-4 text-sm">
<span className="text-foreground"></span>
<input
type="checkbox"
checked={selectedAssistant.botCannotBeInterrupted === true}
onChange={(e) => updateAssistant('botCannotBeInterrupted', e.target.checked)}
className="accent-primary"
/>
</label>
</div>
<div className="flex justify-between items-center mb-1">
<label className={`text-sm font-medium flex items-center ${canAdjustInterruptionSensitivity ? 'text-white' : 'text-muted-foreground'}`}>
<Timer className="w-4 h-4 mr-2 text-primary"/> (Interruption Sensitivity)
</label>
<div className="flex items-center gap-2">
@@ -689,7 +726,8 @@ export const AssistantsPage: React.FC = () => {
type="number"
value={selectedAssistant.interruptionSensitivity || 500}
onChange={(e) => updateAssistant('interruptionSensitivity', parseInt(e.target.value) || 0)}
className="w-20 h-8 text-right pr-7 text-xs font-mono bg-black/40 border-white/5"
disabled={!canAdjustInterruptionSensitivity}
className="w-20 h-8 text-right pr-7 text-xs font-mono bg-black/40 border-white/5 disabled:opacity-40 disabled:cursor-not-allowed"
/>
<span className="absolute right-2 top-1/2 -translate-y-1/2 text-[10px] text-muted-foreground font-mono">ms</span>
</div>
@@ -703,16 +741,19 @@ export const AssistantsPage: React.FC = () => {
step="50"
value={selectedAssistant.interruptionSensitivity || 500}
onChange={(e) => updateAssistant('interruptionSensitivity', parseInt(e.target.value))}
className="flex-1 h-1.5 bg-secondary rounded-lg appearance-none cursor-pointer accent-primary"
disabled={!canAdjustInterruptionSensitivity}
className="flex-1 h-1.5 bg-secondary rounded-lg appearance-none cursor-pointer accent-primary disabled:opacity-40 disabled:cursor-not-allowed"
/>
</div>
<div className="flex justify-between text-[10px] text-muted-foreground font-mono uppercase tracking-widest px-0.5 opacity-50">
<div className={`flex justify-between text-[10px] font-mono uppercase tracking-widest px-0.5 ${canAdjustInterruptionSensitivity ? 'text-muted-foreground opacity-50' : 'text-muted-foreground/60 opacity-35'}`}>
<span>0ms (Extreme)</span>
<span>1000ms</span>
<span>2000ms (Lazy)</span>
</div>
<p className="text-xs text-muted-foreground pt-1 italic opacity-60">
* AI
<p className={`text-xs pt-1 italic ${canAdjustInterruptionSensitivity ? 'text-muted-foreground opacity-60' : 'text-muted-foreground/70 opacity-50'}`}>
{canAdjustInterruptionSensitivity
? '* 定义用户说话多长时间后 AI 应当停止当前的发言并响应。数值越小响应越快,但也更容易被噪音误导打断。'
: '* 当前已开启“机器人不可打断”VAD 打断灵敏度已禁用。'}
</p>
</div>
@@ -1760,6 +1801,11 @@ export const DebugDrawer: React.FC<{
},
systemPrompt: assistant.prompt || '',
greeting: assistant.opener || '',
generatedOpenerEnabled: assistant.generatedOpenerEnabled === true,
bargeIn: {
enabled: assistant.botCannotBeInterrupted !== true,
minDurationMs: assistant.interruptionSensitivity || 500,
},
knowledgeBaseId,
knowledge,
tools: selectedToolSchemas,

View File

@@ -30,6 +30,7 @@ const mapAssistant = (raw: AnyRecord): Assistant => ({
name: readField(raw, ['name'], ''),
callCount: Number(readField(raw, ['callCount', 'call_count'], 0)),
opener: readField(raw, ['opener'], ''),
generatedOpenerEnabled: Boolean(readField(raw, ['generatedOpenerEnabled', 'generated_opener_enabled'], false)),
prompt: readField(raw, ['prompt'], ''),
knowledgeBaseId: readField(raw, ['knowledgeBaseId', 'knowledge_base_id'], ''),
language: readField(raw, ['language'], 'zh') as 'zh' | 'en',
@@ -38,6 +39,7 @@ const mapAssistant = (raw: AnyRecord): Assistant => ({
speed: Number(readField(raw, ['speed'], 1)),
hotwords: readField(raw, ['hotwords'], []),
tools: readField(raw, ['tools'], []),
botCannotBeInterrupted: Boolean(readField(raw, ['botCannotBeInterrupted', 'bot_cannot_be_interrupted'], false)),
interruptionSensitivity: Number(readField(raw, ['interruptionSensitivity', 'interruption_sensitivity'], 500)),
configMode: readField(raw, ['configMode', 'config_mode'], 'platform') as 'platform' | 'dify' | 'fastgpt' | 'none',
apiUrl: readField(raw, ['apiUrl', 'api_url'], ''),
@@ -212,6 +214,7 @@ export const createAssistant = async (data: Partial<Assistant>): Promise<Assista
const payload = {
name: data.name || 'New Assistant',
opener: data.opener || '',
generatedOpenerEnabled: data.generatedOpenerEnabled ?? false,
prompt: data.prompt || '',
knowledgeBaseId: data.knowledgeBaseId || '',
language: data.language || 'zh',
@@ -220,6 +223,7 @@ export const createAssistant = async (data: Partial<Assistant>): Promise<Assista
speed: data.speed ?? 1,
hotwords: data.hotwords || [],
tools: data.tools || [],
botCannotBeInterrupted: data.botCannotBeInterrupted ?? false,
interruptionSensitivity: data.interruptionSensitivity ?? 500,
configMode: data.configMode || 'platform',
apiUrl: data.apiUrl || '',
@@ -237,6 +241,7 @@ export const updateAssistant = async (id: string, data: Partial<Assistant>): Pro
const payload = {
name: data.name,
opener: data.opener,
generatedOpenerEnabled: data.generatedOpenerEnabled,
prompt: data.prompt,
knowledgeBaseId: data.knowledgeBaseId,
language: data.language,
@@ -245,6 +250,7 @@ export const updateAssistant = async (id: string, data: Partial<Assistant>): Pro
speed: data.speed,
hotwords: data.hotwords,
tools: data.tools,
botCannotBeInterrupted: data.botCannotBeInterrupted,
interruptionSensitivity: data.interruptionSensitivity,
configMode: data.configMode,
apiUrl: data.apiUrl,

View File

@@ -4,6 +4,7 @@ export interface Assistant {
name: string;
callCount: number;
opener: string;
generatedOpenerEnabled?: boolean;
prompt: string;
knowledgeBaseId: string;
language: 'zh' | 'en';
@@ -12,6 +13,7 @@ export interface Assistant {
speed: number;
hotwords: string[];
tools?: string[]; // IDs of enabled tools
botCannotBeInterrupted?: boolean;
interruptionSensitivity?: number; // In ms
configMode?: 'platform' | 'dify' | 'fastgpt' | 'none';
apiUrl?: string;