From edcbc2cec7bcd8c0b766e8363d80c21566687e5b Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Thu, 12 Feb 2026 15:23:32 +0800 Subject: [PATCH] Add first turn option --- api/app/models.py | 1 + api/app/routers/assistants.py | 4 +++ api/app/schemas.py | 2 ++ api/init_db.py | 4 +++ api/tests/test_assistants.py | 4 +++ engine/core/duplex_pipeline.py | 42 ++++++++++++++---------- web/pages/Assistants.tsx | 60 ++++++++++++++++++++++++++++++---- web/services/backendApi.ts | 3 ++ web/types.ts | 1 + 9 files changed, 97 insertions(+), 24 deletions(-) diff --git a/api/app/models.py b/api/app/models.py index d1669f7..7b6c03f 100644 --- a/api/app/models.py +++ b/api/app/models.py @@ -112,6 +112,7 @@ class Assistant(Base): user_id: Mapped[int] = mapped_column(Integer, ForeignKey("users.id"), index=True) name: Mapped[str] = mapped_column(String(255), nullable=False) call_count: Mapped[int] = mapped_column(Integer, default=0) + first_turn_mode: Mapped[str] = mapped_column(String(32), default="bot_first") opener: Mapped[str] = mapped_column(Text, default="") generated_opener_enabled: Mapped[bool] = mapped_column(default=False) prompt: Mapped[str] = mapped_column(Text, default="") diff --git a/api/app/routers/assistants.py b/api/app/routers/assistants.py index 6a8d02e..468cfb9 100644 --- a/api/app/routers/assistants.py +++ b/api/app/routers/assistants.py @@ -20,6 +20,7 @@ def _is_siliconflow_vendor(vendor: Optional[str]) -> bool: def _resolve_runtime_metadata(db: Session, assistant: Assistant) -> dict: metadata = { "systemPrompt": assistant.prompt or "", + "firstTurnMode": assistant.first_turn_mode or "bot_first", "greeting": assistant.opener or "", "generatedOpenerEnabled": bool(assistant.generated_opener_enabled), "output": {"mode": "audio" if assistant.voice_output_enabled else "text"}, @@ -104,6 +105,7 @@ def assistant_to_dict(assistant: Assistant) -> dict: "id": assistant.id, "name": assistant.name, "callCount": assistant.call_count, + "firstTurnMode": assistant.first_turn_mode or "bot_first", "opener": assistant.opener or "", "generatedOpenerEnabled": bool(assistant.generated_opener_enabled), "prompt": assistant.prompt or "", @@ -131,6 +133,7 @@ def assistant_to_dict(assistant: Assistant) -> dict: def _apply_assistant_update(assistant: Assistant, update_data: dict) -> None: field_map = { "knowledgeBaseId": "knowledge_base_id", + "firstTurnMode": "first_turn_mode", "interruptionSensitivity": "interruption_sensitivity", "botCannotBeInterrupted": "bot_cannot_be_interrupted", "configMode": "config_mode", @@ -192,6 +195,7 @@ def create_assistant(data: AssistantCreate, db: Session = Depends(get_db)): id=str(uuid.uuid4())[:8], user_id=1, # 默认用户,后续添加认证 name=data.name, + first_turn_mode=data.firstTurnMode, opener=data.opener, generated_opener_enabled=data.generatedOpenerEnabled, prompt=data.prompt, diff --git a/api/app/schemas.py b/api/app/schemas.py index a974089..a2a486e 100644 --- a/api/app/schemas.py +++ b/api/app/schemas.py @@ -272,6 +272,7 @@ class ToolResourceOut(ToolResourceBase): # ============ Assistant ============ class AssistantBase(BaseModel): name: str + firstTurnMode: str = "bot_first" opener: str = "" generatedOpenerEnabled: bool = False prompt: str = "" @@ -300,6 +301,7 @@ class AssistantCreate(AssistantBase): class AssistantUpdate(BaseModel): name: Optional[str] = None + firstTurnMode: Optional[str] = None opener: Optional[str] = None generatedOpenerEnabled: Optional[bool] = None prompt: Optional[str] = None diff --git a/api/init_db.py b/api/init_db.py index fe8d342..8d8c137 100644 --- a/api/init_db.py +++ b/api/init_db.py @@ -38,6 +38,10 @@ def migrate_db_schema(): alter_statements.append( "ALTER TABLE assistants ADD COLUMN generated_opener_enabled BOOLEAN DEFAULT 0" ) + if "first_turn_mode" not in columns: + alter_statements.append( + "ALTER TABLE assistants ADD COLUMN first_turn_mode VARCHAR(32) DEFAULT 'bot_first'" + ) if "bot_cannot_be_interrupted" not in columns: alter_statements.append( "ALTER TABLE assistants ADD COLUMN bot_cannot_be_interrupted BOOLEAN DEFAULT 0" diff --git a/api/tests/test_assistants.py b/api/tests/test_assistants.py index 7bec31d..eaab617 100644 --- a/api/tests/test_assistants.py +++ b/api/tests/test_assistants.py @@ -24,6 +24,7 @@ class TestAssistantAPI: assert data["prompt"] == sample_assistant_data["prompt"] assert data["language"] == sample_assistant_data["language"] assert data["voiceOutputEnabled"] is True + assert data["firstTurnMode"] == "bot_first" assert data["generatedOpenerEnabled"] is False assert data["botCannotBeInterrupted"] is False assert "id" in data @@ -230,6 +231,7 @@ class TestAssistantAPI: def test_assistant_interrupt_and_generated_opener_flags(self, client, sample_assistant_data): sample_assistant_data.update({ + "firstTurnMode": "user_first", "generatedOpenerEnabled": True, "botCannotBeInterrupted": True, "interruptionSensitivity": 900, @@ -241,6 +243,7 @@ class TestAssistantAPI: get_resp = client.get(f"/api/assistants/{assistant_id}") assert get_resp.status_code == 200 payload = get_resp.json() + assert payload["firstTurnMode"] == "user_first" assert payload["generatedOpenerEnabled"] is True assert payload["botCannotBeInterrupted"] is True assert payload["interruptionSensitivity"] == 900 @@ -248,6 +251,7 @@ class TestAssistantAPI: runtime_resp = client.get(f"/api/assistants/{assistant_id}/runtime-config") assert runtime_resp.status_code == 200 metadata = runtime_resp.json()["sessionStartMetadata"] + assert metadata["firstTurnMode"] == "user_first" assert metadata["generatedOpenerEnabled"] is True assert metadata["bargeIn"]["enabled"] is False assert metadata["bargeIn"]["minDurationMs"] == 900 diff --git a/engine/core/duplex_pipeline.py b/engine/core/duplex_pipeline.py index a92a3f9..b836436 100644 --- a/engine/core/duplex_pipeline.py +++ b/engine/core/duplex_pipeline.py @@ -267,6 +267,7 @@ class DuplexPipeline: self._runtime_tts: Dict[str, Any] = {} self._runtime_output: Dict[str, Any] = {} self._runtime_system_prompt: Optional[str] = None + self._runtime_first_turn_mode: str = "bot_first" self._runtime_greeting: Optional[str] = None self._runtime_generated_opener_enabled: Optional[bool] = None self._runtime_barge_in_enabled: Optional[bool] = None @@ -303,6 +304,9 @@ class DuplexPipeline: self._runtime_system_prompt = str(metadata.get("systemPrompt") or "") if self._runtime_system_prompt: self.conversation.system_prompt = self._runtime_system_prompt + if "firstTurnMode" in metadata: + raw_mode = str(metadata.get("firstTurnMode") or "").strip().lower() + self._runtime_first_turn_mode = "user_first" if raw_mode == "user_first" else "bot_first" if "greeting" in metadata: greeting_payload = metadata.get("greeting") if isinstance(greeting_payload, dict): @@ -393,6 +397,9 @@ class DuplexPipeline: def _generated_opener_enabled(self) -> bool: return self._runtime_generated_opener_enabled is True + def _bot_starts_first(self) -> bool: + return self._runtime_first_turn_mode != "user_first" + def _barge_in_enabled(self) -> bool: if self._runtime_barge_in_enabled is not None: return self._runtime_barge_in_enabled @@ -540,23 +547,24 @@ class DuplexPipeline: # Resolve greeting once per session start. # Always emit text opener event so text-only sessions can display it. - greeting_to_speak = self.conversation.greeting - if self._generated_opener_enabled(): - generated_greeting = await self._generate_runtime_greeting() - if generated_greeting: - greeting_to_speak = generated_greeting - self.conversation.greeting = generated_greeting - if greeting_to_speak: - await self._send_event( - ev( - "assistant.response.final", - text=greeting_to_speak, - trackId=self.session_id, - ), - priority=20, - ) - if tts_output_enabled: - await self._speak(greeting_to_speak) + if self._bot_starts_first(): + greeting_to_speak = self.conversation.greeting + if self._generated_opener_enabled(): + generated_greeting = await self._generate_runtime_greeting() + if generated_greeting: + greeting_to_speak = generated_greeting + self.conversation.greeting = generated_greeting + if greeting_to_speak: + await self._send_event( + ev( + "assistant.response.final", + text=greeting_to_speak, + trackId=self.session_id, + ), + priority=20, + ) + if tts_output_enabled: + await self._speak(greeting_to_speak) except Exception as e: logger.error(f"Failed to start pipeline: {e}") diff --git a/web/pages/Assistants.tsx b/web/pages/Assistants.tsx index 157fcb7..fa0a4b6 100644 --- a/web/pages/Assistants.tsx +++ b/web/pages/Assistants.tsx @@ -117,6 +117,7 @@ export const AssistantsPage: React.FC = () => { const handleCreate = async () => { const newAssistantPayload: Partial = { name: 'New Assistant', + firstTurnMode: 'bot_first', opener: '', generatedOpenerEnabled: false, prompt: '', @@ -247,6 +248,7 @@ export const AssistantsPage: React.FC = () => { const isExternalConfig = selectedAssistant?.configMode === 'dify' || selectedAssistant?.configMode === 'fastgpt'; const isNoneConfig = selectedAssistant?.configMode === 'none' || !selectedAssistant?.configMode; const canAdjustInterruptionSensitivity = selectedAssistant?.botCannotBeInterrupted !== true; + const isBotFirstTurn = selectedAssistant?.firstTurnMode !== 'user_first'; return (
@@ -522,28 +524,63 @@ export const AssistantsPage: React.FC = () => {
+
+ + +
+
+

决定通话接通后由谁先开始第一句对话。

+
+ +
+
+
@@ -552,14 +589,22 @@ export const AssistantsPage: React.FC = () => { updateAssistant('opener', e.target.value)} - placeholder={selectedAssistant.generatedOpenerEnabled === true ? '将基于提示词自动生成开场白' : '例如:您好,我是您的专属AI助手...'} - disabled={selectedAssistant.generatedOpenerEnabled === true} + placeholder={ + !isBotFirstTurn + ? '当前为用户先说,开场白不会在首轮触发' + : selectedAssistant.generatedOpenerEnabled === true + ? '将基于提示词自动生成开场白' + : '例如:您好,我是您的专属AI助手...' + } + disabled={!isBotFirstTurn || selectedAssistant.generatedOpenerEnabled === true} className="bg-white/5 border-white/10 focus:border-primary/50 disabled:opacity-50 disabled:cursor-not-allowed" />

- {selectedAssistant.generatedOpenerEnabled === true - ? '通话接通后将根据提示词自动生成开场白。' - : '接通通话后的第一句话。'} + {!isBotFirstTurn + ? '已切换为“用户先说”,首轮不会发送开场白。' + : selectedAssistant.generatedOpenerEnabled === true + ? '通话接通后将根据提示词自动生成开场白。' + : '接通通话后的第一句话。'}

@@ -1836,6 +1881,7 @@ export const DebugDrawer: React.FC<{ mode: ttsEnabled ? 'audio' : 'text', }, systemPrompt: assistant.prompt || '', + firstTurnMode: assistant.firstTurnMode || 'bot_first', greeting: assistant.opener || '', generatedOpenerEnabled: assistant.generatedOpenerEnabled === true, bargeIn: { diff --git a/web/services/backendApi.ts b/web/services/backendApi.ts index 8028473..bea6c58 100644 --- a/web/services/backendApi.ts +++ b/web/services/backendApi.ts @@ -29,6 +29,7 @@ const mapAssistant = (raw: AnyRecord): Assistant => ({ id: String(readField(raw, ['id'], '')), name: readField(raw, ['name'], ''), callCount: Number(readField(raw, ['callCount', 'call_count'], 0)), + firstTurnMode: readField(raw, ['firstTurnMode', 'first_turn_mode'], 'bot_first') as 'bot_first' | 'user_first', opener: readField(raw, ['opener'], ''), generatedOpenerEnabled: Boolean(readField(raw, ['generatedOpenerEnabled', 'generated_opener_enabled'], false)), prompt: readField(raw, ['prompt'], ''), @@ -213,6 +214,7 @@ export const fetchAssistants = async (): Promise => { export const createAssistant = async (data: Partial): Promise => { const payload = { name: data.name || 'New Assistant', + firstTurnMode: data.firstTurnMode || 'bot_first', opener: data.opener || '', generatedOpenerEnabled: data.generatedOpenerEnabled ?? false, prompt: data.prompt || '', @@ -240,6 +242,7 @@ export const createAssistant = async (data: Partial): Promise): Promise => { const payload = { name: data.name, + firstTurnMode: data.firstTurnMode, opener: data.opener, generatedOpenerEnabled: data.generatedOpenerEnabled, prompt: data.prompt, diff --git a/web/types.ts b/web/types.ts index 34cf2df..069a829 100644 --- a/web/types.ts +++ b/web/types.ts @@ -3,6 +3,7 @@ export interface Assistant { id: string; name: string; callCount: number; + firstTurnMode?: 'bot_first' | 'user_first'; opener: string; generatedOpenerEnabled?: boolean; prompt: string;