diff --git a/.claude/launch.json b/.claude/launch.json
index dd66d24..d74c463 100644
--- a/.claude/launch.json
+++ b/.claude/launch.json
@@ -8,6 +8,13 @@
"cwd": "frontend",
"port": 3001,
"autoPort": false
+ },
+ {
+ "name": "ui-docker",
+ "runtimeExecutable": "docker",
+ "runtimeArgs": ["compose", "up", "ui"],
+ "port": 3030,
+ "autoPort": false
}
]
}
diff --git a/backend/services/pipecat/pipeline.py b/backend/services/pipecat/pipeline.py
index d3db21b..d63a892 100644
--- a/backend/services/pipecat/pipeline.py
+++ b/backend/services/pipecat/pipeline.py
@@ -10,11 +10,19 @@ from loguru import logger
from models import AssistantConfig
from services.pipecat.service_factory import create_services
-from pipecat.frames.frames import EndFrame, TTSSpeakFrame
+from pipecat.frames.frames import (
+ EndFrame,
+ InterruptionTaskFrame,
+ TranscriptionFrame,
+ TransportMessageUrgentFrame,
+ TTSSpeakFrame,
+)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
+from pipecat.processors.transcript_processor import TranscriptProcessor
+from pipecat.utils.time import time_now_iso8601
async def run_pipeline(transport, cfg: AssistantConfig) -> None:
@@ -32,14 +40,20 @@ async def run_pipeline(transport, cfg: AssistantConfig) -> None:
context = OpenAILLMContext(messages=[{"role": "system", "content": cfg.prompt}])
context_aggregator = llm.create_context_aggregator(context)
+ # 转写收集:user 侧收 ASR 最终转写,assistant 侧聚合 TTS 实际播报的文本,
+ # 统一通过 data channel 推给前端聊天记录面板。
+ transcript = TranscriptProcessor()
+
pipeline = Pipeline(
[
transport.input(),
stt,
+ transcript.user(),
context_aggregator.user(),
llm,
tts,
transport.output(),
+ transcript.assistant(),
context_aggregator.assistant(),
]
)
@@ -52,6 +66,39 @@ async def run_pipeline(transport, cfg: AssistantConfig) -> None:
),
)
+ @transcript.event_handler("on_transcript_update")
+ async def on_transcript_update(_processor, frame):
+ # Push each final transcript message (user or assistant) in this update to the frontend, which renders its chat log from them.
+ for msg in frame.messages:
+ await task.queue_frame(
+ TransportMessageUrgentFrame(
+ message={
+ "type": "transcript",
+ "role": msg.role,
+ "content": msg.content,
+ "timestamp": msg.timestamp,
+ }
+ )
+ )
+
+ @transport.event_handler("on_app_message")
+ async def on_app_message(_transport, message, _sender):
+ # Text typed in the frontend: interrupt the current playback first, then inject the text as a final user transcription,
+ # so it follows exactly the same transcription -> context -> LLM -> TTS path as speech. Non-"user-text" messages and blank text are ignored.
+ if not isinstance(message, dict) or message.get("type") != "user-text":
+ return
+ text = str(message.get("text") or "").strip()
+ if not text:
+ return
+ await task.queue_frames(
+ [
+ InterruptionTaskFrame(),
+ TranscriptionFrame(
+ text=text, user_id="debug", timestamp=time_now_iso8601()
+ ),
+ ]
+ )
+
@transport.event_handler("on_client_connected")
async def on_client_connected(_transport, _client):
if cfg.greeting:
diff --git a/frontend/src/components/pages/AssistantPage.tsx b/frontend/src/components/pages/AssistantPage.tsx
index 769cbfb..c5ab86f 100644
--- a/frontend/src/components/pages/AssistantPage.tsx
+++ b/frontend/src/components/pages/AssistantPage.tsx
@@ -78,7 +78,7 @@ import {
type Credential,
type KnowledgeBase,
} from "@/lib/api";
-import { useVoicePreview } from "@/hooks/use-voice-preview";
+import { useVoicePreview, type ChatMessage } from "@/hooks/use-voice-preview";
type RuntimeMode = "pipeline" | "realtime";
@@ -1856,19 +1856,28 @@ function DebugVoicePanel({
error,
micWarning,
localStream,
+ messages,
+ sendText,
connect,
disconnect,
audioRef,
} = useVoicePreview(assistantId);
// 连接中或已连通都视作"会话进行中"
const recording = status === "connecting" || status === "connected";
+ const [textDraft, setTextDraft] = useState("");
+
+ function handleSendText() { // Submit the current text draft through the voice-preview hook.
+ if (sendText(textDraft)) {
+ setTextDraft(""); // Clear the input only when sendText reported success, so an unsent draft is kept.
+ }
+ }
return (
{/* 后端 TTS 音频经 WebRTC 媒体流过来,挂这里播放 */}
{showTranscript ? (
-
+
) : (
@@ -1978,31 +2006,79 @@ function DebugVoicePanel({
);
}
-function DebugTranscriptPanel() {
+// Formats an ISO timestamp as zero-padded HH:MM in the local time zone; returns an empty string when the input cannot be parsed.
+function formatMessageTime(iso: string): string {
+ const d = new Date(iso);
+ if (Number.isNaN(d.getTime())) return "";
+ const pad = (n: number) => String(n).padStart(2, "0");
+ return `${pad(d.getHours())}:${pad(d.getMinutes())}`;
+}
+
+function DebugTranscriptPanel({
+ messages,
+ recording,
+}: {
+ messages: ChatMessage[];
+ recording: boolean;
+}) {
+ const scrollRef = useRef
(null);
+
+ // 新消息时滚到底部
+ useEffect(() => {
+ const el = scrollRef.current;
+ if (el) el.scrollTop = el.scrollHeight;
+ }, [messages]);
+
+ if (messages.length === 0) {
+ return (
+
+
+
+ {recording ? "暂无聊天记录" : "尚未开始对话"}
+
+
+ {recording
+ ? "开口说话或在下方输入文字,对话内容会实时显示在这里。"
+ : "点击「开始对话」后,语音与文字消息会实时显示在这里。"}
+
+
+ );
+ }
+
return (
-
+
-
-
助手 · 10:24
-
- 您好,我是 AI 视频助手,请问有什么可以帮您?
-
-
-
-
-
我 · 10:25
-
- 我想了解一下社保卡的办理流程。
-
-
-
-
-
助手 · 10:25
-
- 社保卡可通过线上或线下渠道办理。线上可在政务服务 App
- 提交申请,线下可前往社保经办网点。
-
-
+ {messages.map((message) => {
+ const time = formatMessageTime(message.timestamp);
+ return message.role === "assistant" ? (
+
+
+ 助手{time ? ` · ${time}` : ""}
+
+
+ {message.content}
+
+
+ ) : (
+
+
+ 我{time ? ` · ${time}` : ""}
+
+
+ {message.content}
+
+
+ );
+ })}
);
diff --git a/frontend/src/hooks/use-voice-preview.ts b/frontend/src/hooks/use-voice-preview.ts
index bd5f4e6..b573426 100644
--- a/frontend/src/hooks/use-voice-preview.ts
+++ b/frontend/src/hooks/use-voice-preview.ts
@@ -9,6 +9,10 @@
* client → {type:"ice-candidate", payload:{pc_id, candidate:{...}}}
* 音频本身走 WebRTC 媒体流(Opus),不经 ws;后端 TTS 帧从 ontrack 拿到直接播放。
*
+ * 另开一条 data channel 与后端管线(pipeline.py)互通应用消息:
+ * client → {type:"user-text", text} 文字输入(打断并触发新回复)
+ * server → {type:"transcript", role, content, timestamp} 用户/助手最终转写(聊天记录)
+ *
* 纯本机(localhost)即可跑:localhost 是 secure context,麦克风可用,ws 用明文。
* 局域网/别的设备要 https+wss,见 deploy/README.md。
*/
@@ -19,6 +23,14 @@ import { API_BASE } from "@/lib/api";
export type VoicePreviewStatus = "idle" | "connecting" | "connected" | "failed";
+export type ChatMessage = {
+ id: string;
+ role: "user" | "assistant";
+ content: string;
+ /** ISO timestamp from the backend; the hook substitutes the client's current time when the backend value is not a string. */
+ timestamp: string;
+};
+
// http→ws、https→wss,自动跟随 API 基址(同源反代时也对)
function wsBaseUrl(): string {
const url = new URL(API_BASE, window.location.origin);
@@ -62,11 +74,14 @@ export function useVoicePreview(assistantId: string | null) {
const [error, setError] = useState
(null);
const [micWarning, setMicWarning] = useState(null);
const [localStream, setLocalStream] = useState(null);
+ const [messages, setMessages] = useState([]);
const audioRef = useRef(null);
const pcRef = useRef(null);
const wsRef = useRef(null);
+ const dataChannelRef = useRef(null);
const localStreamRef = useRef(null);
const startingRef = useRef(false);
+ const messageSeqRef = useRef(0);
const releaseResources = useCallback(() => {
const ws = wsRef.current;
@@ -78,6 +93,13 @@ export function useVoicePreview(assistantId: string | null) {
ws.close();
}
+ const channel = dataChannelRef.current;
+ dataChannelRef.current = null;
+ if (channel) {
+ channel.onmessage = null;
+ channel.close();
+ }
+
const pc = pcRef.current;
pcRef.current = null;
if (pc) {
@@ -122,6 +144,7 @@ export function useVoicePreview(assistantId: string | null) {
startingRef.current = true;
setError(null);
setMicWarning(null);
+ setMessages([]); // 新会话清空上一轮聊天记录
setStatus("connecting");
// 麦克风是可选的:获取失败时继续建立仅接收后端音频的 WebRTC 会话。
@@ -213,6 +236,36 @@ export function useVoicePreview(assistantId: string | null) {
);
};
+ // App-message channel: receives backend transcripts (the chat log) and sends typed text input.
+ // Created from the browser side; the backend's SmallWebRTCConnection picks it up in its on("datachannel") handler.
+ const channel = pc.createDataChannel("chat");
+ dataChannelRef.current = channel;
+ channel.onmessage = (event) => {
+ try {
+ const msg = JSON.parse(event.data);
+ if (
+ msg?.type === "transcript" &&
+ (msg.role === "user" || msg.role === "assistant") &&
+ typeof msg.content === "string" &&
+ msg.content.trim()
+ ) {
+ messageSeqRef.current += 1;
+ const next: ChatMessage = {
+ id: `msg-${messageSeqRef.current}`,
+ role: msg.role,
+ content: msg.content,
+ timestamp:
+ typeof msg.timestamp === "string"
+ ? msg.timestamp
+ : new Date().toISOString(),
+ };
+ setMessages((prev) => [...prev, next]);
+ }
+ } catch {
+ /* Payload is not valid JSON: ignore it (unrecognised messages are already skipped by the check above). */
+ }
+ };
+
pc.ontrack = (e) => {
if (e.track.kind === "audio" && audioRef.current) {
audioRef.current.srcObject =
@@ -268,6 +321,16 @@ export function useVoicePreview(assistantId: string | null) {
}
}, [assistantId, fail]);
+ // Sends a text message: the backend interrupts the current playback, then produces a new reply from the user's input.
+ // Returns true once the message is handed to the channel; returns false when the text is blank or the channel is not open (no session yet / still connecting).
+ const sendText = useCallback((text: string): boolean => {
+ const trimmed = text.trim();
+ const channel = dataChannelRef.current;
+ if (!trimmed || !channel || channel.readyState !== "open") return false;
+ channel.send(JSON.stringify({ type: "user-text", text: trimmed }));
+ return true;
+ }, []);
+
// 卸载时收尾
useEffect(() => releaseResources, [releaseResources]);
@@ -276,6 +339,8 @@ export function useVoicePreview(assistantId: string | null) {
error,
micWarning,
localStream,
+ messages,
+ sendText,
connect,
disconnect,
audioRef,