xfyun ASR: keep one user bubble per dialog turn
This commit is contained in:
1
AGENTS.md
Normal file
1
AGENTS.md
Normal file
@@ -0,0 +1 @@
|
||||
Write readable, maintainable, and extensible code for a voice agent endpoint that uses pipecat as its engine.
|
||||
@@ -78,6 +78,12 @@ class XfyunASRService(STTService):
|
||||
self._sent_final_frame = False
|
||||
self._partials: list[str] = []
|
||||
self._last_text = ""
|
||||
# Text already finalized by xfyun within the current VAD turn. xfyun
|
||||
# may emit several status=2 segments within one turn (e.g. when the
|
||||
# user pauses briefly); each segment resets `_partials`/`_last_text`,
|
||||
# but interim frames pushed to clients should still grow
|
||||
# monotonically across segments. Reset on VADUserStartedSpeakingFrame.
|
||||
self._turn_committed_text = ""
|
||||
|
||||
async def cleanup(self) -> None:
|
||||
await self._close_utterance()
|
||||
@@ -95,6 +101,7 @@ class XfyunASRService(STTService):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, VADUserStartedSpeakingFrame):
|
||||
self._turn_committed_text = ""
|
||||
await self._start_utterance()
|
||||
elif isinstance(frame, VADUserStoppedSpeakingFrame):
|
||||
await self._finish_utterance()
|
||||
@@ -261,7 +268,7 @@ class XfyunASRService(STTService):
|
||||
self._last_text = text
|
||||
await self.push_frame(
|
||||
InterimTranscriptionFrame(
|
||||
text,
|
||||
self._turn_committed_text + text,
|
||||
self._user_id,
|
||||
time_now_iso8601(),
|
||||
_language_or_none(self._language),
|
||||
@@ -273,6 +280,10 @@ class XfyunASRService(STTService):
|
||||
final_text = self._last_text
|
||||
if final_text:
|
||||
self.confirm_finalize()
|
||||
# Emit just this segment's text. The pipecat user aggregator
|
||||
# concatenates TranscriptionFrames within a VAD turn, so we
|
||||
# must NOT prepend `_turn_committed_text` here or the
|
||||
# aggregated turn text would double-count earlier segments.
|
||||
await self.push_frame(
|
||||
TranscriptionFrame(
|
||||
final_text,
|
||||
@@ -282,6 +293,9 @@ class XfyunASRService(STTService):
|
||||
result=payload,
|
||||
)
|
||||
)
|
||||
# Accumulate so the next sub-session's interim frames carry
|
||||
# the full turn so far (used for client UI display only).
|
||||
self._turn_committed_text += final_text
|
||||
await self._close_utterance()
|
||||
|
||||
def _apply_recognition_result(self, recognition: dict[str, Any]) -> str:
|
||||
|
||||
@@ -538,9 +538,14 @@ function handleUserTranscript(text) {
|
||||
body.textContent = text;
|
||||
state.currentUserBubble.classList.remove("bubble--interim");
|
||||
} else {
|
||||
addBubble("user", text);
|
||||
state.currentUserBubble = addBubble("user", text);
|
||||
}
|
||||
state.currentUserBubble = null;
|
||||
// Intentionally keep `state.currentUserBubble` set. Streaming ASRs (e.g.
|
||||
// xfyun) can emit multiple interim/final cycles within a single dialog
|
||||
// turn — for example when the user pauses mid-sentence or the upstream
|
||||
// service segments the utterance. Keeping the bubble open until the
|
||||
// assistant starts replying (see `handleAssistantStarted` /
|
||||
// `response.audio.started`) collapses those cycles into one bubble.
|
||||
}
|
||||
|
||||
function handleUserTranscriptInterim(text) {
|
||||
@@ -590,6 +595,11 @@ function handleAssistantDelta(text) {
|
||||
|
||||
function handleAssistantStarted() {
|
||||
state.currentAssistantBubble = null;
|
||||
// Close the in-flight user bubble so the next user turn starts a fresh
|
||||
// one. We do this here (and on `response.audio.started`) rather than on
|
||||
// every `input.transcript.final`, because streaming ASRs may emit
|
||||
// several finals within a single dialog turn.
|
||||
state.currentUserBubble = null;
|
||||
}
|
||||
|
||||
function handleAssistantFinal(text, interrupted) {
|
||||
@@ -633,6 +643,7 @@ function handleEvent(event) {
|
||||
break;
|
||||
case "response.audio.started":
|
||||
setBotIndicator(true);
|
||||
state.currentUserBubble = null;
|
||||
break;
|
||||
case "response.audio.stopped":
|
||||
finalizeAssistantBubble();
|
||||
|
||||
Reference in New Issue
Block a user