Improve tts stream

This commit is contained in:
Xin Wang
2026-02-09 13:39:55 +08:00
parent 7df11dd846
commit 5349ed88e7
2 changed files with 56 additions and 20 deletions

View File

@@ -634,7 +634,6 @@ class DuplexPipeline:
break break
await self.transport.send_audio(chunk.audio) await self.transport.send_audio(chunk.audio)
await asyncio.sleep(0.005) # Small delay to prevent flooding
except asyncio.CancelledError: except asyncio.CancelledError:
logger.debug("TTS sentence cancelled") logger.debug("TTS sentence cancelled")
except Exception as e: except Exception as e:

View File

@@ -1087,6 +1087,12 @@ export const DebugDrawer: React.FC<{
const audioCtxRef = useRef<AudioContext | null>(null); const audioCtxRef = useRef<AudioContext | null>(null);
const playbackTimeRef = useRef<number>(0); const playbackTimeRef = useRef<number>(0);
const activeAudioSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set()); const activeAudioSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set());
const queuedAudioBuffersRef = useRef<AudioBuffer[]>([]);
const queuedAudioDurationRef = useRef<number>(0);
const PLAYBACK_INITIAL_BUFFER_SECONDS = 0.25;
const PLAYBACK_MAX_AHEAD_SECONDS = 0.8;
const PLAYBACK_SCHEDULE_LEAD_SECONDS = 0.04;
const [devices, setDevices] = useState<MediaDeviceInfo[]>([]); const [devices, setDevices] = useState<MediaDeviceInfo[]>([]);
const [selectedCamera, setSelectedCamera] = useState<string>(''); const [selectedCamera, setSelectedCamera] = useState<string>('');
@@ -1169,8 +1175,10 @@ export const DebugDrawer: React.FC<{
const clearPlaybackQueue = () => { const clearPlaybackQueue = () => {
const ctx = audioCtxRef.current; const ctx = audioCtxRef.current;
if (!ctx) return; const now = ctx ? ctx.currentTime : 0;
playbackTimeRef.current = ctx.currentTime; playbackTimeRef.current = now;
queuedAudioBuffersRef.current = [];
queuedAudioDurationRef.current = 0;
}; };
const stopPlaybackImmediately = () => { const stopPlaybackImmediately = () => {
@@ -1186,6 +1194,49 @@ export const DebugDrawer: React.FC<{
clearPlaybackQueue(); clearPlaybackQueue();
}; };
/**
 * Moves queued TTS audio buffers onto the Web Audio timeline.
 *
 * Behaviour:
 * - When nothing is currently playing, scheduling is held back until at least
 *   PLAYBACK_INITIAL_BUFFER_SECONDS of audio is queued (pre-roll).
 * - While sources are active, buffers are scheduled back-to-back, but never
 *   more than PLAYBACK_MAX_AHEAD_SECONDS ahead of the context clock; the rest
 *   stays queued and is picked up from each source's `onended` callback.
 * - If the pre-roll threshold is not reached, a retry is armed so that a short
 *   utterance (or a short tail after an underrun) is still played instead of
 *   waiting forever for a chunk that never arrives.
 *
 * @param ctx          Audio context used to create and start the sources.
 * @param flushPartial When true, skip the pre-roll threshold and schedule
 *                     whatever is queued. Used by the internal retry timer.
 */
const scheduleQueuedPlayback = (ctx: AudioContext, flushPartial = false) => {
  const queue = queuedAudioBuffersRef.current;
  if (queue.length === 0) return;

  // Never schedule into the past: pull the playback cursor up to "now".
  const now = ctx.currentTime;
  if (playbackTimeRef.current < now) {
    playbackTimeRef.current = now;
  }

  const hasActivePlayback = activeAudioSourcesRef.current.size > 0;
  const minBufferSeconds =
    hasActivePlayback || flushPartial ? 0 : PLAYBACK_INITIAL_BUFFER_SECONDS;

  if (queuedAudioDurationRef.current < minBufferSeconds) {
    // Bounded wait: if no further chunk tops the queue up, flush what we have.
    // Timers are not de-duplicated; each retry re-validates the state, so an
    // extra firing is a no-op (empty queue) or a normal scheduling pass.
    setTimeout(() => {
      if (ctx.state === 'closed') return;
      scheduleQueuedPlayback(ctx, true);
    }, PLAYBACK_INITIAL_BUFFER_SECONDS * 1000);
    return;
  }

  while (
    queue.length > 0 &&
    playbackTimeRef.current - now < PLAYBACK_MAX_AHEAD_SECONDS
  ) {
    const audioBuffer = queue.shift();
    if (!audioBuffer) break;

    // Keep the queued-duration counter in step with the queue; clamp at zero
    // to absorb floating-point drift.
    queuedAudioDurationRef.current = Math.max(
      0,
      queuedAudioDurationRef.current - audioBuffer.duration,
    );

    const source = ctx.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(ctx.destination);
    activeAudioSourcesRef.current.add(source);

    source.onended = () => {
      activeAudioSourcesRef.current.delete(source);
      try {
        source.disconnect();
      } catch {
        // no-op
      }
      // A slot on the timeline just freed up; schedule more queued audio.
      scheduleQueuedPlayback(ctx);
    };

    // Start no earlier than a small lead from the current clock, and no
    // earlier than the end of the previously scheduled buffer.
    const startAt = Math.max(
      ctx.currentTime + PLAYBACK_SCHEDULE_LEAD_SECONDS,
      playbackTimeRef.current,
    );
    source.start(startAt);
    playbackTimeRef.current = startAt + audioBuffer.duration;
  }
};
const playPcm16Chunk = async (pcmBuffer: ArrayBuffer) => { const playPcm16Chunk = async (pcmBuffer: ArrayBuffer) => {
if (!textTtsEnabled) return; if (!textTtsEnabled) return;
if (mode !== 'text') return; if (mode !== 'text') return;
@@ -1201,23 +1252,9 @@ export const DebugDrawer: React.FC<{
const sampleRate = 16000; const sampleRate = 16000;
const audioBuffer = ctx.createBuffer(1, float32.length, sampleRate); const audioBuffer = ctx.createBuffer(1, float32.length, sampleRate);
audioBuffer.copyToChannel(float32, 0); audioBuffer.copyToChannel(float32, 0);
queuedAudioBuffersRef.current.push(audioBuffer);
const source = ctx.createBufferSource(); queuedAudioDurationRef.current += audioBuffer.duration;
source.buffer = audioBuffer; scheduleQueuedPlayback(ctx);
source.connect(ctx.destination);
activeAudioSourcesRef.current.add(source);
source.onended = () => {
activeAudioSourcesRef.current.delete(source);
try {
source.disconnect();
} catch {
// no-op
}
};
const startAt = Math.max(ctx.currentTime + 0.02, playbackTimeRef.current);
source.start(startAt);
playbackTimeRef.current = startAt + audioBuffer.duration;
}; };
useEffect(() => { useEffect(() => {