Improve TTS stream
This commit is contained in:
@@ -634,7 +634,6 @@ class DuplexPipeline:
|
|||||||
break
|
break
|
||||||
|
|
||||||
await self.transport.send_audio(chunk.audio)
|
await self.transport.send_audio(chunk.audio)
|
||||||
await asyncio.sleep(0.005) # Small delay to prevent flooding
|
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
logger.debug("TTS sentence cancelled")
|
logger.debug("TTS sentence cancelled")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -1087,6 +1087,12 @@ export const DebugDrawer: React.FC<{
|
|||||||
const audioCtxRef = useRef<AudioContext | null>(null);
|
const audioCtxRef = useRef<AudioContext | null>(null);
|
||||||
const playbackTimeRef = useRef<number>(0);
|
const playbackTimeRef = useRef<number>(0);
|
||||||
const activeAudioSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set());
|
const activeAudioSourcesRef = useRef<Set<AudioBufferSourceNode>>(new Set());
|
||||||
|
const queuedAudioBuffersRef = useRef<AudioBuffer[]>([]);
|
||||||
|
const queuedAudioDurationRef = useRef<number>(0);
|
||||||
|
|
||||||
|
const PLAYBACK_INITIAL_BUFFER_SECONDS = 0.25;
|
||||||
|
const PLAYBACK_MAX_AHEAD_SECONDS = 0.8;
|
||||||
|
const PLAYBACK_SCHEDULE_LEAD_SECONDS = 0.04;
|
||||||
|
|
||||||
const [devices, setDevices] = useState<MediaDeviceInfo[]>([]);
|
const [devices, setDevices] = useState<MediaDeviceInfo[]>([]);
|
||||||
const [selectedCamera, setSelectedCamera] = useState<string>('');
|
const [selectedCamera, setSelectedCamera] = useState<string>('');
|
||||||
@@ -1169,8 +1175,10 @@ export const DebugDrawer: React.FC<{
|
|||||||
|
|
||||||
const clearPlaybackQueue = () => {
|
const clearPlaybackQueue = () => {
|
||||||
const ctx = audioCtxRef.current;
|
const ctx = audioCtxRef.current;
|
||||||
if (!ctx) return;
|
const now = ctx ? ctx.currentTime : 0;
|
||||||
playbackTimeRef.current = ctx.currentTime;
|
playbackTimeRef.current = now;
|
||||||
|
queuedAudioBuffersRef.current = [];
|
||||||
|
queuedAudioDurationRef.current = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
const stopPlaybackImmediately = () => {
|
const stopPlaybackImmediately = () => {
|
||||||
@@ -1186,6 +1194,49 @@ export const DebugDrawer: React.FC<{
|
|||||||
clearPlaybackQueue();
|
clearPlaybackQueue();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Moves buffers from queuedAudioBuffersRef onto the given AudioContext as
// back-to-back AudioBufferSourceNodes.
//
// - Returns immediately when the queue is empty.
// - When no source is tracked in activeAudioSourcesRef, nothing is scheduled
//   until queuedAudioDurationRef reaches PLAYBACK_INITIAL_BUFFER_SECONDS;
//   when at least one source is tracked, no minimum is required.
// - Scheduling stops once playbackTimeRef is PLAYBACK_MAX_AHEAD_SECONDS or
//   more ahead of the context time sampled on entry; each source's onended
//   handler calls this function again to continue draining the queue.
//
// NOTE(review): if less than PLAYBACK_INITIAL_BUFFER_SECONDS of audio is
// queued while nothing is playing, this returns without scheduling, and the
// only visible re-invocations are a new chunk arriving or a source ending.
// Confirm that the tail of a short utterance is flushed somewhere else.
const scheduleQueuedPlayback = (ctx: AudioContext) => {
|
||||||
|
const queue = queuedAudioBuffersRef.current;
|
||||||
|
if (queue.length === 0) return;
|
||||||
|
|
||||||
|
const now = ctx.currentTime;
|
||||||
|
// The playback cursor has fallen behind the context clock; pull it up to now.
if (playbackTimeRef.current < now) {
|
||||||
|
playbackTimeRef.current = now;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Require the initial buffer only when no scheduled source is still tracked.
const hasActivePlayback = activeAudioSourcesRef.current.size > 0;
|
||||||
|
const minBufferSeconds = hasActivePlayback
|
||||||
|
? 0
|
||||||
|
: PLAYBACK_INITIAL_BUFFER_SECONDS;
|
||||||
|
|
||||||
|
if (queuedAudioDurationRef.current < minBufferSeconds) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Schedule buffers until the cursor is PLAYBACK_MAX_AHEAD_SECONDS ahead of
// `now` (the value sampled on entry, not re-read per iteration).
while (queue.length > 0 && (playbackTimeRef.current - now) < PLAYBACK_MAX_AHEAD_SECONDS) {
|
||||||
|
const audioBuffer = queue.shift();
|
||||||
|
if (!audioBuffer) break;
|
||||||
|
// Keep the running duration total in step with the queue, clamped at 0.
queuedAudioDurationRef.current = Math.max(0, queuedAudioDurationRef.current - audioBuffer.duration);
|
||||||
|
|
||||||
|
const source = ctx.createBufferSource();
|
||||||
|
source.buffer = audioBuffer;
|
||||||
|
source.connect(ctx.destination);
|
||||||
|
activeAudioSourcesRef.current.add(source);
|
||||||
|
// When this source ends: untrack it, detach it, then try to schedule more.
source.onended = () => {
|
||||||
|
activeAudioSourcesRef.current.delete(source);
|
||||||
|
try {
|
||||||
|
source.disconnect();
|
||||||
|
} catch {
|
||||||
|
// no-op
|
||||||
|
}
|
||||||
|
scheduleQueuedPlayback(ctx);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Start no sooner than PLAYBACK_SCHEDULE_LEAD_SECONDS from the current
// context time, and never before the end of the previously scheduled buffer.
const startAt = Math.max(ctx.currentTime + PLAYBACK_SCHEDULE_LEAD_SECONDS, playbackTimeRef.current);
|
||||||
|
source.start(startAt);
|
||||||
|
// Advance the cursor to the end of the buffer just scheduled.
playbackTimeRef.current = startAt + audioBuffer.duration;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
const playPcm16Chunk = async (pcmBuffer: ArrayBuffer) => {
|
const playPcm16Chunk = async (pcmBuffer: ArrayBuffer) => {
|
||||||
if (!textTtsEnabled) return;
|
if (!textTtsEnabled) return;
|
||||||
if (mode !== 'text') return;
|
if (mode !== 'text') return;
|
||||||
@@ -1201,23 +1252,9 @@ export const DebugDrawer: React.FC<{
|
|||||||
const sampleRate = 16000;
|
const sampleRate = 16000;
|
||||||
const audioBuffer = ctx.createBuffer(1, float32.length, sampleRate);
|
const audioBuffer = ctx.createBuffer(1, float32.length, sampleRate);
|
||||||
audioBuffer.copyToChannel(float32, 0);
|
audioBuffer.copyToChannel(float32, 0);
|
||||||
|
queuedAudioBuffersRef.current.push(audioBuffer);
|
||||||
const source = ctx.createBufferSource();
|
queuedAudioDurationRef.current += audioBuffer.duration;
|
||||||
source.buffer = audioBuffer;
|
scheduleQueuedPlayback(ctx);
|
||||||
source.connect(ctx.destination);
|
|
||||||
activeAudioSourcesRef.current.add(source);
|
|
||||||
source.onended = () => {
|
|
||||||
activeAudioSourcesRef.current.delete(source);
|
|
||||||
try {
|
|
||||||
source.disconnect();
|
|
||||||
} catch {
|
|
||||||
// no-op
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
const startAt = Math.max(ctx.currentTime + 0.02, playbackTimeRef.current);
|
|
||||||
source.start(startAt);
|
|
||||||
playbackTimeRef.current = startAt + audioBuffer.duration;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
|||||||
Reference in New Issue
Block a user