api has llm response event
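The diff below adds an llmResponse event to the server-to-client protocol: the pipeline emits one event per streamed LLM text chunk (isFinal false) and a closing event carrying the full response (isFinal true), and both example clients print these in the console. A rough sketch of the payload shapes, reconstructed from the handler code in the diff (the concrete trackId, text, and timestamp values here are illustrative, not captured from a real session):

    # Illustrative payloads only, reconstructed from the diff below.
    streaming_chunk = {
        "event": "llmResponse",
        "trackId": "session-123",        # the pipeline's session_id in practice
        "text": "Hello, how",            # partial text as it streams from the LLM
        "isFinal": False,
        "timestamp": 1712345678901,      # milliseconds, from _get_timestamp_ms()
    }

    final_response = {
        "event": "llmResponse",
        "trackId": "session-123",
        "text": "Hello, how can I help you today?",
        "isFinal": True,
        "timestamp": 1712345679321,
    }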
@@ -426,6 +426,15 @@ class DuplexPipeline:
sentence_buffer += text_chunk
await self.conversation.update_assistant_text(text_chunk)

# Send LLM response streaming event to client
await self.transport.send_event({
    "event": "llmResponse",
    "trackId": self.session_id,
    "text": text_chunk,
    "isFinal": False,
    "timestamp": self._get_timestamp_ms()
})

# Check for sentence completion - synthesize immediately for low latency
while any(end in sentence_buffer for end in sentence_ends):
    # Find first sentence end
@@ -454,6 +463,16 @@ class DuplexPipeline:
    else:
        break

# Send final LLM response event
if full_response and not self._interrupt_event.is_set():
    await self.transport.send_event({
        "event": "llmResponse",
        "trackId": self.session_id,
        "text": full_response,
        "isFinal": True,
        "timestamp": self._get_timestamp_ms()
    })

# Speak any remaining text
if sentence_buffer.strip() and not self._interrupt_event.is_set():
    if not first_audio_sent:
@@ -4,10 +4,12 @@ Microphone client for testing duplex voice conversation.
This client captures audio from the microphone, sends it to the server,
and plays back the AI's voice response through the speakers.
It also displays the LLM's text responses in the console.

Usage:
    python examples/mic_client.py --url ws://localhost:8000/ws
    python examples/mic_client.py --url ws://localhost:8000/ws --chat "Hello!"
    python examples/mic_client.py --url ws://localhost:8000/ws --verbose

Requirements:
    pip install sounddevice soundfile websockets numpy
@@ -102,6 +104,9 @@ class MicrophoneClient:
    self._discard_audio = False
    self._audio_sequence = 0  # Track audio sequence to detect stale chunks

    # Verbose mode for streaming LLM responses
    self.verbose = False

async def connect(self) -> None:
    """Connect to WebSocket server."""
    print(f"Connecting to {self.url}...")
@@ -314,6 +319,17 @@ class MicrophoneClient:
    # Server-side TTFB event
    latency_ms = event.get("latencyMs", 0)
    print(f"← [TTFB] Server reported latency: {latency_ms}ms")
elif event_type == "llmResponse":
    # LLM text response
    text = event.get("text", "")
    is_final = event.get("isFinal", False)
    if is_final:
        # Print final LLM response
        print(f"← AI: {text}")
    elif self.verbose:
        # Show streaming chunks only in verbose mode
        display_text = text[:60] + "..." if len(text) > 60 else text
        print(f"  [streaming] {display_text}")
elif event_type == "trackStart":
    print("← Bot started speaking")
    # IMPORTANT: Accept audio again after trackStart
@@ -552,6 +568,11 @@ async def main():
    action="store_true",
    help="Disable interactive mode"
)
parser.add_argument(
    "--verbose", "-v",
    action="store_true",
    help="Show streaming LLM response chunks"
)

args = parser.parse_args()
@@ -565,6 +586,7 @@ async def main():
    input_device=args.input_device,
    output_device=args.output_device
)
client.verbose = args.verbose

await client.run(
    chat_message=args.chat,
@@ -115,7 +115,13 @@ class WavFileClient:
        "direction": direction,
        "message": message
    })
    # Handle encoding errors on Windows
    try:
        print(f"{direction} {message}")
    except UnicodeEncodeError:
        # Replace problematic characters for console output
        safe_message = message.encode('ascii', errors='replace').decode('ascii')
        print(f"{direction} {safe_message}")

async def connect(self) -> None:
    """Connect to WebSocket server."""
@@ -285,6 +291,14 @@ class WavFileClient:
elif event_type == "ttfb":
    latency_ms = event.get("latencyMs", 0)
    self.log_event("←", f"[TTFB] Server latency: {latency_ms}ms")
elif event_type == "llmResponse":
    text = event.get("text", "")
    is_final = event.get("isFinal", False)
    if is_final:
        self.log_event("←", f"LLM Response (final): {text[:100]}{'...' if len(text) > 100 else ''}")
    elif self.verbose:
        # Show streaming chunks only in verbose mode
        self.log_event("←", f"LLM: {text}")
elif event_type == "trackStart":
    self.track_started = True
    self.log_event("←", "Bot started speaking")
@@ -6,7 +6,7 @@ Creates a 16kHz mono WAV file with real speech segments separated by
configurable silence (for VAD/testing).

Usage:
    python scripts/generate_test_audio.py [OPTIONS]
    python generate_test_audio.py [OPTIONS]

Options:
    -o, --output PATH     Output WAV path (default: data/audio_examples/two_utterances_16k.wav)
@@ -18,19 +18,18 @@ Options:
Examples:
    # Default utterances and output
    python scripts/generate_test_audio.py
    python generate_test_audio.py

    # Custom output path
    python scripts/generate_test_audio.py -o out.wav
    python generate_test_audio.py -o out.wav

    # Utterances from command line
    python scripts/generate_test_audio.py -u "Hello" -u "World" -o test.wav
    python generate_test_audio.py -u "Hello" -u "World" -o test.wav

    # Utterances from JSON file
    python scripts/generate_test_audio.py -j utterances.json -o test.wav
    python generate_test_audio.py -j utterances.json -o test.wav

    # Custom silence (1s between utterances)
    python scripts/generate_test_audio.py -u "One" -u "Two" --silence-ms 1000 -o test.wav
    python generate_test_audio.py -u "One" -u "Two" --silence-ms 1000 -o test.wav

Requires SILICONFLOW_API_KEY in .env.
"""
@@ -47,7 +46,7 @@ from dotenv import load_dotenv
# Load .env file from project root
project_root = Path(__file__).parent.parent
project_root = Path(__file__).parent.parent.parent
load_dotenv(project_root / ".env")
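For reference, a minimal text-only consumer of the new llmResponse event might look like the sketch below (not taken from the diff above). It assumes the websockets package and the ws://localhost:8000/ws endpoint from the mic_client docstring, and that non-JSON messages such as binary audio frames can simply be skipped.

    # Minimal sketch: print streaming and final LLM text from the llmResponse event.
    # Assumptions: `websockets` package installed; server speaks the protocol shown
    # in the diff above; binary frames are audio and are ignored here.
    import asyncio
    import json

    import websockets


    async def watch_llm_responses(url: str = "ws://localhost:8000/ws") -> None:
        async with websockets.connect(url) as ws:
            async for raw in ws:
                if isinstance(raw, bytes):
                    continue  # skip audio frames; only text events are of interest here
                event = json.loads(raw)
                if event.get("event") != "llmResponse":
                    continue
                if event.get("isFinal", False):
                    print(f"AI: {event.get('text', '')}")
                else:
                    print(f"[streaming] {event.get('text', '')}")


    if __name__ == "__main__":
        asyncio.run(watch_llm_responses())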