Unify db api
This commit is contained in:
@@ -52,9 +52,21 @@ if not PYAUDIO_AVAILABLE and not SD_AVAILABLE:
|
||||
class SimpleVoiceClient:
|
||||
"""Simple voice client with reliable audio playback."""
|
||||
|
||||
def __init__(self, url: str, sample_rate: int = 16000):
|
||||
def __init__(
|
||||
self,
|
||||
url: str,
|
||||
sample_rate: int = 16000,
|
||||
app_id: str = "assistant_demo",
|
||||
channel: str = "simple_client",
|
||||
config_version_id: str = "local-dev",
|
||||
track_debug: bool = False,
|
||||
):
|
||||
self.url = url
|
||||
self.sample_rate = sample_rate
|
||||
self.app_id = app_id
|
||||
self.channel = channel
|
||||
self.config_version_id = config_version_id
|
||||
self.track_debug = track_debug
|
||||
self.ws = None
|
||||
self.running = False
|
||||
|
||||
@@ -75,6 +87,17 @@ class SimpleVoiceClient:
|
||||
|
||||
# Interrupt handling - discard audio until next trackStart
|
||||
self._discard_audio = False
|
||||
|
||||
@staticmethod
|
||||
def _event_ids_suffix(event: dict) -> str:
|
||||
data = event.get("data") if isinstance(event.get("data"), dict) else {}
|
||||
keys = ("turn_id", "utterance_id", "response_id", "tool_call_id", "tts_id")
|
||||
parts = []
|
||||
for key in keys:
|
||||
value = data.get(key, event.get(key))
|
||||
if value:
|
||||
parts.append(f"{key}={value}")
|
||||
return f" [{' '.join(parts)}]" if parts else ""
|
||||
|
||||
async def connect(self):
|
||||
"""Connect to server."""
|
||||
@@ -83,12 +106,25 @@ class SimpleVoiceClient:
|
||||
self.running = True
|
||||
print("Connected!")
|
||||
|
||||
# Send invite
|
||||
# WS v1 handshake: hello -> session.start
|
||||
await self.ws.send(json.dumps({
|
||||
"command": "invite",
|
||||
"option": {"codec": "pcm", "sampleRate": self.sample_rate}
|
||||
"type": "hello",
|
||||
"version": "v1",
|
||||
}))
|
||||
print("-> invite")
|
||||
await self.ws.send(json.dumps({
|
||||
"type": "session.start",
|
||||
"audio": {
|
||||
"encoding": "pcm_s16le",
|
||||
"sample_rate_hz": self.sample_rate,
|
||||
"channels": 1,
|
||||
},
|
||||
"metadata": {
|
||||
"appId": self.app_id,
|
||||
"channel": self.channel,
|
||||
"configVersionId": self.config_version_id,
|
||||
},
|
||||
}))
|
||||
print("-> hello/session.start")
|
||||
|
||||
async def send_chat(self, text: str):
|
||||
"""Send chat message."""
|
||||
@@ -96,8 +132,8 @@ class SimpleVoiceClient:
|
||||
self.request_start_time = time.time()
|
||||
self.first_audio_received = False
|
||||
|
||||
await self.ws.send(json.dumps({"command": "chat", "text": text}))
|
||||
print(f"-> chat: {text}")
|
||||
await self.ws.send(json.dumps({"type": "input.text", "text": text}))
|
||||
print(f"-> input.text: {text}")
|
||||
|
||||
def play_audio(self, audio_data: bytes):
|
||||
"""Play audio data immediately."""
|
||||
@@ -152,34 +188,39 @@ class SimpleVoiceClient:
|
||||
else:
|
||||
# JSON event
|
||||
event = json.loads(msg)
|
||||
etype = event.get("event", "?")
|
||||
etype = event.get("type", event.get("event", "?"))
|
||||
ids = self._event_ids_suffix(event)
|
||||
if self.track_debug:
|
||||
print(f"[track-debug] event={etype} trackId={event.get('trackId')}{ids}")
|
||||
|
||||
if etype == "transcript":
|
||||
if etype in {"transcript", "transcript.delta", "transcript.final"}:
|
||||
# User speech transcription
|
||||
text = event.get("text", "")
|
||||
is_final = event.get("isFinal", False)
|
||||
is_final = etype == "transcript.final" or bool(event.get("isFinal"))
|
||||
if is_final:
|
||||
print(f"<- You said: {text}")
|
||||
print(f"<- You said: {text}{ids}")
|
||||
else:
|
||||
print(f"<- [listening] {text}", end="\r")
|
||||
elif etype == "ttfb":
|
||||
elif etype in {"ttfb", "metrics.ttfb"}:
|
||||
# Server-side TTFB event
|
||||
latency_ms = event.get("latencyMs", 0)
|
||||
print(f"<- [TTFB] Server reported latency: {latency_ms}ms")
|
||||
elif etype == "trackStart":
|
||||
elif etype in {"trackStart", "output.audio.start"}:
|
||||
# New track starting - accept audio again
|
||||
self._discard_audio = False
|
||||
print(f"<- {etype}")
|
||||
elif etype == "interrupt":
|
||||
print(f"<- {etype}{ids}")
|
||||
elif etype in {"interrupt", "response.interrupted"}:
|
||||
# Interrupt - discard audio until next trackStart
|
||||
self._discard_audio = True
|
||||
print(f"<- {etype} (discarding audio until new track)")
|
||||
elif etype == "hangup":
|
||||
print(f"<- {etype}")
|
||||
print(f"<- {etype}{ids} (discarding audio until new track)")
|
||||
elif etype in {"hangup", "session.stopped"}:
|
||||
print(f"<- {etype}{ids}")
|
||||
self.running = False
|
||||
break
|
||||
elif etype == "config.resolved":
|
||||
print(f"<- config.resolved {event.get('config', {}).get('output', {})}{ids}")
|
||||
else:
|
||||
print(f"<- {etype}")
|
||||
print(f"<- {etype}{ids}")
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
continue
|
||||
@@ -270,6 +311,10 @@ async def main():
|
||||
parser.add_argument("--text", help="Send text and play response")
|
||||
parser.add_argument("--list-devices", action="store_true")
|
||||
parser.add_argument("--sample-rate", type=int, default=16000)
|
||||
parser.add_argument("--app-id", default="assistant_demo")
|
||||
parser.add_argument("--channel", default="simple_client")
|
||||
parser.add_argument("--config-version-id", default="local-dev")
|
||||
parser.add_argument("--track-debug", action="store_true")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -277,7 +322,14 @@ async def main():
|
||||
list_audio_devices()
|
||||
return
|
||||
|
||||
client = SimpleVoiceClient(args.url, args.sample_rate)
|
||||
client = SimpleVoiceClient(
|
||||
args.url,
|
||||
args.sample_rate,
|
||||
app_id=args.app_id,
|
||||
channel=args.channel,
|
||||
config_version_id=args.config_version_id,
|
||||
track_debug=args.track_debug,
|
||||
)
|
||||
await client.run(args.text)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user