first version push to talk

This commit is contained in:
2025-12-16 15:11:55 +08:00
parent 9f05f067a6
commit 1774f550dd
3 changed files with 301 additions and 19 deletions

View File

@@ -31,6 +31,7 @@ from livekit.agents import (
cli,
get_job_context,
metrics,
RoomIO
)
from livekit.agents.llm import ImageContent, ToolError, function_tool
from typing import Any, List, Optional
@@ -953,6 +954,8 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
# Increase the maximum number of function calls per turn to avoid hitting the limit
max_tool_steps=15,
)
room_io = RoomIO(session, room=ctx.room)
await room_io.start()
# log metrics as they are emitted, and total usage after session is over
usage_collector = metrics.UsageCollector()
@@ -1011,6 +1014,45 @@ async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str = None, vision_
room_output_options=RoomOutputOptions(transcription_enabled=True),
)
# disable input audio at the start
session.input.set_audio_enabled(False)
@ctx.room.local_participant.register_rpc_method("start_turn")
async def start_turn(data: rtc.RpcInvocationData):
try:
session.interrupt()
except RuntimeError as e:
logger.error(f"Failed to interrupt session: {e}")
# Raise RPC error so client can detect interrupt failure
# Use ERROR_INTERNAL (code 13) to indicate application error
raise rtc.RpcError(
code=13, # ERROR_INTERNAL
message="Application error in method handler"
)
session.clear_user_turn()
# listen to the caller if multi-user
room_io.set_participant(data.caller_identity)
session.input.set_audio_enabled(True)
@ctx.room.local_participant.register_rpc_method("end_turn")
async def end_turn(data: rtc.RpcInvocationData):
session.input.set_audio_enabled(False)
session.commit_user_turn(
# the timeout for the final transcript to be received after committing the user turn
# increase this value if the STT is slow to respond
transcript_timeout=10.0,
# the duration of the silence to be appended to the STT to make it generate the final transcript
stt_flush_duration=2.0,
)
@ctx.room.local_participant.register_rpc_method("cancel_turn")
async def cancel_turn(data: rtc.RpcInvocationData):
session.input.set_audio_enabled(False)
session.clear_user_turn()
logger.info("cancel turn")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--avatar-url", type=str, default=None, help="Avatar dispatcher URL")