commit backend agent
This commit is contained in:
22
agents/.env.example
Normal file
22
agents/.env.example
Normal file
@@ -0,0 +1,22 @@
# WARNING: the original commit checked REAL credentials into this example file.
# Every key that appeared here must be treated as compromised and rotated.
# An .env.example must only ever contain placeholders.
LIVEKIT_API_SECRET="secret"
LIVEKIT_API_KEY="devkey"
LIVEKIT_URL="ws://127.0.0.1:7880"

MINIMAX_API_KEY="<your-minimax-api-key>"

DEEPSEEK_API_KEY="<your-deepseek-api-key>"

AZURE_SPEECH_KEY="<your-azure-speech-key>"
AZURE_SPEECH_REGION="eastasia"

CARTESIA_API_KEY="<your-cartesia-api-key>"
CARTESIA_LANGUAGE="zh"

SILICONFLOW_API_KEY="<your-siliconflow-api-key>"

DASHSCOPE_API_KEY="<your-dashscope-api-key>"

VOLCENGINE_TTS_ACCESS_TOKEN="<your-volcengine-tts-access-token>"
VOLCENGINE_STT_ACCESS_TOKEN="<your-volcengine-stt-access-token>"
VOLCENGINE_LLM_API_KEY="<your-volcengine-llm-api-key>"
VOLCENGINE_REALTIME_ACCESS_TOKEN="<your-volcengine-realtime-access-token>"
245
agents/my_basic_agent_debate.py
Executable file
245
agents/my_basic_agent_debate.py
Executable file
@@ -0,0 +1,245 @@
|
|||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from dataclasses import asdict, dataclass
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import httpx
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from livekit import api, rtc
|
||||||
|
from livekit.agents import (
|
||||||
|
Agent,
|
||||||
|
AgentSession,
|
||||||
|
AudioConfig,
|
||||||
|
BackgroundAudioPlayer,
|
||||||
|
BuiltinAudioClip,
|
||||||
|
JobContext,
|
||||||
|
JobProcess,
|
||||||
|
MetricsCollectedEvent,
|
||||||
|
RoomInputOptions,
|
||||||
|
RoomOutputOptions,
|
||||||
|
RunContext,
|
||||||
|
WorkerOptions,
|
||||||
|
cli,
|
||||||
|
get_job_context,
|
||||||
|
metrics,
|
||||||
|
)
|
||||||
|
from livekit.agents.llm import ImageContent, ToolError, function_tool
|
||||||
|
from livekit.agents.voice.avatar import DataStreamAudioOutput
|
||||||
|
from livekit.agents.voice.io import PlaybackFinishedEvent
|
||||||
|
from livekit.agents.voice.room_io import ATTRIBUTE_PUBLISH_ON_BEHALF
|
||||||
|
from livekit.plugins import silero
|
||||||
|
# from livekit.plugins.turn_detector.multilingual import MultilingualModel
|
||||||
|
from livekit.plugins import openai, azure, minimax, cartesia, deepgram, aliyun
|
||||||
|
from pydantic_core.core_schema import SerializerFunction
|
||||||
|
|
||||||
|
# uncomment to enable Krisp background voice/noise cancellation
|
||||||
|
# from livekit.plugins import noise_cancellation
|
||||||
|
|
||||||
|
logger = logging.getLogger("basic-agent")
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
AVATAR_IDENTITY = "avatar_worker"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AvatarConnectionInfo:
    """Connection details handed to the avatar worker so it can join a room."""

    # Name of the LiveKit room the avatar worker should join.
    room_name: str
    # LiveKit server URL.
    url: str
    # Access token for the avatar worker to join the room.
    token: str
|
||||||
|
|
||||||
|
|
||||||
|
class MyAgent(Agent):
    """Voice agent "Kelly": chats in Chinese and argues that AI can replace human labor."""

    def __init__(self) -> None:
        # Hold references to background tasks so they are not garbage collected.
        self._tasks = []
        # Adjacent string literals concatenate into a single instructions prompt.
        # (The Chinese line says: you are debating whether AI can replace human
        # labor, and your stance is that it can.)
        prompt = (
            "Your name is Kelly. You would interact with users via voice."
            "with that in mind keep your responses concise and to the point."
            "do not use emojis, asterisks, markdown, or other special characters in your responses."
            "You are curious and friendly, and have a sense of humor."
            "你正在和用户辩论AI是否可以取代人工,你的观点是AI可以取代人工"
            "you will speak chinese to the user"
        )
        super().__init__(instructions=prompt)

    async def on_enter(self):
        # Once the agent is added to the session, generate an initial reply
        # according to its instructions.
        self.session.generate_reply()
|
||||||
|
|
||||||
|
def prewarm(proc: JobProcess):
    """Load the Silero VAD model once per worker process and cache it in userdata."""
    vad_model = silero.VAD.load()
    proc.userdata["vad"] = vad_model
|
||||||
|
|
||||||
|
|
||||||
|
async def launch_avatar(ctx: JobContext, avatar_dispatcher_url: str, avatar_identity: str) -> None:
    """
    Send a request to the avatar service for it to join the room.

    This function should be wrapped in an avatar plugin.
    """
    # Mint a token the avatar worker will use to join this room.
    grants = api.VideoGrants(room_join=True, room=ctx.room.name)
    token_builder = (
        api.AccessToken()
        .with_identity(avatar_identity)
        .with_name("Avatar Runner")
        .with_grants(grants)
        .with_kind("agent")
        # Publish on behalf of this agent so its tracks are attributed correctly.
        .with_attributes({ATTRIBUTE_PUBLISH_ON_BEHALF: ctx.local_participant_identity})
    )
    token = token_builder.to_jwt()

    logger.info(f"Sending connection info to avatar dispatcher {avatar_dispatcher_url}")
    # NOTE(review): ctx._info.url reaches into a private attribute — confirm the
    # installed livekit-agents version has no public accessor for the server URL.
    connection_info = AvatarConnectionInfo(room_name=ctx.room.name, url=ctx._info.url, token=token)
    async with httpx.AsyncClient() as client:
        response = await client.post(avatar_dispatcher_url, json=asdict(connection_info))
        response.raise_for_status()
    logger.info("Avatar handshake completed")
|
||||||
|
|
||||||
|
|
||||||
|
async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str | None = None):
    """Job entrypoint: connect to the room, build the STT/LLM/TTS pipeline, run the session.

    Args:
        ctx: LiveKit job context for this dispatched job.
        avatar_dispatcher_url: optional avatar dispatcher endpoint; when set, the
            agent's audio is streamed to the avatar worker instead of the room.
    """
    # each log entry will include these fields
    ctx.log_context_fields = {
        "room": ctx.room.name,
    }

    logger.info("connecting to room")
    await ctx.connect()

    logger.info("waiting for participant")
    participant = await ctx.wait_for_participant()
    logger.info(f"starting agent for participant {participant.identity}")

    # Default TTS voice; a participant may override it via its "voice" attribute.
    # NOTE(review): only the (removed) commented-out minimax TTS consumed this
    # value — it is unused while the aliyun TTS below hard-codes its own voice.
    initial_voice_id = "Chinese (Mandarin)_Male_Announcer"
    if participant.attributes.get("voice"):
        initial_voice_id = participant.attributes.get("voice")
        logger.info(f"User selected voice: {initial_voice_id}")

    session = AgentSession(
        # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
        # See all available models at https://docs.livekit.io/agents/models/stt/
        # NOTE(review): commented-out azure/deepgram STT alternatives were removed
        # here because they embedded real API keys; keep credentials in env vars.
        stt = aliyun.STT(model="paraformer-realtime-v2"),
        # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
        # See all available models at https://docs.livekit.io/agents/models/llm/
        llm=openai.LLM.with_deepseek(
            model='deepseek-chat'
        ),
        # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
        # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
        tts=aliyun.TTS(model="cosyvoice-v2", voice="longcheng_v2"),
        # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
        # See more at https://docs.livekit.io/agents/build/turns
        vad=ctx.proc.userdata["vad"],
        # allow the LLM to generate a response while waiting for the end of turn
        # See more at https://docs.livekit.io/agents/build/audio/#preemptive-generation
        preemptive_generation=True,
        # sometimes background noise could interrupt the agent session, these are considered false positive interruptions
        # when it's detected, you may resume the agent's speech
        resume_false_interruption=True,
        false_interruption_timeout=1.0,
    )

    # log metrics as they are emitted, and total usage after session is over
    usage_collector = metrics.UsageCollector()

    @session.on("metrics_collected")
    def _on_metrics_collected(ev: MetricsCollectedEvent):
        metrics.log_metrics(ev.metrics)
        usage_collector.collect(ev.metrics)

    async def log_usage():
        summary = usage_collector.get_summary()
        logger.info(f"Usage: {summary}")

    # shutdown callbacks are triggered when the session is over
    ctx.add_shutdown_callback(log_usage)

    # Launch avatar if avatar_dispatcher_url is provided
    if avatar_dispatcher_url:
        await launch_avatar(ctx, avatar_dispatcher_url, AVATAR_IDENTITY)
        # Route the agent's audio to the avatar worker over a data stream
        # instead of publishing it directly to the room.
        session.output.audio = DataStreamAudioOutput(
            ctx.room,
            destination_identity=AVATAR_IDENTITY,
            # (optional) wait for the avatar to publish video track before generating a reply
            wait_remote_track=rtc.TrackKind.KIND_VIDEO,
        )

        @session.output.audio.on("playback_finished")
        def on_playback_finished(ev: PlaybackFinishedEvent) -> None:
            # the avatar should notify when the audio playback is finished
            logger.info(
                "playback_finished",
                extra={
                    "playback_position": ev.playback_position,
                    "interrupted": ev.interrupted,
                },
            )

    await session.start(
        agent=MyAgent(),
        room=ctx.room,
        room_input_options=RoomInputOptions(
            # uncomment to enable Krisp BVC noise cancellation
            # noise_cancellation=noise_cancellation.BVC(),
        ),
        room_output_options=RoomOutputOptions(transcription_enabled=True),
    )

    # --- 3. Core: listen for room-metadata changes (match/debate state pushed
    # by the backend). The log/print text below is user-facing Chinese output
    # and is kept verbatim.
    @ctx.room.on("room_metadata_changed")
    def on_metadata_changed(old_metadata: str, new_metadata: str):
        logger.info(f"收到新的比赛状态: {new_metadata} (旧状态: {old_metadata})")
        print(f"收到新的比赛状态: {new_metadata} (旧状态: {old_metadata})")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Peel off our own --avatar-url flag and hand every remaining argument to
    # the LiveKit CLI untouched.
    parser = argparse.ArgumentParser()
    parser.add_argument("--avatar-url", type=str, default=None, help="Avatar dispatcher URL (e.g., http://localhost:8089/launch)")
    args, remaining_args = parser.parse_known_args()
    sys.argv = sys.argv[:1] + remaining_args

    # Bind the dispatcher URL into the entrypoint only when one was given.
    if args.avatar_url:
        entry_fnc = partial(entrypoint, avatar_dispatcher_url=args.avatar_url)
    else:
        entry_fnc = entrypoint
    cli.run_app(WorkerOptions(entrypoint_fnc=entry_fnc, prewarm_fnc=prewarm))
|
||||||
Reference in New Issue
Block a user