commit backend agent
agents/.env.example (Normal file, +22 lines)
@@ -0,0 +1,22 @@
LIVEKIT_API_SECRET="secret"
LIVEKIT_API_KEY="devkey"
LIVEKIT_URL="ws://127.0.0.1:7880"

MINIMAX_API_KEY="eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJHcm91cE5hbWUiOiJYaW4gV2FuZyIsIlVzZXJOYW1lIjoiWGluIFdhbmciLCJBY2NvdW50IjoiIiwiU3ViamVjdElEIjoiMTg1NzMzNTM0ODcwNzcyNTM2MyIsIlBob25lIjoiIiwiR3JvdXBJRCI6IjE4NTczMzUzNDg2OTkzMzY3NTUiLCJQYWdlTmFtZSI6IiIsIk1haWwiOiJ3YW5neGluMzE0MTU5MjZAZ21haWwuY29tIiwiQ3JlYXRlVGltZSI6IjIwMjUtMTItMDMgMTQ6MjE6MDkiLCJUb2tlblR5cGUiOjEsImlzcyI6Im1pbmltYXgifQ.MVL2R-3gxcItQHl7cgfQuqcdYTUI80_QtfXeZtNxRNUPFSpPp_a2Om6K0BrwRE7PvnSyr2Kv-w2yeQoXc_SpgoecVgct-Qfl4-w9pgRySMBfIdMWekxuvI6KDIHjnaFyvls11yiuWAeKWK6PHuil_JqttgSAOz-HitbAmuuGyhpnHrE9u-QhoXcXy9s9lAyhuXMq_OyB06Ps58B5u8ziIqQZQG6SNU6_uMiql3Nl2QU4ukGAoRqUQn6lqQExGdFv43FDwlG6NJ_avWnsTnkjcnPqqay6Sf50zyaRl1sm_c1EPkEWcrLOz1aWHSUrA7rhCJMaU5VCvPtTaGJUZk9_yA"

DEEPSEEK_API_KEY="sk-230701ff1b6143ecbf322b3170606016"

AZURE_SPEECH_KEY="48KfrDwcw6hM0g0WtmF0ZDigaMW8YdUwjrlYDYIL6Rftp5U0V1yfJQQJ99BAAC3pKaRXJ3w3AAAYACOGCI1o"
AZURE_SPEECH_REGION="eastasia"

CARTESIA_API_KEY="sk_car_Yu-_vYZsCq8ZOe-WQn43t"
CARTESIA_LANGUAGE="zh"

SILICONFLOW_API_KEY="sk-thmzysdpqqmhqxxshyqoxvjeiflexjdgaftyufrsgrhpjnyx"

DASHSCOPE_API_KEY="sk-391f5126d18345d497c6e8717c8c9ad7"

VOLCENGINE_TTS_ACCESS_TOKEN="4ustCTIpdCq8dE_msFrZvFn4nDpioIVo"
VOLCENGINE_STT_ACCESS_TOKEN="QiO0AptfmU0GLTSitwn7t5-zeo4gJ6K1"
VOLCENGINE_LLM_API_KEY="1224b4c6-ada7-4c43-be2b-8d48c686e3ff"
VOLCENGINE_REALTIME_ACCESS_TOKEN="1224b4c6-ada7-4c43-be2b-8d48c686e3ff"
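The worker in the next file loads these values with python-dotenv, and the LiveKit SDK and plugins (DeepSeek via DEEPSEEK_API_KEY, the aliyun plugin via DASHSCOPE_API_KEY, and so on) then pick them up from the process environment. A minimal sketch of that pattern, assuming this template has been copied to a real .env next to the worker:

    import os

    from dotenv import load_dotenv

    # reads .env from the working directory (or the nearest one found walking up)
    # into os.environ, without overwriting variables that are already set
    load_dotenv()

    # plugins typically look up their own keys; explicit access works the same way
    livekit_url = os.environ["LIVEKIT_URL"]
    deepseek_key = os.environ["DEEPSEEK_API_KEY"]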
agents/my_basic_agent_debate.py (Executable file, +245 lines)
@@ -0,0 +1,245 @@
import argparse
import asyncio
import base64
import json
import logging
import os
import sys
from dataclasses import asdict, dataclass
from functools import partial

import aiohttp
import httpx
from dotenv import load_dotenv

from livekit import api, rtc
from livekit.agents import (
    Agent,
    AgentSession,
    AudioConfig,
    BackgroundAudioPlayer,
    BuiltinAudioClip,
    JobContext,
    JobProcess,
    MetricsCollectedEvent,
    RoomInputOptions,
    RoomOutputOptions,
    RunContext,
    WorkerOptions,
    cli,
    get_job_context,
    metrics,
)
from livekit.agents.llm import ImageContent, ToolError, function_tool
from livekit.agents.voice.avatar import DataStreamAudioOutput
from livekit.agents.voice.io import PlaybackFinishedEvent
from livekit.agents.voice.room_io import ATTRIBUTE_PUBLISH_ON_BEHALF
from livekit.plugins import silero
# from livekit.plugins.turn_detector.multilingual import MultilingualModel
from livekit.plugins import openai, azure, minimax, cartesia, deepgram, aliyun

# uncomment to enable Krisp background voice/noise cancellation
# from livekit.plugins import noise_cancellation

logger = logging.getLogger("basic-agent")

load_dotenv()

AVATAR_IDENTITY = "avatar_worker"

@dataclass
class AvatarConnectionInfo:
    room_name: str
    url: str
    """LiveKit server URL"""
    token: str
    """Token for the avatar worker to join"""

class MyAgent(Agent):
    def __init__(self) -> None:
        self._tasks = []  # prevent garbage collection of running tasks
        super().__init__(
            instructions="Your name is Kelly. You will interact with users via voice. "
            "With that in mind, keep your responses concise and to the point. "
            "Do not use emojis, asterisks, markdown, or other special characters in your responses. "
            "You are curious and friendly, and have a sense of humor. "
            "You are debating the user on whether AI can replace human workers; your position is that it can. "
            "You will speak Chinese to the user.",
        )

    async def on_enter(self):
        # when the agent is added to the session, it'll generate a reply
        # according to its instructions
        self.session.generate_reply()

def prewarm(proc: JobProcess):
    proc.userdata["vad"] = silero.VAD.load()

async def launch_avatar(ctx: JobContext, avatar_dispatcher_url: str, avatar_identity: str) -> None:
    """
    Send a request to the avatar service asking it to join the room.

    This function should be wrapped in an avatar plugin.
    """
    # create a token for the avatar to join the room
    token = (
        api.AccessToken()
        .with_identity(avatar_identity)
        .with_name("Avatar Runner")
        .with_grants(api.VideoGrants(room_join=True, room=ctx.room.name))
        .with_kind("agent")
        .with_attributes({ATTRIBUTE_PUBLISH_ON_BEHALF: ctx.local_participant_identity})
        .to_jwt()
    )

    logger.info(f"Sending connection info to avatar dispatcher {avatar_dispatcher_url}")
    connection_info = AvatarConnectionInfo(room_name=ctx.room.name, url=ctx._info.url, token=token)
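    # the dispatcher receives this dataclass serialized as JSON:
    # {"room_name": "...", "url": "...", "token": "..."}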
    async with httpx.AsyncClient() as client:
        response = await client.post(avatar_dispatcher_url, json=asdict(connection_info))
        response.raise_for_status()
    logger.info("Avatar handshake completed")

async def entrypoint(ctx: JobContext, avatar_dispatcher_url: str | None = None):
    # each log entry will include these fields
    ctx.log_context_fields = {
        "room": ctx.room.name,
    }

    logger.info("connecting to room")
    await ctx.connect()

    logger.info("waiting for participant")
    participant = await ctx.wait_for_participant()
    logger.info(f"starting agent for participant {participant.identity}")

    initial_voice_id = "Chinese (Mandarin)_Male_Announcer"
    if participant.attributes.get("voice"):
        initial_voice_id = participant.attributes.get("voice")
        logger.info(f"User selected voice: {initial_voice_id}")

    session = AgentSession(
        # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
        # See all available models at https://docs.livekit.io/agents/models/stt/
        # stt="deepgram/nova-3",
        # stt=azure.STT(
        #     speech_key="48KfrDwcw6hM0g0WtmF0ZDigaMW8YdUwjrlYDYIL6Rftp5U0V1yfJQQJ99BAAC3pKaRXJ3w3AAAYACOGCI1o",
        #     speech_region="eastasia",
        #     language="zh-CN"
        # ),
        # stt=deepgram.STT(
        #     api_key="61dbb8aa4badb820c24029052e106b00f7498598",
        #     language="zh-CN",
        #     model="nova-2-general"
        # ),
        stt=aliyun.STT(model="paraformer-realtime-v2"),
        # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
        # See all available models at https://docs.livekit.io/agents/models/llm/
        # llm="openai/gpt-4.1-mini",
        llm=openai.LLM.with_deepseek(
            model="deepseek-chat"
        ),
        # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
        # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
        # tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
        # tts=minimax.TTS(
        #     model="speech-2.6-turbo",
        #     voice=initial_voice_id,
        #     # voice="Friendly_Person",
        #     # voice="Chinese (Mandarin)_Male_Announcer"
        # ),
        tts=aliyun.TTS(model="cosyvoice-v2", voice="longcheng_v2"),
        # tts=azure.TTS(
        #     speech_key="48KfrDwcw6hM0g0WtmF0ZDigaMW8YdUwjrlYDYIL6Rftp5U0V1yfJQQJ99BAAC3pKaRXJ3w3AAAYACOGCI1o",
        #     speech_region="eastasia",
        #     language="zh-CN"
        # ),
        # tts=openai.TTS(
        #     model="kokoro",
        #     voice="zf_xiaoyi",
        #     base_url="http://127.0.0.1:8880/v1",
        #     api_key="not-needed",
        # ),
        # tts=cartesia.TTS(),
        # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
        # See more at https://docs.livekit.io/agents/build/turns
        # turn_detection=MultilingualModel(),
        vad=ctx.proc.userdata["vad"],
        # allow the LLM to generate a response while waiting for the end of turn
        # See more at https://docs.livekit.io/agents/build/audio/#preemptive-generation
        preemptive_generation=True,
        # background noise can sometimes interrupt the agent session; these are considered
        # false-positive interruptions, and when one is detected the agent's speech may be resumed
        resume_false_interruption=True,
        false_interruption_timeout=1.0,
    )

    # log metrics as they are emitted, and total usage after the session is over
    usage_collector = metrics.UsageCollector()

    @session.on("metrics_collected")
    def _on_metrics_collected(ev: MetricsCollectedEvent):
        metrics.log_metrics(ev.metrics)
        usage_collector.collect(ev.metrics)

    async def log_usage():
        summary = usage_collector.get_summary()
        logger.info(f"Usage: {summary}")

    # shutdown callbacks are triggered when the session is over
    ctx.add_shutdown_callback(log_usage)

    # launch the avatar if avatar_dispatcher_url is provided
    if avatar_dispatcher_url:
        await launch_avatar(ctx, avatar_dispatcher_url, AVATAR_IDENTITY)
        session.output.audio = DataStreamAudioOutput(
            ctx.room,
            destination_identity=AVATAR_IDENTITY,
            # (optional) wait for the avatar to publish video track before generating a reply
            wait_remote_track=rtc.TrackKind.KIND_VIDEO,
        )

        @session.output.audio.on("playback_finished")
        def on_playback_finished(ev: PlaybackFinishedEvent) -> None:
            # the avatar should notify when the audio playback is finished
            logger.info(
                "playback_finished",
                extra={
                    "playback_position": ev.playback_position,
                    "interrupted": ev.interrupted,
                },
            )

    await session.start(
        agent=MyAgent(),
        room=ctx.room,
        room_input_options=RoomInputOptions(
            # uncomment to enable Krisp BVC noise cancellation
            # noise_cancellation=noise_cancellation.BVC(),
        ),
        room_output_options=RoomOutputOptions(transcription_enabled=True),
    )

    # --- 3. Core: listen for room metadata changes ---
    @ctx.room.on("room_metadata_changed")
    def on_metadata_changed(old_metadata: str, new_metadata: str):
        logger.info(f"Received new debate state: {new_metadata} (old state: {old_metadata})")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--avatar-url", type=str, default=None, help="Avatar dispatcher URL (e.g., http://localhost:8089/launch)")
    args, remaining_args = parser.parse_known_args()
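    # strip the custom flag so the LiveKit agents CLI below only sees its own arguments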
    sys.argv = sys.argv[:1] + remaining_args

    if args.avatar_url:
        cli.run_app(WorkerOptions(entrypoint_fnc=partial(entrypoint, avatar_dispatcher_url=args.avatar_url), prewarm_fnc=prewarm))
    else:
        cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm))
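A typical local invocation, assuming the standard livekit-agents CLI subcommands (--avatar-url is consumed by the argparse shim above and everything else is passed through to the CLI): python agents/my_basic_agent_debate.py dev --avatar-url http://localhost:8089/launch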
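The room_metadata_changed handler above only logs the incoming state; the sender side is not part of this commit. A minimal sketch of how a backend could push debate state to the room, assuming the livekit-api server SDK and the LIVEKIT_* values from agents/.env.example (the helper name push_debate_state and the state payload are hypothetical):

    import asyncio
    import json
    import os

    from livekit import api


    async def push_debate_state(room_name: str, state: dict) -> None:
        # hypothetical helper: publish new debate state as room metadata,
        # which fires "room_metadata_changed" for the agent above
        # the server API is plain HTTP; the dev server above listens on 127.0.0.1:7880
        lkapi = api.LiveKitAPI(
            url="http://127.0.0.1:7880",
            api_key=os.environ["LIVEKIT_API_KEY"],
            api_secret=os.environ["LIVEKIT_API_SECRET"],
        )
        try:
            await lkapi.room.update_room_metadata(
                api.UpdateRoomMetadataRequest(room=room_name, metadata=json.dumps(state))
            )
        finally:
            await lkapi.aclose()


    if __name__ == "__main__":
        asyncio.run(push_debate_state("my-room", {"round": 1, "speaker": "user"}))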