Use voiceAssistant components (#98)

This commit is contained in:
lukasIO
2024-09-16 10:47:15 +02:00
committed by GitHub
parent 302afd8844
commit e2c3b8bf35
10 changed files with 2839 additions and 290 deletions

View File

@@ -1,6 +1,9 @@
const createNextPluginPreval = require("next-plugin-preval/config");
const withNextPluginPreval = createNextPluginPreval();
/** @type {import('next').NextConfig} */ /** @type {import('next').NextConfig} */
const nextConfig = { const nextConfig = {
reactStrictMode: false, reactStrictMode: false,
}; };
module.exports = nextConfig; module.exports = withNextPluginPreval(nextConfig);

2850
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -9,15 +9,17 @@
"lint": "next lint" "lint": "next lint"
}, },
"dependencies": { "dependencies": {
"@livekit/components-react": "^2.3.1", "@livekit/components-react": "^2.5.2",
"@livekit/components-styles": "^1.1.1",
"@radix-ui/react-dropdown-menu": "^2.0.6", "@radix-ui/react-dropdown-menu": "^2.0.6",
"cookies-next": "^4.1.1", "cookies-next": "^4.1.1",
"framer-motion": "^10.16.16", "framer-motion": "^10.16.16",
"js-yaml": "^4.1.0", "js-yaml": "^4.1.0",
"livekit-client": "^2.1.5", "livekit-client": "^2.5.1",
"livekit-server-sdk": "^2.1.2", "livekit-server-sdk": "^2.6.1",
"lodash": "^4.17.21", "lodash": "^4.17.21",
"next": "^14.0.4", "next": "^14.0.4",
"next-plugin-preval": "^1.2.6",
"qrcode.react": "^4.0.0", "qrcode.react": "^4.0.0",
"react": "^18", "react": "^18",
"react-dom": "^18" "react-dom": "^18"

View File

@@ -1,25 +1,22 @@
import { useRef } from "react"; import {
import { AgentMultibandAudioVisualizer } from "../visualization/AgentMultibandAudioVisualizer"; BarVisualizer,
TrackReferenceOrPlaceholder,
} from "@livekit/components-react";
type AudioInputTileProps = { export const AudioInputTile = ({
frequencies: Float32Array[]; trackRef,
}; }: {
trackRef: TrackReferenceOrPlaceholder;
export const AudioInputTile = ({ frequencies }: AudioInputTileProps) => { }) => {
return ( return (
<div <div
className={`flex flex-row gap-2 h-[100px] items-center w-full justify-center border rounded-sm border-gray-800 bg-gray-900`} className={`flex flex-row gap-2 h-[100px] items-center w-full justify-center border rounded-sm border-gray-800 bg-gray-900`}
> >
<AgentMultibandAudioVisualizer <BarVisualizer
state="speaking" trackRef={trackRef}
barWidth={4} className="h-full w-full"
minBarHeight={2} barCount={20}
maxBarHeight={50} options={{ minHeight: 0 }}
accentColor={"gray"}
accentShade={400}
frequencies={frequencies}
borderRadius={2}
gap={4}
/> />
</div> </div>
); );

View File

@@ -12,28 +12,22 @@ import {
PlaygroundTabbedTile, PlaygroundTabbedTile,
PlaygroundTile, PlaygroundTile,
} from "@/components/playground/PlaygroundTile"; } from "@/components/playground/PlaygroundTile";
import { AgentMultibandAudioVisualizer } from "@/components/visualization/AgentMultibandAudioVisualizer";
import { useConfig } from "@/hooks/useConfig"; import { useConfig } from "@/hooks/useConfig";
import { useMultibandTrackVolume } from "@/hooks/useTrackVolume";
import { TranscriptionTile } from "@/transcriptions/TranscriptionTile"; import { TranscriptionTile } from "@/transcriptions/TranscriptionTile";
import { import {
TrackReferenceOrPlaceholder, BarVisualizer,
VideoTrack, VideoTrack,
useConnectionState, useConnectionState,
useDataChannel, useDataChannel,
useLocalParticipant, useLocalParticipant,
useRemoteParticipants,
useRoomInfo, useRoomInfo,
useTracks, useTracks,
useVoiceAssistant,
} from "@livekit/components-react"; } from "@livekit/components-react";
import { import { ConnectionState, LocalParticipant, Track } from "livekit-client";
ConnectionState,
LocalParticipant,
RoomEvent,
Track,
} from "livekit-client";
import { QRCodeSVG } from "qrcode.react"; import { QRCodeSVG } from "qrcode.react";
import { ReactNode, useCallback, useEffect, useMemo, useState } from "react"; import { ReactNode, useCallback, useEffect, useMemo, useState } from "react";
import tailwindTheme from "../../lib/tailwindTheme.preval";
export interface PlaygroundMeta { export interface PlaygroundMeta {
name: string; name: string;
@@ -55,15 +49,10 @@ export default function Playground({
}: PlaygroundProps) { }: PlaygroundProps) {
const { config, setUserSettings } = useConfig(); const { config, setUserSettings } = useConfig();
const { name } = useRoomInfo(); const { name } = useRoomInfo();
const [messages, setMessages] = useState<ChatMessageType[]>([]);
const [transcripts, setTranscripts] = useState<ChatMessageType[]>([]); const [transcripts, setTranscripts] = useState<ChatMessageType[]>([]);
const { localParticipant } = useLocalParticipant(); const { localParticipant } = useLocalParticipant();
const participants = useRemoteParticipants({ const voiceAssistant = useVoiceAssistant();
updateOnlyOn: [RoomEvent.ParticipantMetadataChanged],
});
const agentParticipant = participants.find((p) => p.isAgent);
const isAgentConnected = agentParticipant !== undefined;
const roomState = useConnectionState(); const roomState = useConnectionState();
const tracks = useTracks(); const tracks = useTracks();
@@ -75,32 +64,12 @@ export default function Playground({
} }
}, [config, localParticipant, roomState]); }, [config, localParticipant, roomState]);
let agentAudioTrack: TrackReferenceOrPlaceholder | undefined;
const aat = tracks.find(
(trackRef) =>
trackRef.publication.kind === Track.Kind.Audio &&
trackRef.participant.isAgent
);
if (aat) {
agentAudioTrack = aat;
} else if (agentParticipant) {
agentAudioTrack = {
participant: agentParticipant,
source: Track.Source.Microphone,
};
}
const agentVideoTrack = tracks.find( const agentVideoTrack = tracks.find(
(trackRef) => (trackRef) =>
trackRef.publication.kind === Track.Kind.Video && trackRef.publication.kind === Track.Kind.Video &&
trackRef.participant.isAgent trackRef.participant.isAgent
); );
const subscribedVolumes = useMultibandTrackVolume(
agentAudioTrack?.publication?.track,
5
);
const localTracks = tracks.filter( const localTracks = tracks.filter(
({ participant }) => participant instanceof LocalParticipant ({ participant }) => participant instanceof LocalParticipant
); );
@@ -111,11 +80,6 @@ export default function Playground({
({ source }) => source === Track.Source.Microphone ({ source }) => source === Track.Source.Microphone
); );
const localMultibandVolume = useMultibandTrackVolume(
localMicTrack?.publication.track,
20
);
const onDataReceived = useCallback( const onDataReceived = useCallback(
(msg: any) => { (msg: any) => {
if (msg.topic === "transcription") { if (msg.topic === "transcription") {
@@ -181,6 +145,18 @@ export default function Playground({
); );
}, [agentVideoTrack, config, roomState]); }, [agentVideoTrack, config, roomState]);
// Publish the selected theme color as CSS custom properties on <body>.
// The effect re-runs whenever config.settings.theme_color changes.
useEffect(() => {
document.body.style.setProperty(
"--lk-theme-color",
// Shade "500" of the configured color, looked up in the theme object
// imported from lib/tailwindTheme.preval.
// NOTE(review): this lookup throws if theme_color is not a key of
// tailwindTheme.colors — confirm the setting is always a valid palette name.
// @ts-ignore
tailwindTheme.colors[config.settings.theme_color]["500"]
);
// The drop shadow is defined in terms of the theme color variable set above.
document.body.style.setProperty(
"--lk-drop-shadow",
`var(--lk-theme-color) 0px 0px 18px`
);
}, [config.settings.theme_color]);
const audioTileContent = useMemo(() => { const audioTileContent = useMemo(() => {
const disconnectedContent = ( const disconnectedContent = (
<div className="flex flex-col items-center justify-center gap-2 text-gray-700 text-center w-full"> <div className="flex flex-col items-center justify-center gap-2 text-gray-700 text-center w-full">
@@ -195,19 +171,15 @@ export default function Playground({
</div> </div>
); );
// TODO: keep it in the speaking state until we come up with a better protocol for agent states
const visualizerContent = ( const visualizerContent = (
<div className="flex items-center justify-center w-full"> <div
<AgentMultibandAudioVisualizer className={`flex items-center justify-center w-full h-48 [--lk-va-bar-width:30px] [--lk-va-bar-gap:20px] [--lk-fg:var(--lk-theme-color)]`}
state="speaking" >
barWidth={30} <BarVisualizer
minBarHeight={30} state={voiceAssistant.state}
maxBarHeight={150} trackRef={voiceAssistant.audioTrack}
accentColor={config.settings.theme_color} barCount={5}
accentShade={500} options={{ minHeight: 20 }}
frequencies={subscribedVolumes}
borderRadius={12}
gap={16}
/> />
</div> </div>
); );
@@ -216,29 +188,29 @@ export default function Playground({
return disconnectedContent; return disconnectedContent;
} }
if (!agentAudioTrack) { if (!voiceAssistant.audioTrack) {
return waitingContent; return waitingContent;
} }
return visualizerContent; return visualizerContent;
}, [ }, [
agentAudioTrack, voiceAssistant.audioTrack,
config.settings.theme_color, config.settings.theme_color,
subscribedVolumes,
roomState, roomState,
voiceAssistant.state,
]); ]);
const chatTileContent = useMemo(() => { const chatTileContent = useMemo(() => {
if (agentAudioTrack) { if (voiceAssistant.audioTrack) {
return ( return (
<TranscriptionTile <TranscriptionTile
agentAudioTrack={agentAudioTrack} agentAudioTrack={voiceAssistant.audioTrack}
accentColor={config.settings.theme_color} accentColor={config.settings.theme_color}
/> />
); );
} }
return <></>; return <></>;
}, [config.settings.theme_color, agentAudioTrack]); }, [config.settings.theme_color, voiceAssistant.audioTrack]);
const settingsTileContent = useMemo(() => { const settingsTileContent = useMemo(() => {
return ( return (
@@ -284,7 +256,7 @@ export default function Playground({
<NameValueRow <NameValueRow
name="Agent connected" name="Agent connected"
value={ value={
isAgentConnected ? ( voiceAssistant.agent ? (
"TRUE" "TRUE"
) : roomState === ConnectionState.Connected ? ( ) : roomState === ConnectionState.Connected ? (
<LoadingSVG diameter={12} strokeWidth={2} /> <LoadingSVG diameter={12} strokeWidth={2} />
@@ -293,7 +265,7 @@ export default function Playground({
) )
} }
valueColor={ valueColor={
isAgentConnected voiceAssistant.agent
? `${config.settings.theme_color}-500` ? `${config.settings.theme_color}-500`
: "gray-500" : "gray-500"
} }
@@ -318,7 +290,7 @@ export default function Playground({
title="Microphone" title="Microphone"
deviceSelectorKind="audioinput" deviceSelectorKind="audioinput"
> >
<AudioInputTile frequencies={localMultibandVolume} /> <AudioInputTile trackRef={localMicTrack} />
</ConfigurationPanelItem> </ConfigurationPanelItem>
)} )}
<div className="w-full"> <div className="w-full">
@@ -350,12 +322,11 @@ export default function Playground({
localParticipant, localParticipant,
name, name,
roomState, roomState,
isAgentConnected,
localVideoTrack, localVideoTrack,
localMicTrack, localMicTrack,
localMultibandVolume,
themeColors, themeColors,
setUserSettings, setUserSettings,
voiceAssistant.agent,
]); ]);
let mobileTabs: PlaygroundTab[] = []; let mobileTabs: PlaygroundTab[] = [];

View File

@@ -1,114 +0,0 @@
import { useEffect, useState } from "react";
/** Display modes of the visualizer; each one selects a different bar colouring/animation. */
type VisualizerState = "listening" | "idle" | "speaking" | "thinking";
/** Props accepted by `AgentMultibandAudioVisualizer`. */
type AgentMultibandAudioVisualizerProps = {
/** Current display mode. */
state: VisualizerState;
/** Width of each bar, in pixels. */
barWidth: number;
/** Height in pixels of a bar whose band level is 0. */
minBarHeight: number;
/** Height in pixels of a bar whose band level is 1. */
maxBarHeight: number;
/** Colour name used to build the `bg-…` and `shadow-lg-…` class names. */
accentColor: string;
/** Optional shade appended to `accentColor` in the background class (e.g. `gray-400`). */
accentShade?: number;
/** One array of samples per band; each band is reduced to a single bar. */
frequencies: Float32Array[];
/** Corner radius of each bar, in pixels. */
borderRadius: number;
/** Horizontal spacing between bars, in pixels. */
gap: number;
};
/**
 * Renders one vertical bar per frequency band.
 *
 * Each band in `frequencies` is reduced to a single level (square root of the
 * mean of its samples) which scales the bar between `minBarHeight` and
 * `maxBarHeight`. The `state` prop selects the colouring:
 * - `"speaking"`: every bar uses the accent colour.
 * - `"listening"` / `"idle"`: only the centre bar is accented (and pulses
 *   while listening); the others are `gray-950`.
 * - `"thinking"`: a single accented bar sweeps left and right every 200 ms.
 *
 * @param props See `AgentMultibandAudioVisualizerProps`.
 * @returns A flex row containing one `<div>` per band.
 */
export const AgentMultibandAudioVisualizer = ({
  state,
  barWidth,
  minBarHeight,
  maxBarHeight,
  accentColor,
  accentShade,
  frequencies,
  borderRadius,
  gap,
}: AgentMultibandAudioVisualizerProps) => {
  const summedFrequencies = frequencies.map((bandFrequencies) => {
    // An empty band would otherwise produce 0 / 0 = NaN and a "NaNpx" height.
    if (bandFrequencies.length === 0) {
      return 0;
    }
    const sum = bandFrequencies.reduce((a, b) => a + b, 0);
    return Math.sqrt(sum / bandFrequencies.length);
  });
  const bandCount = summedFrequencies.length;
  const centerIndex = Math.floor(bandCount / 2);

  // `accentShade` is optional: only append it when present so we never emit
  // a class such as `bg-gray-undefined`.
  const accentClass = accentShade ? `${accentColor}-${accentShade}` : accentColor;

  const [thinkingIndex, setThinkingIndex] = useState(centerIndex);
  const [thinkingDirection, setThinkingDirection] = useState<"left" | "right">(
    "right"
  );

  useEffect(() => {
    // Nothing to sweep across unless we are thinking and have at least two
    // bars; park the highlight on the centre bar.
    if (state !== "thinking" || bandCount < 2) {
      setThinkingIndex(Math.floor(bandCount / 2));
      return;
    }
    const timeout = setTimeout(() => {
      if (thinkingDirection === "right") {
        // `>=` (not `===`) so the sweep turns around even if the number of
        // bands shrank and the index is now past the last bar.
        if (thinkingIndex >= bandCount - 1) {
          setThinkingDirection("left");
          setThinkingIndex((prev) => prev - 1);
        } else {
          setThinkingIndex((prev) => prev + 1);
        }
      } else {
        if (thinkingIndex <= 0) {
          setThinkingDirection("right");
          setThinkingIndex((prev) => prev + 1);
        } else {
          setThinkingIndex((prev) => prev - 1);
        }
      }
    }, 200);
    return () => clearTimeout(timeout);
  }, [state, bandCount, thinkingDirection, thinkingIndex]);

  return (
    <div
      className={`flex flex-row items-center`}
      style={{
        gap: gap + "px",
      }}
    >
      {summedFrequencies.map((frequency, index) => {
        const isCenter = index === centerIndex;
        let color = accentClass;
        let shadow = `shadow-lg-${accentColor}`;
        let transform;
        if (state === "listening" || state === "idle") {
          color = isCenter ? accentClass : "gray-950";
          shadow = !isCenter ? "" : shadow;
          transform = !isCenter ? "scale(1.0)" : "scale(1.2)";
        } else if (state === "speaking") {
          color = accentClass;
        } else if (state === "thinking") {
          color = index === thinkingIndex ? accentClass : "gray-950";
          shadow = "";
          transform = thinkingIndex !== index ? "scale(1)" : "scale(1.1)";
        }
        return (
          <div
            className={`bg-${color} ${shadow} ${
              isCenter && state === "listening" ? "animate-pulse" : ""
            }`}
            key={"frequency-" + index}
            style={{
              height:
                minBarHeight + frequency * (maxBarHeight - minBarHeight) + "px",
              borderRadius: borderRadius + "px",
              width: barWidth + "px",
              transition:
                "background-color 0.35s ease-out, transform 0.25s ease-out",
              transform: transform,
            }}
          ></div>
        );
      })}
    </div>
  );
};

View File

@@ -0,0 +1,10 @@
import preval from "next-plugin-preval";
import resolveConfig from "tailwindcss/resolveConfig";
import tailwindConfig from "../../tailwind.config.js";
/**
 * Resolves the project's Tailwind configuration and returns its `theme`
 * section.
 *
 * @returns A promise for the `theme` object of the resolved Tailwind config.
 */
async function getTheme() {
  const { theme } = resolveConfig(tailwindConfig);
  return theme;
}
export default preval(getTheme());

View File

@@ -1,4 +1,5 @@
import { CloudProvider } from "@/cloud/useCloud"; import { CloudProvider } from "@/cloud/useCloud";
import "@livekit/components-styles/components/participant";
import "@/styles/globals.css"; import "@/styles/globals.css";
import type { AppProps } from "next/app"; import type { AppProps } from "next/app";
@@ -7,4 +8,5 @@ export default function App({ Component, pageProps }: AppProps) {
<CloudProvider> <CloudProvider>
<Component {...pageProps} /> <Component {...pageProps} />
</CloudProvider> </CloudProvider>
);} );
}

View File

@@ -4,6 +4,9 @@
body { body {
background: black; background: black;
--lk-va-bar-gap: 4px;
--lk-va-bar-width: 4px;
--lk-va-border-radius: 2px;
} }
#__next { #__next {
@@ -21,19 +24,16 @@ body {
animation: fadeIn 0.5s ease-in-out alternate-reverse infinite; animation: fadeIn 0.5s ease-in-out alternate-reverse infinite;
} }
::-webkit-scrollbar-track { ::-webkit-scrollbar-track {
background: rgba(255, 255, 255, 0.05); background: rgba(255, 255, 255, 0.05);
border-radius: 5px; border-radius: 5px;
} }
::-webkit-scrollbar-thumb { ::-webkit-scrollbar-thumb {
background: rgba(255, 255, 255, 0.1); background: rgba(255, 255, 255, 0.1);
border-radius: 5px; border-radius: 5px;
} }
::-webkit-scrollbar-thumb:hover { ::-webkit-scrollbar-thumb:hover {
background: #555; /* Even lighter grey thumb on hover */ background: #555; /* Even lighter grey thumb on hover */
} }

View File

@@ -14,7 +14,7 @@ const customColors = {
pink: colors.pink, pink: colors.pink,
teal: colors.teal, teal: colors.teal,
red: colors.red, red: colors.red,
} };
let customShadows = {}; let customShadows = {};
let shadowNames = []; let shadowNames = [];