diff --git a/examples/foundational/07l-interruptible-groq.py b/examples/foundational/07l-interruptible-groq.py index 208d39c2d..a4bfee8dc 100644 --- a/examples/foundational/07l-interruptible-groq.py +++ b/examples/foundational/07l-interruptible-groq.py @@ -57,7 +57,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): llm = GroqLLMService( api_key=os.getenv("GROQ_API_KEY"), settings=GroqLLMService.Settings( - model="meta-llama/llama-4-maverick-17b-128e-instruct", + model="llama-3.1-8b-instant", system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", ), ) diff --git a/examples/foundational/07n-interruptible-google-http.py b/examples/foundational/07n-interruptible-google-http.py index 91e822ec0..35c80a972 100644 --- a/examples/foundational/07n-interruptible-google-http.py +++ b/examples/foundational/07n-interruptible-google-http.py @@ -73,7 +73,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): llm = GoogleLLMService( api_key=os.getenv("GOOGLE_API_KEY"), - settings=GoogleLLMService.GoogleLLMSettings( + settings=GoogleLLMService.Settings( model="gemini-2.5-flash", # force a certain amount of thinking if you want it # thinking=GoogleLLMService.ThinkingConfig(thinking_budget=4096) diff --git a/examples/foundational/14j-function-calling-nvidia.py b/examples/foundational/14j-function-calling-nvidia.py index 39b75b7ac..7dd772c03 100644 --- a/examples/foundational/14j-function-calling-nvidia.py +++ b/examples/foundational/14j-function-calling-nvidia.py @@ -75,7 +75,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): model="nvidia/llama-3.3-nemotron-super-49b-v1.5", # Recommended when turning thinking off temperature=0.0, - system_instruction="You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + system_instruction="/no_think You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", ), ) # You can also register a function_name of None to get all functions