From 51ba245e1030af0f7b7366f5d804fff3d6165c3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?=
Date: Tue, 18 Nov 2025 14:13:34 -0800
Subject: [PATCH] scripts(evals): fix EVAL_CONVERSATION/EVAL_WEATHER eval

EVAL_WEATHER: name the accepted temperature units (fahrenheit or celsius)
in both the prompt and the eval criterion.

EVAL_CONVERSATION: change the eval criterion from "replies with a
greeting" to "acknowledges the greeting".

---
Review note: "farhenheit" in the two added EVAL_WEATHER strings was corrected
to "fahrenheit"; the post-image blob id on the index line below predates that
correction.

 scripts/evals/run-release-evals.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py
index da6df053d..4f8268d78 100644
--- a/scripts/evals/run-release-evals.py
+++ b/scripts/evals/run-release-evals.py
@@ -30,8 +30,8 @@ EVAL_SIMPLE_MATH = EvalConfig(
 )
 
 EVAL_WEATHER = EvalConfig(
-    prompt="What's the weather in San Francisco?",
-    eval="The user says something specific about the current weather in San Francisco, including the degrees.",
+    prompt="What's the weather in San Francisco (in fahrenheit or celsius)?",
+    eval="The user says something specific about the current weather in San Francisco, including the degrees (in fahrenheit or celsius).",
 )
 
 EVAL_ONLINE_SEARCH = EvalConfig(
@@ -70,7 +70,7 @@ EVAL_VOICEMAIL = EvalConfig(
 
 EVAL_CONVERSATION = EvalConfig(
     prompt="Hello, this is Mark.",
-    eval="The user replies with a greeting.",
+    eval="The user acknowledges the greeting.",
     eval_speaks_first=True,
 )
 