From aed44c863af04fe8eec7e9272e10a5c703c1e41e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Mon, 19 Jan 2026 14:37:00 -0800 Subject: [PATCH] scripts(eval): give examples to numerical word answers Some models need extra help to accept number words as valid answers, so give a concrete example ('five' is the same as '5') and state the rule once in the shared system prompt instead of repeating it in each variant. --- scripts/evals/eval.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/evals/eval.py b/scripts/evals/eval.py index 7c6a72604..16598b4d7 100644 --- a/scripts/evals/eval.py +++ b/scripts/evals/eval.py @@ -293,12 +293,13 @@ async def run_eval_pipeline( "You should only call the eval function if:\n" "- The user explicitly attempts to answer the question, AND\n" f"- Their answer can be cleanly evaluated using: {eval_config.eval}\n" - "Ignore greetings, comments, non-answers, or requests for clarification." + "Ignore greetings, comments, non-answers, or requests for clarification.\n" + "Numerical word answers are allowed (e.g., 'five' is the same as '5').\n" ) if eval_config.eval_speaks_first: - system_prompt = f"You are an evaluation agent, be extremly brief. Numerical word answers are allowed. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}" + system_prompt = f"You are an evaluation agent, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}" else: - system_prompt = f"You are an evaluation agent, be extremly brief. Numerical word answers are allowed. First, ask one question: {example_prompt}. {common_system_prompt}" + system_prompt = f"You are an evaluation agent, be extremly brief. First, ask one question: {example_prompt}. {common_system_prompt}" messages = [ {