From aed44c863af04fe8eec7e9272e10a5c703c1e41e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= <aleix@daily.co>
Date: Mon, 19 Jan 2026 14:37:00 -0800
Subject: [PATCH] scripts(eval): give an example for numerical word answers

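Some models need extra help understanding that spelled-out numbers are
acceptable answers. Move the "numerical word answers are allowed" hint
out of the two system_prompt variants into the instruction block that
precedes them, and add a concrete example ('five' is the same as '5').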
---
 scripts/evals/eval.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/evals/eval.py b/scripts/evals/eval.py
index 7c6a72604..16598b4d7 100644
--- a/scripts/evals/eval.py
+++ b/scripts/evals/eval.py
@@ -293,12 +293,13 @@ async def run_eval_pipeline(
         "You should only call the eval function if:\n"
         "- The user explicitly attempts to answer the question, AND\n"
         f"- Their answer can be cleanly evaluated using: {eval_config.eval}\n"
-        "Ignore greetings, comments, non-answers, or requests for clarification."
+        "Ignore greetings, comments, non-answers, or requests for clarification.\n"
+        "Numerical word answers are allowed (e.g., 'five' is the same as '5').\n"
     )
     if eval_config.eval_speaks_first:
-        system_prompt = f"You are an evaluation agent, be extremly brief. Numerical word answers are allowed. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
+        system_prompt = f"You are an evaluation agent, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
     else:
-        system_prompt = f"You are an evaluation agent, be extremly brief. Numerical word answers are allowed. First, ask one question: {example_prompt}. {common_system_prompt}"
+        system_prompt = f"You are an evaluation agent, be extremly brief. First, ask one question: {example_prompt}. {common_system_prompt}"
 
     messages = [
         {