scripts(eval): give examples for numerical word answers

Some models need extra help recognizing numbers written as words (e.g., 'five' for '5').
This commit is contained in:
Aleix Conchillo Flaqué
2026-01-19 14:37:00 -08:00
parent cddd6d5b0a
commit aed44c863a

View File

@@ -293,12 +293,13 @@ async def run_eval_pipeline(
"You should only call the eval function if:\n"
"- The user explicitly attempts to answer the question, AND\n"
f"- Their answer can be cleanly evaluated using: {eval_config.eval}\n"
"Ignore greetings, comments, non-answers, or requests for clarification."
"Ignore greetings, comments, non-answers, or requests for clarification.\n"
"Numerical word answers are allowed (e.g., 'five' is the same as '5').\n"
)
if eval_config.eval_speaks_first:
system_prompt = f"You are an evaluation agent, be extremly brief. Numerical word answers are allowed. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
system_prompt = f"You are an evaluation agent, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
else:
system_prompt = f"You are an evaluation agent, be extremly brief. Numerical word answers are allowed. First, ask one question: {example_prompt}. {common_system_prompt}"
system_prompt = f"You are an evaluation agent, be extremly brief. First, ask one question: {example_prompt}. {common_system_prompt}"
messages = [
{