From 5546c8e01c014a1a3e6769fc8e2184919eb506dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Tue, 5 Aug 2025 11:46:28 -0700 Subject: [PATCH] scripts(evals): update to use new runner function --- scripts/evals/README.md | 6 +++--- scripts/evals/eval.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/evals/README.md b/scripts/evals/README.md index b67d5d75b..74f1b2e27 100644 --- a/scripts/evals/README.md +++ b/scripts/evals/README.md @@ -32,7 +32,7 @@ also explains why it thinks the answer is valid or invalid. To run the release evals: ```sh -python run-release-evals.py -a -v +uv run run-release-evals.py -a -v ``` This runs all the evals and stores logs and audio (`-a`) for each test. @@ -41,7 +41,7 @@ You can also specify which tests to run. For example, to run all `07` series tests: ```sh -python run-release-evals.py -p 07 -a -v +uv run run-release-evals.py -p 07 -a -v ``` ## Script Evals @@ -49,7 +49,7 @@ python run-release-evals.py -p 07 -a -v You can also run evals for a single example (not part of the release set): ```sh -python run-eval.py -p "A simple math addition" -a -v YOUR_EXAMPLE_SCRIPT +uv run run-eval.py -p "A simple math addition" -a -v YOUR_EXAMPLE_SCRIPT ``` Your script needs to follow any of the foundation examples pattern. diff --git a/scripts/evals/eval.py b/scripts/evals/eval.py index 71d5c9e9e..fba701f9c 100644 --- a/scripts/evals/eval.py +++ b/scripts/evals/eval.py @@ -176,7 +176,7 @@ async def run_example_pipeline(script_path: Path): ), ) - await module.run_example(transport, argparse.Namespace(), True) + await module.run_bot(transport) async def run_eval_pipeline(