Don't crash when benchmark contains unknown metric (model-checking#2985)
Previously, the code assumed that every metric appearing in the benchmark results
would be declared in the "metrics" key of the parser output. This
commit ensures that if a parser emits a benchmark containing a metric
that has not been declared in the "metrics" key, benchcomp logs a warning
and carries on instead of crashing.
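
For illustration only (not part of the commit message), and using the hypothetical benchmark and metric names from the regression test added below: this is the kind of parser output, shown here as the parsed Python dict, that previously triggered the crash.

# Hypothetical parsed parser output: bench_1 reports a metric 'baz'
# that is not declared under the top-level "metrics" key. Before this
# commit, organizing these results crashed with an uncaught KeyError;
# afterwards benchcomp warns and declares 'baz' on the fly.
parser_output = {
    "metrics": {"foo": {}, "bar": {}},
    "benchmarks": {
        "bench_1": {
            "variants": {
                "v1": {"metrics": {"baz": 11}},
            },
        },
    },
}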
karkhaz authored Feb 1, 2024
1 parent 769561c commit e09993b
Showing 2 changed files with 77 additions and 1 deletion.
10 changes: 9 additions & 1 deletion tools/benchcomp/benchcomp/visualizers/__init__.py
@@ -6,6 +6,7 @@
import json
import logging
import subprocess
import sys
import textwrap

import jinja2
@@ -232,12 +233,19 @@ def _organize_results_into_metrics(results):
    for bench, bench_result in results["benchmarks"].items():
        for variant, variant_result in bench_result["variants"].items():
            for metric, value in variant_result["metrics"].items():
                if metric not in ret:
                    ret[metric] = {}
                    logging.warning(
                        "Benchmark '%s' contained a metric '%s' in the "
                        "'%s' variant result that was not declared in "
                        "the 'metrics' dict. Add '%s: {}' to the metrics "
                        "dict", bench, metric, variant, metric)
                try:
                    ret[metric][bench][variant] = variant_result["metrics"][metric]
                except KeyError:
                    ret[metric][bench] = {
                        variant: variant_result["metrics"][metric]
                    }
                    }
    return ret
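
As a quick sanity check of the change above, here is a minimal sketch (not part of the commit), assuming benchcomp is on the Python path and that _organize_results_into_metrics remains a module-level function of benchcomp.visualizers; the benchmark and metric names mirror the regression test below.

from benchcomp.visualizers import _organize_results_into_metrics

# Result set in the shape the visualizers expect, with bench_1 reporting
# a metric ('baz') that the top-level "metrics" dict does not declare.
results = {
    "metrics": {"foo": {}, "bar": {}},
    "benchmarks": {
        "bench_1": {"variants": {"v1": {"metrics": {"baz": 11}}}},
    },
}

# Previously this crashed with an uncaught KeyError; with this commit it
# logs a warning and declares 'baz' on the fly.
organized = _organize_results_into_metrics(results)
assert organized["baz"] == {"bench_1": {"v1": 11}}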


68 changes: 68 additions & 0 deletions tools/benchcomp/test/test_regression.py
@@ -6,6 +6,7 @@
# tests.

import pathlib
import re
import subprocess
import tempfile
import textwrap
@@ -834,3 +835,70 @@ def test_run_failing_command_visualization(self):
        run_bc()
        self.assertNotEqual(
            run_bc.proc.returncode, 0, msg=run_bc.stderr)


    def test_unknown_metric_in_benchmark(self):
        """Ensure that benchcomp continues with warning if a benchmark result contained an unknown metric"""

        with tempfile.TemporaryDirectory() as tmp:
            out_file = pathlib.Path(tmp) / str(uuid.uuid4())
            run_bc = Benchcomp({
                "variants": {
                    "v1": {
                        "config": {
                            "command_line": "true",
                            "directory": tmp,
                        }
                    },
                    "v2": {
                        "config": {
                            "command_line": "true",
                            "directory": tmp,
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {
                                "command": """
                                    echo '{
                                        metrics: {
                                            foo: {},
                                            bar: {},
                                        },
                                        benchmarks: {
                                            bench_1: {
                                                metrics: {
                                                    baz: 11
                                                }
                                            }
                                        }
                                    }'
                                """
                            },
                            "variants": ["v2", "v1"]
                        }
                    }
                },
                "visualize": [{
                    "type": "dump_markdown_results_table",
                    "out_file": "-",
                    "extra_columns": {},
                }],
            })

            output_pat = re.compile(
                "Benchmark 'bench_1' contained a metric 'baz' in the 'v1' "
                "variant result that was not declared in the 'metrics' dict.")

            run_bc()
            self.assertRegex(run_bc.stderr, output_pat)

            self.assertEqual(run_bc.proc.returncode, 0, msg=run_bc.stderr)

            with open(run_bc.working_directory / "result.yaml") as handle:
                result = yaml.safe_load(handle)

            for item in ["benchmarks", "metrics"]:
                self.assertIn(item, result)
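
For reference, a small hypothetical snippet (not part of this commit) showing how the %-style warning added in visualizers/__init__.py expands for the values this test exercises; the expanded text is what the regex above matches against.

# The format string is copied verbatim from _organize_results_into_metrics;
# the bench, metric, and variant values are the ones produced by this test.
msg = (
    "Benchmark '%s' contained a metric '%s' in the "
    "'%s' variant result that was not declared in "
    "the 'metrics' dict. Add '%s: {}' to the metrics "
    "dict" % ("bench_1", "baz", "v1", "baz"))
print(msg)
# Benchmark 'bench_1' contained a metric 'baz' in the 'v1' variant
# result that was not declared in the 'metrics' dict. Add 'baz: {}'
# to the metrics dict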
