Switch to more flexible JSON schema

m-hurlow · m-hurlow · commit 169c51cc4d74 · 2024-07-25T13:06:14.000+01:00
diff --git a/eval_tests.json b/eval_tests.json
@@ -1,18 +1,76 @@
 [
     {
-        "response": "A & Test",
-        "answer": "A & Test",
-        "params": {},
-        "is_correct": true,
-        "results": {
-            "response_latex": "A \\cdot \\mathrm{Test}"
-        }
-    },
-    {
-        "response": "A | B",
-        "answer": "A & B",
-        "params": {},
-        "is_correct": false,
-        "feedback": "The expressions are not equal."
+        "title": "Basic expression equality and inequality",
+        "masterContent": "Demonstrates trivial comparisons",
+        "parts": [
+            {
+                "content": "The response and answer are exactly the same, so the response should be considered correct.",
+                "responseAreas": [
+                    {
+                        "preResponseText": "",
+                        "answer": "A & B",
+                        "params": {},
+                        "tests": [
+                            {
+                                "description": "Most basic possible case",
+                                "response": "A & B",
+                                "expectedResult": {
+                                    "is_correct": true,
+                                    "response_latex": "A \\cdot B"
+                                }
+                            }
+                        ]
+                    },
+                    {
+                        "preResponseText": "Multi-character variable names are supported.",
+                        "answer": "A & Test",
+                        "params": {},
+                        "tests": [
+                            {
+                                "description": "Works with variable names of any length",
+                                "response": "A & Test",
+                                "expectedResult": {
+                                    "is_correct": true,
+                                    "response_latex": "A \\cdot \\mathrm{Test}"
+                                }
+                            }
+                        ]
+                    }
+                ]
+            },
+            {
+                "content": "",
+                "responseAreas": [
+                    {
+                        "preResponseText": "Transposition of variables:",
+                        "answer": "A & B",
+                        "params": {},
+                        "tests": [
+                            {
+                                "description": "Tests transposed variables are correct",
+                                "response": "B & A",
+                                "expectedResult": {
+                                    "is_correct": true
+                                }
+                            }
+                        ]
+                    },
+                    {
+                        "preResponseText": "Trivially incorrect response:",
+                        "answer": "A & B",
+                        "params": {},
+                        "tests": [
+                            {
+                                "description": "Incorrect results marked as false",
+                                "response": "A | B",
+                                "expectedResult": {
+                                    "is_correct": false
+                                }
+                            }
+                        ]
+                    }
+                ]
+            }
+        ]
     }
 ]
diff --git a/evaluation_function/evaluation_test.py b/evaluation_function/evaluation_test.py
@@ -91,8 +91,7 @@ def test_nor_nand(self):
         self.assertFalse(result.get("feedback"))
 
     def test_complex(self):
-        response, answer, params = "A & B | B & C & (B | C)", "B & (A | C)", Params(
-        )
+        response, answer, params = "A & B | B & C & (B | C)", "B & (A | C)", Params()
 
         result = evaluation_function(response, answer, params).to_dict()
 
diff --git a/evaluation_function/json_tests.py b/evaluation_function/json_tests.py
@@ -5,8 +5,10 @@ def __init__(self, test_dict: dict):
         self.response = test_dict["response"]
         self.answer = test_dict["answer"]
         self.params = test_dict["params"]
-        self.is_correct = test_dict["is_correct"]
-        self.results = test_dict.get("results")
+        expected_result = test_dict["expectedResult"]
+        self.is_correct = expected_result["is_correct"]
+        self.results = expected_result
+        self.desc = test_dict["description"]
 
     def evaluate(self, func) -> dict:
         return func(self.response, self.answer, self.params)
@@ -17,7 +19,7 @@ def compare(self, eval_result: dict) -> tuple[bool, str]:
         if eval_correct != self.is_correct:
             return (
                 False,
-                f"response \"{self.response}\" with answer \"{self.answer}\" was {'' if eval_correct else 'in'}correct: {eval_result['feedback']}."
+                f"response \"{self.response}\" with answer \"{self.answer}\" was {'' if eval_correct else 'in'}correct: {eval_result['feedback']}\nTest description: {self.desc}"
             )
         
         # Are there any other fields in the eval function result that need to be checked?
@@ -31,20 +33,28 @@ def compare(self, eval_result: dict) -> tuple[bool, str]:
                 if actual_result_val != value:
                     return (
                         False,
-                        f"expected {key} = \"{value}\", got {key} = \"{actual_result_val}\""
+                        f"expected {key} = \"{value}\", got {key} = \"{actual_result_val}\"\nTest description: {self.desc}"
                     )
         
         return (True, "")
         
 
 def get_tests_from_json(filename: str) -> list[TestData]:
     out = []
-    tests = []
+    questions = []
     with open(filename, "r") as test_file:
         test_json = test_file.read()
-        tests = json.loads(test_json)
-    for test in tests:  
-        out.append(TestData(test))
+        questions = json.loads(test_json)
+    # Convert the structured test data into a flat list of tests
+    for question in questions:
+        for part in question["parts"]:
+            for response_area in part["responseAreas"]:
+                params = response_area["params"]
+                answer = response_area["answer"]
+                for test in response_area["tests"]:
+                    test.update({"answer": answer})
+                    test.update({"params": params})
+                    out.append(TestData(test))
     
     return out