Skip to content

Commit cc368f7

Browse files
Custom feedback for an arbitrary incorrect answer can now be specified
1 parent fc2803a commit cc368f7

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

app/evaluation.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,27 +28,39 @@ def evaluation_function(response, answer, params) -> dict:
2828
raise Exception("Answer has empty fields.")
2929
response_ok = process_element(response)
3030
if not response_ok:
31-
return {"is_correct": False, "feedback": "Response has empty fields."}
31+
return {
32+
"is_correct": False,
33+
"feedback": "Response has empty fields."
34+
}
3235

3336
try:
3437
res = np.array(response, dtype=np.float32)
3538
except Exception as e:
36-
raise EvaluationException(f"Failed to parse user response",
37-
detail=repr(e))
39+
raise EvaluationException(
40+
f"Failed to parse user response",
41+
detail=repr(e)
42+
)
3843

3944
try:
4045
ans = np.array(answer, dtype=np.float32)
4146
except Exception as e:
42-
raise EvaluationException(f"Failed to parse correct answer",
43-
detail=repr(e))
47+
raise EvaluationException(
48+
f"Failed to parse correct answer",
49+
detail=repr(e)
50+
)
4451

4552
rtol = params.get("rtol", 0)
4653
atol = params.get("atol", 0)
4754

4855
is_correct = np.allclose(res, ans, rtol=rtol, atol=atol)
4956

50-
# TODO: If incorrect, could compute which cells are, and return as feedback
57+
if is_correct is False and params.get("feedback_for_incorrect_case", None) is not None:
58+
return {
59+
"is_correct": is_correct,
60+
"feedback": params["feedback_for_incorrect_case"]
61+
}
5162

63+
# TODO: If incorrect, could compute which cells are wrong and return them as feedback
5264
return {"is_correct": is_correct}
5365

5466
def process_element(element):

app/evaluation_tests.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,15 @@ def test_2D_incorrect(self):
114114

115115
self.assertEqual(response.get("is_correct"), False)
116116

117+
def test_2D_incorrect_with_custom_feedback(self):
118+
response = [[1, 1], [1, 1]]
119+
answer = [[1, 1], [1, 0]]
120+
121+
response = evaluation_function(response, answer, {"feedback_for_incorrect_case": "Custom feedback"})
122+
123+
self.assertEqual(response.get("is_correct"), False)
124+
self.assertEqual(response["feedback"], "Custom feedback")
125+
117126
# def test_3D_correct(self):
118127
# response = [[[1, 1], [2, 1]], [[2, 1.2], [2, 2]]],
119128
# answer = [[[1, 1], [2, 1.1]], [[2, 1], [2, 2]]]

0 commit comments

Comments
 (0)