@@ -24,72 +24,77 @@ def setUpClass(cls):
2424
2525 def test_basic_correct_response (self ):
2626 """Test if semantically similar responses are marked correct."""
27- response = [ "Density" , " Velocity" , " Viscosity" , " Length"]
28- answer = [ "Density" , " Velocity" , " Viscosity" , " Length"]
27+ response = "Density; Velocity; Viscosity; Length"
28+ answer = "Density; Velocity; Viscosity; Length"
2929 result = evaluation_function (response , answer , self .param )
3030
3131 self .assertTrue (result .get ("is_correct" ))
3232
3333 def test_basic_incorrect_response (self ):
3434 """Test if semantically different responses are marked incorrect."""
35- response = [ "Mass" , " Speed" , " Friction" , " Force"]
36- answer = [ "Density" , " Velocity" , " Viscosity" , " Length"]
35+ response = "Mass; Speed; Friction; Force"
36+ answer = "Density; Velocity; Viscosity; Length"
3737 result = evaluation_function (response , answer , self .param )
3838
39+
3940 self .assertFalse (result .get ("is_correct" ))
4041
4142 def test_partial_match (self ):
4243 """Test if a response too short is marked incorrect."""
43- response = [ "Density" , " Velocity" , " Viscosity"]
44- answer = [ "Density" , " Velocity" , " Viscosity" , " Length"]
44+ response = "Density; Velocity; Viscosity"
45+ answer = "Density; Velocity; Viscosity; Length"
4546
4647 self .param .response_num_required = 4
4748 result = evaluation_function (response , answer , self .param )
49+
4850 self .param .response_num_required = 0
49-
50- self .assertFalse (result .get ("is_correct" ))
5151
52+ self .assertFalse (result .get ("is_correct" ))
5253
5354 def test_synonyms_match (self ):
54- """Test if abbriviations are correctly identified."""
55- response = [ ' velocity' ]
56- answer = [ ' speed' ]
55+ """Test if abbreviations are correctly identified."""
56+ response = " velocity"
57+ answer = " speed"
5758 result = evaluation_function (response , answer , self .param )
5859
60+
5961 self .assertTrue (result .get ("is_correct" ))
6062
6163 def test_exact_match_requirement (self ):
6264 """Test enforcing exact match on keystrings."""
63- response = [ "density" , " speed" , " viscosity" , " length"]
64- answer = [ "Density" , " Velocity" , " Viscosity" , " Length"]
65+ response = "density; speed; viscosity; length"
66+ answer = "Density; Velocity; Viscosity; Length"
6567
6668 result = evaluation_function (response , answer , self .param )
69+
6770 self .assertTrue (result .get ("is_correct" ))
6871
6972 def test_should_not_contain (self ):
7073 """Test if a response with a prohibited keyword fails."""
71- response = [ "density" , " velocity" , " viscosity" , " length" , " direction"]
72- answer = [ "Density" , " Velocity" , " Viscosity" , " Length"]
74+ response = "density; velocity; viscosity; length; direction"
75+ answer = "Density; Velocity; Viscosity; Length"
7376
7477 result = evaluation_function (response , answer , self .param )
75- self .assertFalse (result .get ("is_correct" ))
7678
79+ self .assertFalse (result .get ("is_correct" ))
7780
7881 def test_negation_handling (self ):
7982 """Test how the model handles negation."""
80- response = [ "not light blue" , " dark blue"]
81- answer = [ "light blue" ]
83+ response = "not light blue; dark blue"
84+ answer = "light blue"
8285
8386 result = evaluation_function (response , answer , self .param )
8487
88+
8589 self .assertFalse (result .get ("is_correct" ))
8690
8791 def test_performance (self ):
8892 """Ensure that processing time is reasonable."""
89- response = [ "Density" , " Velocity" , " Viscosity" , " Length"]
90- answer = [ "Density" , " Velocity" , " Viscosity" , " Length"]
93+ response = "Density; Velocity; Viscosity; Length"
94+ answer = "Density; Velocity; Viscosity; Length"
9195
9296 result = evaluation_function (response , answer , self .param )
97+
9398 processing_time = result .get ("result" , {}).get ("processing_time" , 0 )
9499
95100 self .assertLess (processing_time , 5 , msg = "Evaluation function should run efficiently." )
0 commit comments