66import numpy as np
77
88
9-
def test_version():
    """Check that ``__version__`` matches the expected version string."""
    expected_version = "0.1.0"
    assert __version__ == expected_version
1211
@@ -188,8 +187,10 @@ def test_scaler():
188187 )
189188
190189 mock_df_1_standard = pd .DataFrame (
191- {"col1" : [- 0.3302891295379082 , - 0.8807710121010884 , - 0.8807710121010884 , 0.7706746355884523 , 1.3211565181516325 ],
192- "col2" : [1.714389230829046 , - 0.26375218935831474 , - 0.26375218935831474 , - 0.9231326627541017 , - 0.26375218935831474 ],
190+ {"col1" : [- 0.3302891295379082 , - 0.8807710121010884 , - 0.8807710121010884 , 0.7706746355884523 ,
191+ 1.3211565181516325 ],
192+ "col2" : [1.714389230829046 , - 0.26375218935831474 , - 0.26375218935831474 , - 0.9231326627541017 ,
193+ - 0.26375218935831474 ],
193194 "col3" : [1.0 , - 1.0 , - 1.0 , 1.0 , 0.0 ]}
194195 )
195196
@@ -233,7 +234,6 @@ def test_scaler():
233234 minmax_scaled_mock_df_1 , mock_df_1_minmax
234235 ), "The returned dataframe using standard scaler method is not correct"
235236
236-
237237 assert pd .DataFrame .equals (
238238 standard_scaled_mock_df_2 , mock_df_2_standard
239239 ), "The returned dataframe using most_frequent inputer is not correct"
@@ -242,43 +242,47 @@ def test_scaler():
242242 ), "The returned dataframe using constant imputer is not correct"
243243
244244
245-
246245def test_outlier_identifier ():
247246 test_df = pd .DataFrame ({
248247 'SepalLengthCm' : [5.1 , 4.9 , 4.7 , 5.5 , 5.1 , 50 , 5.4 , 5.0 , 5.2 , 5.3 , 5.1 ],
249248 'SepalWidthCm' : [1.4 , 1.4 , 20 , 2.0 , 0.7 , 1.6 , 1.2 , 1.4 , 1.8 , 1.5 , 2.1 ],
250- 'PetalWidthCm' :[0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 5 ],
251- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
249+ 'PetalWidthCm' : [0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 5 ],
250+ 'Species' : ['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ,
251+ 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
252252 })
253253
254254 test_column = ['SepalLengthCm' , 'SepalWidthCm' , 'PetalWidthCm' ]
255255
256256 median_output = pd .DataFrame ({
257257 'SepalLengthCm' : [5.1 , 4.9 , 4.7 , 5.5 , 5.1 , 5.1 , 5.4 , 5.0 , 5.2 , 5.3 , 5.1 ],
258258 'SepalWidthCm' : [1.4 , 1.4 , 1.5 , 2.0 , 0.7 , 1.6 , 1.2 , 1.4 , 1.8 , 1.5 , 2.1 ],
259- 'PetalWidthCm' :[0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 0.4 ],
260- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
259+ 'PetalWidthCm' : [0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 0.4 ],
260+ 'Species' : ['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ,
261+ 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
261262 })
262263
263264 trim_output = pd .DataFrame ({
264265 'SepalLengthCm' : [5.1 , 4.9 , 5.5 , 5.1 , 5.4 , 5.0 , 5.2 , 5.3 ],
265266 'SepalWidthCm' : [1.4 , 1.4 , 2.0 , 0.7 , 1.2 , 1.4 , 1.8 , 1.5 ],
266- 'PetalWidthCm' :[0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.6 , 0.4 , 0.2 ],
267- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
267+ 'PetalWidthCm' : [0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.6 , 0.4 , 0.2 ],
268+ 'Species' : ['Iris-setosa' , 'Iris-virginica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ,
269+ 'Iris-setosa' , 'Iris-setosa' ]
268270 })
269271
270272 mean_output = pd .DataFrame ({
271273 'SepalLengthCm' : [5.1 , 4.9 , 4.7 , 5.5 , 5.1 , 9.21 , 5.4 , 5.0 , 5.2 , 5.3 , 5.1 ],
272274 'SepalWidthCm' : [1.4 , 1.4 , 3.19 , 2.0 , 0.7 , 1.6 , 1.2 , 1.4 , 1.8 , 1.5 , 2.1 ],
273- 'PetalWidthCm' :[0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 0.77 ],
274- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
275+ 'PetalWidthCm' : [0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 0.77 ],
276+ 'Species' : ['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ,
277+ 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
275278 })
276279
277- column_output = pd .DataFrame ({
280+ column_output = pd .DataFrame ({
278281 'SepalLengthCm' : [5.1 , 4.9 , 4.7 , 5.5 , 5.1 , 9.21 , 5.4 , 5.0 , 5.2 , 5.3 , 5.1 ],
279282 'SepalWidthCm' : [1.4 , 1.4 , 20 , 2.0 , 0.7 , 1.6 , 1.2 , 1.4 , 1.8 , 1.5 , 2.1 ],
280- 'PetalWidthCm' :[0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 5 ],
281- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
283+ 'PetalWidthCm' : [0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 5 ],
284+ 'Species' : ['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ,
285+ 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
282286 })
283287
284288 # Test if the input is not a DataFrame
@@ -295,7 +299,7 @@ def test_outlier_identifier():
295299
296300 # Test if method input is not one of three methods provided
297301 with raises (Exception ):
298- eda_utils_py .outlier_identifier (test_df , columns = ["SepalLengthCm" ], method = "no" )
302+ eda_utils_py .outlier_identifier (test_df , columns = ["SepalLengthCm" ], method = "no" )
299303
300304 # Test if column selected included non-numeric columns
301305 with raises (Exception ):
@@ -305,12 +309,11 @@ def test_outlier_identifier():
305309 eda_utils_py .outlier_identifier (test_df , test_column ), trim_output
306310 ), "Default test not pass"
307311 assert pd .DataFrame .equals (
308- eda_utils_py .outlier_identifier (test_df , test_column ,method = "median" ), median_output
312+ eda_utils_py .outlier_identifier (test_df , test_column , method = "median" ), median_output
309313 ), "The median method is not correct"
310314 assert pd .DataFrame .equals (
311- eda_utils_py .outlier_identifier (test_df , test_column , method = "mean" ), mean_output
315+ eda_utils_py .outlier_identifier (test_df , test_column , method = "mean" ), mean_output
312316 ), "The mean method is not correct"
313317 assert pd .DataFrame .equals (
314- eda_utils_py .outlier_identifier (test_df , columns = ["SepalLengthCm" ], method = "mean" ), column_output
318+ eda_utils_py .outlier_identifier (test_df , columns = ["SepalLengthCm" ], method = "mean" ), column_output
315319 ), "The selected column method is not correct"
316-
0 commit comments