@@ -279,6 +279,18 @@ def test_outlier_identifier():
279279 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
280280 })
281281
282+ numeric_only_df = pd .DataFrame ({
283+ 'SepalLengthCm' : [5.1 , 4.9 , 4.7 , 5.5 , 5.1 , 50 , 5.4 , 5.0 , 5.2 , 5.3 , 5.1 ],
284+ 'SepalWidthCm' : [1.4 , 1.4 , 20 , 2.0 , 0.7 , 1.6 , 1.2 , 1.4 , 1.8 , 1.5 , 2.1 ],
285+ 'PetalWidthCm' : [0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 5 ]
286+ })
287+
288+ numeric_only_out = pd .DataFrame ({
289+ 'SepalLengthCm' : [5.1 , 4.9 , 5.5 , 5.1 , 5.4 , 5.0 , 5.2 , 5.3 ],
290+ 'SepalWidthCm' : [1.4 , 1.4 , 2.0 , 0.7 , 1.2 , 1.4 , 1.8 , 1.5 ],
291+ 'PetalWidthCm' :[0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.6 , 0.4 , 0.2 ],
292+ })
293+
282294 # Test if the input is not a DataFrame
283295 with raises (TypeError ):
284296 eda_utils_py .outlier_identifier ("not dataframe" )
@@ -291,6 +303,10 @@ def test_outlier_identifier():
291303 with raises (Exception ):
292304 eda_utils_py .outlier_identifier (test_df , columns = ["not in" ])
293305
306+ # Test if the dataframe contains a non-numeric column but the user wants to process all columns.
307+ with raises (Exception ):
308+ eda_utils_py .outlier_identifier (test_df , method = "trim" )
309+
294310 # Test if method input is not one of three methods provided
295311 with raises (Exception ):
296312 eda_utils_py .outlier_identifier (test_df , columns = ["SepalLengthCm" ], method = "no" )
@@ -311,3 +327,6 @@ def test_outlier_identifier():
311327 assert pd .DataFrame .equals (
312328 eda_utils_py .outlier_identifier (test_df , columns = ["SepalLengthCm" ], method = "mean" ), column_output
313329 ), "The selected column method is not correct"
330+ assert pd .DataFrame .equals (
331+ eda_utils_py .outlier_identifier (numeric_only_df , method = "trim" ), numeric_only_out
332+ ), "The numeric only method is not correct"
0 commit comments