22from eda_utils_py import eda_utils_py
33
def test_version():
    """Check that the package reports the expected version string."""
    expected_version = '0.1.0'
    assert __version__ == expected_version
6+
7+
def test_outlier_identifier():
    """Exercise ``outlier_identifier`` input validation and its three methods.

    The fixture has 11 rows with one extreme value planted in each numeric
    column: row 5 of ``SepalLengthCm`` (50), row 2 of ``SepalWidthCm`` (20)
    and row 10 of ``PetalWidthCm`` (5).  The expected frames spell out the
    result the test expects when those rows are trimmed, or when the extreme
    values are replaced by the column median or by the column mean rounded
    to two decimals.
    """
    # NOTE(review): the original fixture listed only 10 species for 11 rows,
    # which makes ``pd.DataFrame`` raise ValueError (unequal column lengths).
    # The 11th entry is assumed to be 'Iris-setosa' -- confirm.
    species = ['Iris-setosa', 'Iris-virginica', 'Iris-germanica'] + ['Iris-setosa'] * 8

    test_df = pd.DataFrame({
        'SepalLengthCm': [5.1, 4.9, 4.7, 5.5, 5.1, 50, 5.4, 5.0, 5.2, 5.3, 5.1],
        'SepalWidthCm': [1.4, 1.4, 20, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
        'PetalWidthCm': [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 5],
        'Species': species,
    })

    test_column = ['SepalLengthCm', 'SepalWidthCm', 'PetalWidthCm']

    # Extreme values replaced by the median of their column.
    median_output = pd.DataFrame({
        'SepalLengthCm': [5.1, 4.9, 4.7, 5.5, 5.1, 5.1, 5.4, 5.0, 5.2, 5.3, 5.1],
        'SepalWidthCm': [1.4, 1.4, 1.5, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
        'PetalWidthCm': [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 0.4],
        'Species': species,
    })

    # Rows 2, 5 and 10 removed.  The species column drops the same rows, so
    # 'Iris-germanica' (row 2) is no longer present.
    # NOTE(review): DataFrame.equals also compares the index; this expected
    # frame has a fresh 0..7 index, which presumes outlier_identifier resets
    # the index after trimming -- confirm.
    trim_output = pd.DataFrame({
        'SepalLengthCm': [5.1, 4.9, 5.5, 5.1, 5.4, 5.0, 5.2, 5.3],
        'SepalWidthCm': [1.4, 1.4, 2.0, 0.7, 1.2, 1.4, 1.8, 1.5],
        'PetalWidthCm': [0.2, 0.2, 0.3, 0.4, 0.5, 0.6, 0.4, 0.2],
        'Species': ['Iris-setosa', 'Iris-virginica'] + ['Iris-setosa'] * 6,
    })

    # Extreme values replaced by the column mean, rounded to two decimals.
    mean_output = pd.DataFrame({
        'SepalLengthCm': [5.1, 4.9, 4.7, 5.5, 5.1, 9.21, 5.4, 5.0, 5.2, 5.3, 5.1],
        'SepalWidthCm': [1.4, 1.4, 3.19, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
        'PetalWidthCm': [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 0.77],
        'Species': species,
    })

    # Only SepalLengthCm is processed; the other columns keep their outliers.
    column_output = pd.DataFrame({
        'SepalLengthCm': [5.1, 4.9, 4.7, 5.5, 5.1, 9.21, 5.4, 5.0, 5.2, 5.3, 5.1],
        'SepalWidthCm': [1.4, 1.4, 20, 2.0, 0.7, 1.6, 1.2, 1.4, 1.8, 1.5, 2.1],
        'PetalWidthCm': [0.2, 0.2, 0.2, 0.3, 0.4, 0.5, 0.5, 0.6, 0.4, 0.2, 5],
        'Species': species,
    })

    # Test if the input is not a DataFrame
    with raises(TypeError):
        eda_utils_py.outlier_identifier("not dataframe")

    # Test if columns input is not a list
    with raises(TypeError):
        eda_utils_py.outlier_identifier(test_df, columns=2)

    # Test if a requested column is not in the dataframe
    with raises(TypeError):
        eda_utils_py.outlier_identifier(test_df, columns=["not in"])

    # Test if method input is not one of the three methods provided
    with raises(TypeError):
        eda_utils_py.outlier_identifier(test_df, columns=["SepalLengthCm"], method="no")

    # Test if the selected columns include non-numeric columns
    with raises(Exception):
        eda_utils_py.outlier_identifier(test_df, columns=["Species"])

    # The original passed an undefined name ``data`` to the three calls
    # below; the fixture frame is ``test_df``.
    assert pd.DataFrame.equals(
        eda_utils_py.outlier_identifier(test_df), trim_output
    ), "Default test not pass"
    assert pd.DataFrame.equals(
        eda_utils_py.outlier_identifier(test_df, method="median"), median_output
    ), "The median method is not correct"
    assert pd.DataFrame.equals(
        eda_utils_py.outlier_identifier(test_df, method="mean"), mean_output
    ), "The mean method is not correct"
    assert pd.DataFrame.equals(
        eda_utils_py.outlier_identifier(test_df, columns=["SepalLengthCm"], method="mean"),
        column_output,
    ), "The selected column method is not correct"
0 commit comments