11from eda_utils_py import __version__
22from eda_utils_py import eda_utils_py
3+ import pandas as pd
4+ import altair as alt
5+ from pandas .api .types import is_numeric_dtype
6+ import numpy as np
7+
38
def test_version():
    """Check that the package reports the expected release version."""
    expected_version = '0.1.0'
    assert __version__ == expected_version
@@ -10,7 +15,7 @@ def test_outlier_identifier():
1015 'SepalLengthCm' : [5.1 , 4.9 , 4.7 , 5.5 , 5.1 , 50 , 5.4 , 5.0 , 5.2 , 5.3 , 5.1 ],
1116 'SepalWidthCm' : [1.4 , 1.4 , 20 , 2.0 , 0.7 , 1.6 , 1.2 , 1.4 , 1.8 , 1.5 , 2.1 ],
1217 'PetalWidthCm' :[0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 5 ],
13- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
18+ 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
1419 })
1520
1621 test_column = ['SepalLengthCm' , 'SepalWidthCm' , 'PetalWidthCm' ]
@@ -19,59 +24,59 @@ def test_outlier_identifier():
1924 'SepalLengthCm' : [5.1 , 4.9 , 4.7 , 5.5 , 5.1 , 5.1 , 5.4 , 5.0 , 5.2 , 5.3 , 5.1 ],
2025 'SepalWidthCm' : [1.4 , 1.4 , 1.5 , 2.0 , 0.7 , 1.6 , 1.2 , 1.4 , 1.8 , 1.5 , 2.1 ],
2126 'PetalWidthCm' :[0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 0.4 ],
22- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
27+ 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa ' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
2328 })
2429
2530 trim_output = pd .DataFrame ({
2631 'SepalLengthCm' : [5.1 , 4.9 , 5.5 , 5.1 , 5.4 , 5.0 , 5.2 , 5.3 ],
2732 'SepalWidthCm' : [1.4 , 1.4 , 2.0 , 0.7 , 1.2 , 1.4 , 1.8 , 1.5 ],
28- 'PetalWidthCm' :[0.2 , 0.2 , 0.3 , 0.4 0.5 , 0.6 , 0.4 , 0.2 ],
29- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica ' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
33+ 'PetalWidthCm' :[0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.6 , 0.4 , 0.2 ],
34+ 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-setosa' , 'Iris-setosa ' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
3035 })
3136
3237 mean_output = pd .DataFrame ({
3338 'SepalLengthCm' : [5.1 , 4.9 , 4.7 , 5.5 , 5.1 , 9.21 , 5.4 , 5.0 , 5.2 , 5.3 , 5.1 ],
3439 'SepalWidthCm' : [1.4 , 1.4 , 3.19 , 2.0 , 0.7 , 1.6 , 1.2 , 1.4 , 1.8 , 1.5 , 2.1 ],
3540 'PetalWidthCm' :[0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 0.77 ],
36- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
41+ 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
3742 })
3843
3944 column_output = pd .DataFrame ({
4045 'SepalLengthCm' : [5.1 , 4.9 , 4.7 , 5.5 , 5.1 , 9.21 , 5.4 , 5.0 , 5.2 , 5.3 , 5.1 ],
4146 'SepalWidthCm' : [1.4 , 1.4 , 20 , 2.0 , 0.7 , 1.6 , 1.2 , 1.4 , 1.8 , 1.5 , 2.1 ],
4247 'PetalWidthCm' :[0.2 , 0.2 , 0.2 , 0.3 , 0.4 , 0.5 , 0.5 , 0.6 , 0.4 , 0.2 , 5 ],
43- 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
48+ 'Species' :['Iris-setosa' , 'Iris-virginica' , 'Iris-germanica' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' , 'Iris-setosa' ]
4449 })
4550
4651 # Test if the input is not a DataFrame
47- with raises (TypeError ):
52+ with raise (TypeError ):
4853 eda_utils_py .outlier_identifier ("not dataframe" )
4954
5055 # Test if columns input is not list
51- with raises (TypeError ):
56+ with raise (TypeError ):
5257 eda_utils_py .outlier_identifier (test_df , columns = 2 )
5358
5459 # Test if a column in the input list is not in the dataframe
55- with raises (TypeError ):
60+ with raise (TypeError ):
5661 eda_utils_py .outlier_identifier (test_df , columns = ["not in" ])
5762
5863 # Test if method input is not one of three methods provided
59- with raises (TypeError ):
64+ with raise (TypeError ):
6065 eda_utils_py .outlier_identifier (test_df , columns = ["SepalLengthCm" ], method = "no" )
6166
6267 # Test if the selected columns include non-numeric columns
63- with raises (Exception ):
68+ with raise (Exception ):
6469 eda_utils_py .outlier_identifier (test_df , columns = ["Species" ])
6570
6671 assert pd .DataFrame .equals (
67- eda_utils_py . outlier_identifier (test_df ), trim_output
72+ outlier_identifier (test_df , test_column ), trim_output
6873 ), "Default test not pass"
6974 assert pd .DataFrame .equals (
70- eda_utils_py . outlier_identifier (data , method = "median" ), median_output
75+ outlier_identifier (test_df , test_column , method = "median" ), median_output
7176 ), "The median method is not correct"
7277 assert pd .DataFrame .equals (
73- eda_utils_py . outlier_identifier (data , method = "mean" ), mean_output
78+ outlier_identifier (test_df , test_column , method = "mean" ), mean_output
7479 ), "The mean method is not correct"
7580 assert pd .DataFrame .equals (
76- eda_utils_py . outlier_identifier (data , columns = ["SepalLengthCm" ], method = "mean" ), column_output
77- ), "The selected column method is not correct"
81+ outlier_identifier (test_df , columns = ["SepalLengthCm" ], method = "mean" ), column_output
82+ ), "The selected column method is not correct"
0 commit comments