Skip to content

Commit 981cb8f

Browse files
committed
add exception test for imputer
1 parent f809463 commit 981cb8f

File tree

4 files changed

+134
-81
lines changed

4 files changed

+134
-81
lines changed

eda_utils_py/eda_utils_py.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def imputer(df, strategy="mean", fill_value=None):
5252
if not isinstance(strategy, str):
5353
raise TypeError("strategy must be of type str")
5454

55-
# Tests whether input strategy is of type str
55+
# Tests whether input fill_value is of type numbers or None
5656
if not isinstance(fill_value, type(None)) and not isinstance(
5757
fill_value, numbers.Number
5858
):

poetry.lock

Lines changed: 9 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ license = "MIT"
88
[tool.poetry.dependencies]
99
python = "^3.8"
1010
pandas = "^1.2.2"
11+
altair = "^4.1.0"
1112

1213
[tool.poetry.dev-dependencies]
1314
Sphinx = "^3.5.1"

tests/test_eda_utils_py.py

Lines changed: 123 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -3,95 +3,147 @@
33
from pytest import raises
44
import pandas as pd
55

6+
67
def test_version():
7-
assert __version__ == '0.1.0'
8-
9-
8+
assert __version__ == "0.1.0"
9+
10+
1011
def test_imputer():
11-
data = pd.DataFrame({
12-
'col1':[None, 4, 4, 7],
13-
'col2':[2, None, None, 2],
14-
'col3':[3, None, 6, 6]
15-
})
16-
17-
imp_mean = pd.DataFrame({
18-
'col1':[5, 4, 4, 7],
19-
'col2':[2, 2, 2, 2],
20-
'col3':[3, 5, 6, 6]
21-
})
22-
23-
imp_median = pd.DataFrame({
24-
'col1':[4, 4, 4, 7],
25-
'col2':[2, 2, 2, 2],
26-
'col3':[3, 6, 6, 6]
27-
})
28-
29-
imp_most_frequent = pd.DataFrame({
30-
'col1':[4, 4, 4, 7],
31-
'col2':[2, 2, 2, 2],
32-
'col3':[3, 6, 6, 6]
33-
})
34-
35-
imp_constant = pd.DataFrame({
36-
'col1':[1, 4, 4, 7],
37-
'col2':[2, 1, 1, 2],
38-
'col3':[3, 1, 6, 6]
39-
})
40-
assert (eda_utils_py.imputer(data) == imp_mean).all()
41-
assert (eda_utils_py.imputer(data, "median") == imp_median).all()
42-
assert (eda_utils_py.imputer(data, "most_frequent") == imp_most_frequent).all()
43-
assert (eda_utils_py.imputer(data, "constant", 1) == imp_constant).all()
44-
45-
12+
data = pd.DataFrame(
13+
{"col1": [None, 4, 4, 7], "col2": [2, None, None, 2], "col3": [3, None, 6, 6]}
14+
)
15+
16+
imp_mean = pd.DataFrame(
17+
{
18+
"col1": [5.0, 4.0, 4.0, 7.0],
19+
"col2": [2.0, 2.0, 2.0, 2.0],
20+
"col3": [3.0, 5.0, 6.0, 6.0],
21+
}
22+
)
23+
24+
imp_median = pd.DataFrame(
25+
{
26+
"col1": [4.0, 4.0, 4.0, 7.0],
27+
"col2": [2.0, 2.0, 2.0, 2.0],
28+
"col3": [3.0, 6.0, 6.0, 6.0],
29+
}
30+
)
31+
32+
imp_most_frequent = pd.DataFrame(
33+
{
34+
"col1": [4.0, 4.0, 4.0, 7.0],
35+
"col2": [2.0, 2.0, 2.0, 2.0],
36+
"col3": [3.0, 6.0, 6.0, 6.0],
37+
}
38+
)
39+
40+
imp_constant = pd.DataFrame(
41+
{
42+
"col1": [1.0, 4.0, 4.0, 7.0],
43+
"col2": [2.0, 1.0, 1.0, 2.0],
44+
"col3": [3.0, 1.0, 6.0, 6.0],
45+
}
46+
)
47+
48+
# Tests whether passing data that is not a DataFrame raises TypeError
49+
with raises(TypeError):
50+
eda_utils_py.imputer([4, None, 4, 7])
51+
52+
# Tests whether strategy of incorrect type raises TypeError
53+
with raises(TypeError):
54+
eda_utils_py.imputer(data, strategy=2)
55+
56+
# Tests whether fill_value of incorrect type raises TypeError
57+
with raises(TypeError):
58+
eda_utils_py.imputer(data, strategy="constant", fill_value="a string")
59+
60+
# Tests whether strategy="constant" without a fill_value raises Exception
61+
with raises(Exception):
62+
eda_utils_py.imputer(data, strategy="constant", fill_value=None)
63+
64+
# Tests whether a non-constant strategy combined with a fill_value raises Exception
65+
with raises(Exception):
66+
eda_utils_py.imputer(data, strategy="median", fill_value=3)
67+
68+
assert pd.DataFrame.equals(
69+
eda_utils_py.imputer(data), imp_mean
70+
), "The returned dataframe using mean inputer is not correct"
71+
assert pd.DataFrame.equals(
72+
eda_utils_py.imputer(data, "median"), imp_median
73+
), "The returned dataframe using median inputer is not correct"
74+
assert pd.DataFrame.equals(
75+
eda_utils_py.imputer(data, "most_frequent"), imp_most_frequent
76+
), "The returned dataframe using most_frequent inputer is not correct"
77+
assert pd.DataFrame.equals(
78+
eda_utils_py.imputer(data, "constant", 1), imp_constant
79+
), "The returned dataframe using constant imputer is not correct"
80+
4681

4782
def test_cor_map():
48-
83+
4984
"""
5085
A function to test whether the output of cor_map() is correct.
5186
"""
52-
53-
data = pd.DataFrame({
54-
'SepalLengthCm':[5.1, 4.9, 4.7],
55-
'SepalWidthCm':[1.4, 1.4, 1.3],
56-
'PetalWidthCm':[0.2, 0.1, 0.2],
57-
'Species':['Iris-setosa', 'Iris-virginica', 'Iris-germanica']
58-
})
59-
60-
num_col_test = ['SepalLengthCm', 'SepalWidthCm', 'PetalWidthCm']
61-
62-
plot = eda_utils_py.cor_map(data, num_col_test, 'redblue')
63-
87+
88+
data = pd.DataFrame(
89+
{
90+
"SepalLengthCm": [5.1, 4.9, 4.7],
91+
"SepalWidthCm": [1.4, 1.4, 1.3],
92+
"PetalWidthCm": [0.2, 0.1, 0.2],
93+
"Species": ["Iris-setosa", "Iris-virginica", "Iris-germanica"],
94+
}
95+
)
96+
97+
num_col_test = ["SepalLengthCm", "SepalWidthCm", "PetalWidthCm"]
98+
99+
plot = eda_utils_py.cor_map(data, num_col_test, "redblue")
100+
64101
# Tests whether output is of Altair type
65102
assert "altair" in str(type(plot)), "Output is not of Altair type"
66-
103+
67104
# Tests whether or not there are NaNs produced in the correlation values
68-
assert plot.data['cor'].isnull().sum() == 0, "There are NaN produced as correlation values"
69-
105+
assert (
106+
plot.data["cor"].isnull().sum() == 0
107+
), "There are NaN produced as correlation values"
108+
70109
# Tests whether plot output scheme is one of the three given color schemes
71-
plot_dict = plot.to_dict()
72-
assert plot_dict["layer"][0]['encoding']['color']['scale']['scheme'] in ('purpleorange','blueorange', 'redblue'), "The plot color scheme is not one of the expected schemes"
73-
110+
plot_dict = plot.to_dict()
111+
assert plot_dict["layer"][0]["encoding"]["color"]["scale"]["scheme"] in (
112+
"purpleorange",
113+
"blueorange",
114+
"redblue",
115+
), "The plot color scheme is not one of the expected schemes"
116+
74117
# Tests whether heatmap portion of plot is mark_rect()
75-
assert plot_dict["layer"][0]['mark'] == 'rect', "mark should be rect"
76-
118+
assert plot_dict["layer"][0]["mark"] == "rect", "mark should be rect"
119+
77120
# Tests whether heatmap and correlation values have the same referenced var column
78-
assert plot_dict['layer'][0]['encoding']['x']['field'] == plot_dict['layer'][1]['encoding']['x']['field'], "The heatmap and the correlation values are not referring to the same corresponding underlying variable x"
79-
assert plot_dict['layer'][0]['encoding']['y']['field'] == plot_dict['layer'][1]['encoding']['y']['field'], "The heatmap and the correlation values are not referring to the same corresponding underlying variable y"
80-
121+
assert (
122+
plot_dict["layer"][0]["encoding"]["x"]["field"]
123+
== plot_dict["layer"][1]["encoding"]["x"]["field"]
124+
), "The heatmap and the correlation values are not referring to the same corresponding underlying variable x"
125+
assert (
126+
plot_dict["layer"][0]["encoding"]["y"]["field"]
127+
== plot_dict["layer"][1]["encoding"]["y"]["field"]
128+
), "The heatmap and the correlation values are not referring to the same corresponding underlying variable y"
129+
81130
# Tests whether the axes use the correct calculated var columns as reference
82-
assert plot_dict['layer'][0]['encoding']['x']['field'] == 'var1', "x should be referring to var1"
83-
assert plot_dict['layer'][0]['encoding']['y']['field'] == 'var2', "y should be referring to var2"
84-
85-
131+
assert (
132+
plot_dict["layer"][0]["encoding"]["x"]["field"] == "var1"
133+
), "x should be referring to var1"
134+
assert (
135+
plot_dict["layer"][0]["encoding"]["y"]["field"] == "var2"
136+
), "y should be referring to var2"
137+
86138
# Testing the Exception Errors
87139
data2 = data.copy().to_csv()
88140

89141
num_col_test1 = (1, 2, 3, 4)
90-
num_col_test2 = [1, 2, 3, 'SepalLengthCm']
91-
num_col_test3 = ['hi', 'hey', 'hi']
92-
num_col_test4 = ['SepalLengthCm', 'SepalWidthCm', 'PetalWidthCm', 'Species']
142+
num_col_test2 = [1, 2, 3, "SepalLengthCm"]
143+
num_col_test3 = ["hi", "hey", "hi"]
144+
num_col_test4 = ["SepalLengthCm", "SepalWidthCm", "PetalWidthCm", "Species"]
93145
col_scheme_test = 3
94-
146+
95147
# Tests whether passing data that is not a DataFrame raises TypeError
96148
with raises(TypeError):
97149
eda_utils_py.cor_map(data2, num_col_test)
@@ -106,7 +158,7 @@ def test_cor_map():
106158

107159
# Tests whether inputting unallowed col_scheme raises Exception
108160
with raises(Exception):
109-
eda_utils_py.cor_map(data, num_col_test, 'bluegreen')
161+
eda_utils_py.cor_map(data, num_col_test, "bluegreen")
110162

111163
# Tests whether inputting unallowed col_scheme raises TypeError
112164
with raises(TypeError):

0 commit comments

Comments
 (0)