Skip to content

Commit 57cd2c6

Browse files
committed
reformat other methods' specification
1 parent 0894aca commit 57cd2c6

File tree

1 file changed

+101
-67
lines changed

1 file changed

+101
-67
lines changed

eda_utils_py/eda_utils_py.py

Lines changed: 101 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,146 @@
1-
def imputer(dataframe, strategy = "mean", fill_value):
1+
def imputer(dataframe, strategy="mean", fill_value=None):
22
"""
33
A function to implement imputation functionality for completing missing values.
44
55
Parameters
66
----------
7-
dataframe : pandas.DataFrame
7+
dataframe : pandas.core.frame.DataFrame
88
a dataframe that might contain missing data
99
strategy : string, default="mean"
1010
The imputation strategy.
11-
- If “mean”, then replace missing values using the mean along each column. Can only be used with numeric data.
12-
- If “median”, then replace missing values using the median along each column. Can only be used with numeric data.
13-
- If “most_frequent”, then replace missing using the most frequent value along each column. Can be used with strings or numeric data. If there is more than one such value, only the smallest is returned.
14-
- If “constant”, then replace missing values with fill_value. Can be used with strings or numeric data.
11+
- If “mean”, then replace missing values using the mean along each column. Can only be used with numeric data.
12+
- If “median”, then replace missing values using the median along each column. Can only be used with numeric data.
13+
- If “most_frequent”, then replace missing values using the most frequent value along each column. Can be used with strings or numeric data. If there is more than one such value, only the smallest is returned.
14+
- If “constant”, then replace missing values with fill_value. Can be used with strings or numeric data.
1515
fill_value : string or numerical value, default=None
1616
When strategy == “constant”, fill_value is used to replace all occurrences of missing_values. If left to the default, fill_value will be 0 when imputing numerical data and “missing_value” for strings or object data types.
1717
1818
Returns
1919
-------
20-
pandas.DataFrame
20+
pandas.core.frame.DataFrame
2121
a dataframe that contains no missing data
22-
"""
22+
23+
Examples
24+
--------
25+
>>> import pandas as pd
26+
>>> from eda_utils_py import imputer
2327
28+
>>> data = pd.DataFrame({
29+
... 'SepalLengthCm':[5.1, 4.9, 4.7],
30+
... 'SepalWidthCm':[1.4, 1.4, 1.3],
31+
... 'PetalWidthCm':[0.2, None, 0.2]
32+
... })
33+
34+
>>> imputer(data)
35+
SepalLengthCm SepalWidthCm PetalWidthCm
36+
0 5.1 1.4 0.2
37+
1 4.9 1.4 0.2
38+
2 4.7 1.3 0.2
39+
"""
40+
pass
41+
2442

2543
def cor_map(dataframe, num_col):
2644
"""
2745
A function to implement a correlation heatmap including coefficients based on given numeric columns of a data frame.
2846
29-
Args:
30-
dataframe (pandas.DataFrame): The data frame to be used for EDA.
31-
num_col (list): A list of string of column names with numeric data from the data frame.
32-
33-
Returns:
34-
(altair): A correlation heatmap plot with correlation coefficient labels based on the numeric columns specified by user.
35-
36-
Examples:
37-
import pandas as pd
38-
from eda_utils_py import cor_map
47+
Parameters
48+
----------
49+
dataframe : pandas.core.frame.DataFrame
50+
The data frame to be used for EDA.
51+
num_col : list
52+
A list of strings naming the columns of the data frame that contain numeric data.
3953
40-
data = pd.DataFrame({
41-
'SepalLengthCm':[5.1, 4.9, 4.7],
42-
'SepalWidthCm':[1.4, 1.4, 1.3],
43-
'PetalWidthCm:[0.2, 0.2, 0.2],
44-
'Species':['Iris-setosa','Iris-virginica']
45-
})
54+
Returns
55+
-------
56+
altair.vegalite.v4.api.Chart
57+
A correlation heatmap plot with correlation coefficient labels based on the numeric columns specified by user.
4658
47-
numerical_columns = ['SepalLengthCm','SepalWidthCm','PetalWidthCm']
48-
49-
cor_map(data, numerical_columns)
59+
Examples
60+
--------
61+
>>> import pandas as pd
62+
>>> from eda_utils_py import cor_map
63+
64+
>>> data = pd.DataFrame({
65+
... 'SepalLengthCm':[5.1, 4.9, 4.7],
66+
... 'SepalWidthCm':[1.4, 1.4, 1.3],
67+
... 'PetalWidthCm':[0.2, 0.2, 0.2],
68+
... 'Species':['Iris-setosa','Iris-virginica', 'Iris-germanica']
69+
... })
70+
71+
>>> numerical_columns = ['SepalLengthCm','SepalWidthCm','PetalWidthCm']
72+
>>> cor_map(data, numerical_columns)
5073
5174
"""
75+
pass
5276

5377

5478
def outlier_identifier(dataframe, columns=None, method="somefunction"):
5579
"""
5680
A function that identifies and deals with outliers based on the method the user chooses.
5781
58-
Key arguments:
59-
dataframe [pandas.DataFrame]:
60-
The target dataframe where the function is performed.
61-
columns [list] : None
62-
The target columns where the function needed to be performed. Defualt is None, the function will check all columns
63-
method [string] : "somefunction"
64-
The method of dealing with outliers.
82+
Parameters
83+
----------
84+
dataframe : pandas.core.frame.DataFrame
85+
The target dataframe where the function is performed.
86+
columns : list, default=None
87+
The target columns on which the function is to be performed. Default is None, in which case the function checks all columns.
88+
method : string, default="somefunction"
89+
The method of dealing with outliers.
6590
66-
Returns:
67-
dataframe :
68-
The dataframe which the outlier has already process by the chosen method
91+
Returns
92+
-------
93+
pandas.core.frame.DataFrame
94+
a dataframe in which the outliers have already been processed by the chosen method
6995
70-
Examples:
71-
data = pd.DataFrame({
72-
'SepalLengthCm':[5.1, 4.9, 4.7],
73-
'SepalWidthCm':[1.4, 1.4, 9999999.99],
74-
'PetalWidthCm:[0.2, 0.2, 0.2],
75-
'Species':['Iris-setosa','Iris-virginica']
76-
})
96+
Examples
97+
--------
98+
>>> import pandas as pd
99+
>>> from eda_utils_py import outlier_identifier
100+
101+
>>> data = pd.DataFrame({
102+
... 'SepalLengthCm':[5.1, 4.9, 4.7],
103+
... 'SepalWidthCm':[1.4, 1.4, 9999999.99],
104+
... 'PetalWidthCm':[0.2, 0.2, 0.2],
105+
... 'Species':['Iris-setosa', 'Iris-virginica', 'Iris-germanica']
106+
... })
77107
78-
outlier_identifier(data)
108+
>>> outlier_identifier(data)
79109
80110
"""
111+
pass
81112

82113

83114
def scale(dataframe, columns=None):
84115
"""
85116
A function to scale features by removing the mean and scaling to unit variance
86-
.
87-
88-
Args:
89-
dataframe (pandas.DataFrame): The data frame to be used for EDA.
90-
columns (list): A list of string of column names with numeric data from the data frame that we wish to scale.
91-
92-
Returns:
93-
dataframe :
94-
The scaled dataframe for numerical features
95-
96-
Examples:
97-
import pandas as pd
98-
from eda_utils_py import scale
99-
100-
data = pd.DataFrame({
101-
'SepalLengthCm':[5.1, 4.9, 4.7],
102-
'SepalWidthCm':[1.4, 1.4, 1.3],
103-
'PetalWidthCm:[0.2, 0.2, 0.2],
104-
'Species':['Iris-setosa','Iris-virginica']
105-
})
106117
107-
numerical_columns = ['SepalLengthCm','SepalWidthCm','PetalWidthCm']
118+
Parameters
119+
----------
120+
dataframe : pandas.core.frame.DataFrame
121+
The data frame to be used for EDA.
122+
columns : list, default=None
123+
A list of strings naming the numeric columns of the data frame that we wish to scale.
108124
109-
scale(data, numerical_columns)
125+
Returns
126+
-------
127+
dataframe : pandas.core.frame.DataFrame
128+
The scaled dataframe for numerical features
110129
130+
Examples
131+
--------
132+
>>> import pandas as pd
133+
>>> from eda_utils_py import scale
134+
135+
>>> data = pd.DataFrame({
136+
... 'SepalLengthCm':[5.1, 4.9, 4.7],
137+
... 'SepalWidthCm':[1.4, 1.4, 1.3],
138+
... 'PetalWidthCm':[0.2, 0.2, 0.2],
139+
... 'Species':['Iris-setosa','Iris-virginica', 'Iris-germanica']
140+
... })
141+
142+
>>> numerical_columns = ['SepalLengthCm','SepalWidthCm','PetalWidthCm']
143+
144+
>>> scale(data, numerical_columns)
111145
"""
112146
pass

0 commit comments

Comments
 (0)