Skip to content

Commit 8741a03

Browse files
committed
Add specification for scaling function
1 parent c55ffcc commit 8741a03

File tree

1 file changed

+34
-3
lines changed

1 file changed

+34
-3
lines changed

eda_utils_py/eda_utils_py.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
def cor_map(dataframe, num_col):
32
"""
43
A function to implement a correlation heatmap including coefficients based on given numeric columns of a data frame.
@@ -28,7 +27,7 @@ def cor_map(dataframe, num_col):
2827
"""
2928

3029

31-
def outlier_identifier(dataframe, columns = None, method = "somefunction"):
30+
def outlier_identifier(dataframe, columns=None, method="somefunction"):
3231
"""
3332
A function that identifies and deals with outliers based on the method the user chooses
3433
@@ -54,4 +53,36 @@ def outlier_identifier(dataframe, columns = None, method = "somefunction"):
5453
5554
outlier_identifier(data)
5655
57-
"""
56+
"""
57+
58+
59+
def scale(dataframe, columns=None):
60+
"""
61+
A function to scale features by removing the mean and scaling to unit variance.
62+

63+
64+
Args:
65+
dataframe (pandas.DataFrame): The data frame to be used for EDA.
66+
columns (list): A list of strings naming the columns with numeric data from the data frame that we wish to scale.
67+
68+
Returns:
69+
dataframe :
70+
The scaled dataframe for numerical features
71+
72+
Examples:
73+
import pandas as pd
74+
from eda_utils_py import scale
75+
76+
data = pd.DataFrame({
77+
'SepalLengthCm':[5.1, 4.9, 4.7],
78+
'SepalWidthCm':[1.4, 1.4, 1.3],
79+
'PetalWidthCm':[0.2, 0.2, 0.2],
80+
'Species':['Iris-setosa','Iris-virginica','Iris-versicolor']
81+
})
82+
83+
numerical_columns = ['SepalLengthCm','SepalWidthCm','PetalWidthCm']
84+
85+
scale(data, numerical_columns)
86+
87+
"""
88+
pass

0 commit comments

Comments
 (0)