1-
21def cor_map (dataframe , num_col ):
32 """
43 A function to implement a correlation heatmap including coefficients based on given numeric columns of a data frame.
@@ -28,7 +27,7 @@ def cor_map(dataframe, num_col):
2827 """
2928
3029
31- def outlier_identifier (dataframe , columns = None , method = "somefunction" ):
30+ def outlier_identifier (dataframe , columns = None , method = "somefunction" ):
3231 """
3332 A function that identifies and deals with outliers based on the method the user chooses
3433
@@ -54,4 +53,36 @@ def outlier_identifier(dataframe, columns = None, method = "somefunction"):
5453
5554 outlier_identifier(data)
5655
57- """
56+ """
57+
58+
def scale(dataframe, columns=None):
    """
    Scale numeric features by removing the mean and scaling to unit variance.

    NOTE: this function is currently a stub -- its body is ``pass``, so it
    returns ``None``. The sections below describe the intended contract.

    Args:
        dataframe (pandas.DataFrame): The data frame to be used for EDA.
        columns (list): A list of strings naming the numeric columns of the
            data frame that we wish to scale. Defaults to None.

    Returns:
        pandas.DataFrame: The scaled data frame for the numerical features
        (intended behavior; not yet implemented).

    Examples:
        import pandas as pd
        from eda_utils_py import scale

        data = pd.DataFrame({
            'SepalLengthCm': [5.1, 4.9, 4.7],
            'SepalWidthCm': [1.4, 1.4, 1.3],
            'PetalWidthCm': [0.2, 0.2, 0.2],
            'Species': ['Iris-setosa', 'Iris-virginica', 'Iris-versicolor']
        })

        numerical_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalWidthCm']

        scale(data, numerical_columns)
    """
    # TODO: implement the scaling; until then the function returns None.
    pass
0 commit comments