55import numpy as np
66
77
8+
89def imputer (df , strategy = "mean" , fill_value = None ):
910 """
1011 A function to implement imputation functionality for completing missing values.
@@ -67,6 +68,7 @@ def imputer(df, strategy="mean", fill_value=None):
6768 if isinstance (fill_value , type (None )) and strategy == "constant" :
6869 raise Exception ("fill_value should be a number when strategy is 'constant'" )
6970
71+
7072 result = pd .DataFrame ()
7173 if strategy == "mean" :
7274 result = df .apply (lambda x : x .fillna (x .mean ()), axis = 0 )
@@ -185,22 +187,23 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
185187 A function that identifies outliers by z-test with a threshold of 3, and deals with them based on the method the user chooses
186188
187189 Parameters
188- ----------
190+ ----------
189191 dataframe : pandas.core.frame.DataFrame
190192 The target dataframe where the function is performed.
191193 columns : list, default=None
192194 The target columns where the function needs to be performed. Default is None, in which case the function will check all columns
193195 method : string
194- The method of dealing with outliers.
196+ The method of dealing with outliers.
195197 - if "trim" : we completely remove data points that are outliers.
196198 - if "median" : we replace outliers with median values
197199 - if "mean" : we replace outliers with mean values
198200
201+
199202 Returns
200203 -------
201204 pandas.core.frame.DataFrame
202205 a dataframe in which the outliers have already been processed by the chosen method
203-
206+
204207 Examples
205208 --------
206209 >>> import pandas as pd
@@ -215,6 +218,7 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
215218
216219 >>> outlier_identifier(data)
217220
221+
218222 """
219223 if not isinstance (dataframe , pd .DataFrame ):
220224 raise TypeError ("The argument @dataframe must be of pd.DataFrame" )
@@ -237,7 +241,6 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
237241
238242 if method not in ("trim" , "median" , "mean" ):
239243 raise Exception ("The method must be -trim- or -median- or -mean-" )
240-
241244
242245 df = dataframe .copy ()
243246 target_columns = []
@@ -273,6 +276,8 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
273276 return df
274277
275278
279+
280+
276281def scale (dataframe , columns = None , scaler = "standard" ):
277282 """
278283 A function to scale features either by using standard scaler or minmax scaler method
@@ -373,6 +378,7 @@ def _standardize(dataframe):
373378 self : object
374379 Scaled dataset
375380 """
381+
376382 res = dataframe .copy ()
377383 for feature_name in dataframe .columns :
378384 mean = dataframe [feature_name ].mean ()
@@ -407,4 +413,7 @@ def _minmax(dataframe):
407413 max = dataframe [feature_name ].max ()
408414 min = dataframe [feature_name ].min ()
409415 res [feature_name ] = (dataframe [feature_name ] - min ) / (max - min )
410- return res
416+
417+ return res
418+
419+
0 commit comments