Skip to content

Commit 54b914e

Browse files
authored
Merge branch 'main' into main
2 parents 67363ce + 325b7b0 commit 54b914e

File tree

4 files changed

+293
-8
lines changed

4 files changed

+293
-8
lines changed

eda_utils_py/eda_utils_py.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66

77

8+
89
def imputer(df, strategy="mean", fill_value=None):
910
"""
1011
A function to implement imputation functionality for completing missing values.
@@ -67,6 +68,7 @@ def imputer(df, strategy="mean", fill_value=None):
6768
if isinstance(fill_value, type(None)) and strategy == "constant":
6869
raise Exception("fill_value should be a number when strategy is 'constant'")
6970

71+
7072
result = pd.DataFrame()
7173
if strategy == "mean":
7274
result = df.apply(lambda x: x.fillna(x.mean()), axis=0)
@@ -185,22 +187,23 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
185187
A function that identifies outliers by z-test with a threshold of 3, and deals with them based on the method the user chooses
186188
187189
Parameters
188-
----------
190+
----------
189191
dataframe : pandas.core.frame.DataFrame
190192
The target dataframe where the function is performed.
191193
columns : list, default=None
192194
The target columns where the function needs to be performed. Default is None, in which case the function will check all columns
193195
method : string
194-
The method of dealing with outliers.
196+
The method of dealing with outliers.
195197
- if "trim" : we completely remove data points that are outliers.
196198
- if "median" : we replace outliers with median values
197199
- if "mean" : we replace outliers with mean values
198200
201+
199202
Returns
200203
-------
201204
pandas.core.frame.DataFrame
202205
a dataframe in which the outliers have already been processed by the chosen method
203-
206+
204207
Examples
205208
--------
206209
>>> import pandas as pd
@@ -215,6 +218,7 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
215218
216219
>>> outlier_identifier(data)
217220
221+
218222
"""
219223
if not isinstance(dataframe, pd.DataFrame):
220224
raise TypeError("The argument @dataframe must be of pd.DataFrame")
@@ -237,7 +241,6 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
237241

238242
if method not in ("trim", "median", "mean"):
239243
raise Exception("The method must be -trim- or -median- or -mean-")
240-
241244

242245
df = dataframe.copy()
243246
target_columns = []
@@ -273,6 +276,8 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
273276
return df
274277

275278

279+
280+
276281
def scale(dataframe, columns=None, scaler="standard"):
277282
"""
278283
A function to scale features either by using standard scaler or minmax scaler method
@@ -373,6 +378,7 @@ def _standardize(dataframe):
373378
self : object
374379
Scaled dataset
375380
"""
381+
376382
res = dataframe.copy()
377383
for feature_name in dataframe.columns:
378384
mean = dataframe[feature_name].mean()
@@ -407,4 +413,7 @@ def _minmax(dataframe):
407413
max = dataframe[feature_name].max()
408414
min = dataframe[feature_name].min()
409415
res[feature_name] = (dataframe[feature_name] - min) / (max - min)
410-
return res
416+
417+
return res
418+
419+

0 commit comments

Comments
 (0)