Skip to content

Commit 674caf5

Browse files
committed
Autoformat script and make columns a required argument in scale function
1 parent a34d4b5 commit 674caf5

File tree

1 file changed

+18
-27
lines changed

1 file changed

+18
-27
lines changed

eda_utils_py/eda_utils_py.py

Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import numpy as np
66

77

8-
98
def imputer(df, strategy="mean", fill_value=None):
109
"""
1110
A function to implement imputation functionality for completing missing values.
@@ -68,7 +67,6 @@ def imputer(df, strategy="mean", fill_value=None):
6867
if isinstance(fill_value, type(None)) and strategy == "constant":
6968
raise Exception("fill_value should be a number when strategy is 'constant'")
7069

71-
7270
result = pd.DataFrame()
7371
if strategy == "mean":
7472
result = df.apply(lambda x: x.fillna(x.mean()), axis=0)
@@ -226,59 +224,54 @@ def outlier_identifier(dataframe, columns=None, method="trim"):
226224
if columns is None:
227225
for col in dataframe.columns:
228226
if not is_numeric_dtype(dataframe[col]):
229-
raise Exception("The given dataframe contains column that is not numeric column.")
230-
227+
raise Exception("The given dataframe contains column that is not numeric column.")
228+
231229
if columns is not None:
232230
if not isinstance(columns, list):
233231
raise TypeError("The argument @columns must be of type list")
234-
235-
232+
236233
for col in columns:
237234
if col not in list(dataframe.columns):
238-
raise Exception("The given column list contains column that is not exist in the given dataframe.")
235+
raise Exception("The given column list contains column that is not exist in the given dataframe.")
239236
if not is_numeric_dtype(dataframe[col]):
240237
raise Exception("The given column list contains column that is not numeric column.")
241-
238+
242239
if method not in ("trim", "median", "mean"):
243240
raise Exception("The method must be -trim- or -median- or -mean-")
244-
241+
245242
df = dataframe.copy()
246243
target_columns = []
247-
if(columns is None):
248-
target_columns = list(df.columns.values.tolist())
244+
if (columns is None):
245+
target_columns = list(df.columns.values.tolist())
249246
else:
250247
target_columns = columns
251-
248+
252249
outlier_index = []
253250
for column in target_columns:
254251
current_column = df[column]
255252
mean = np.mean(current_column)
256253
std = np.std(current_column)
257-
threshold = 3
258-
259-
254+
threshold = 3
255+
260256
for i in range(len(current_column)):
261257
current_item = current_column[i]
262258
z = (current_item - mean) / std
263259
if z >= threshold:
264-
if(i not in outlier_index):
260+
if (i not in outlier_index):
265261
outlier_index.append(i)
266-
if(method == "mean"):
262+
if (method == "mean"):
267263
df.at[i, column] = round(mean, 2)
268-
if(method == "median"):
264+
if (method == "median"):
269265
df.at[i, column] = np.median(current_column)
270-
271-
272-
if(method == "trim"):
266+
267+
if (method == "trim"):
273268
df = df.drop(outlier_index)
274-
269+
275270
df.index = range(len(df))
276271
return df
277272

278273

279-
280-
281-
def scale(dataframe, columns=None, scaler="standard"):
274+
def scale(dataframe, columns, scaler="standard"):
282275
"""
283276
A function to scale features either by using standard scaler or minmax scaler method
284277
@@ -415,5 +408,3 @@ def _minmax(dataframe):
415408
res[feature_name] = (dataframe[feature_name] - min) / (max - min)
416409

417410
return res
418-
419-

0 commit comments

Comments
 (0)