From 4ec9b23a3d4734b0878be8058beb3e78c6458cae Mon Sep 17 00:00:00 2001 From: Tusharsharma86 Date: Sun, 21 Oct 2018 04:47:13 +0000 Subject: [PATCH 1/4] Done --- __pycache__/__init__.cpython-36.pyc | Bin 159 -> 158 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 170 -> 178 bytes .../__pycache__/build.cpython-36.pyc | Bin 655 -> 638 bytes q01_outlier_removal/build.py | 22 +++++++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 185 -> 184 bytes .../test_q01_outlier_removal.cpython-36.pyc | Bin 1835 -> 1838 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 180 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1138 bytes q02_data_cleaning_all/build.py | 21 +++++++++++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 186 bytes .../test_q02_data_cleaning.cpython-36.pyc | Bin 0 -> 3408 bytes 11 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all/__pycache__/build.cpython-36.pyc create mode 100644 q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 2e5da7da778c48a9c58d7d055bbdb642fcb66418..c24f22121ae5fbe609004e544d7aa022e7480178 100644 GIT binary patch delta 56 zcmbQwIFFITn3tF9x$~jui5%uC-ufB&xvBagrNtSEMIfA;Xkn%wo?nz*T#%TYs-K*n Jk~%TP8UP0K6J-DZ delta 57 zcmbQoIG>Tjn3tDpxuj9lL=JORAN|nc)S_bj#H5VO;*8Yn;?ks|#N5QZ%)~tXywvje Mw9K5;;)%)D0Qk=nh5!Hn diff --git a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc index 2f9a42a105b2b26ec10c60ac4d11fa03f7624d22..9cc9d693e6cbd5a0db37e9fcc3dd6a134bc2bb6c 100644 GIT binary patch delta 56 zcmZ3*xQUU&n3tF9x$~jui5%uC@%kD0xvBagrNtSEMIfA;Xkn%wo?nz*T#%TYs-K*n Jk~%Tk9{>=c6QckC delta 48 zcmdnQxQda(n3tF9Mtoz`L=JPAaQ%$@+*JLP(Z#w<2QhKWAj^(;Vf zXOLVOkf>ouVU%RZVn|_XW~^ld@|fW~CLoUm&SS1&$O6hk)YmX(gJeK@fFe+Fpw1d5 zEaE_wHOvTcwi`K5U!n#{L23QH67N-}d&C-*R_X>tRd z_bWy}BR@A)KcuucBe4jCa}zDh^uzOuvWp86lT-DR^HWkMUuN`|D^ZLQ_1w-#LFj)OYb!Bs@jO+i6%aFKEe+Ft*e_QWCLa34U9I{QWZ z8ctc={QwT)n_Em?@{lLLH%~%-omY3X7ly%S_I~qC3Hc({V7NZS+3qYy@$&lg#YJ;$ zHr`Ma;cy2L$qAOZ0!6b$`|BNA{HXBfCFi)#4D};6%nN?k1z+^i__H&io*QH3( zLOdlWqMdbhlU&F;7t^loKj6F^Vi$qoss^yND!FN5J?%AV#nV>PhHGDnl;2Lu920~k1e&=yxt diff --git a/q01_outlier_removal/build.py b/q01_outlier_removal/build.py index ec278ba..435bfbb 100644 --- a/q01_outlier_removal/build.py +++ b/q01_outlier_removal/build.py @@ -1,8 +1,28 @@ +# %load q01_outlier_removal/build.py # Default imports import pandas as pd loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') loan_data = loan_data.drop('Loan_ID', 1) +# Function will remove the outliers +def outlier_removal(loan_data): + a = loan_data['ApplicantIncome'].quantile(0.95) # It comes out to be 14583.0 + b = loan_data['CoapplicantIncome'].quantile(0.95) # It comes out to be 4997.4 + c = loan_data['LoanAmount'].quantile(0.95) # It comes out to be 297.8 + loan_data = loan_data.drop(loan_data[loan_data['ApplicantIncome'] > a].index) + loan_data = loan_data.drop(loan_data[loan_data['CoapplicantIncome'] > b].index) + loan_data = loan_data.drop(loan_data[loan_data['LoanAmount'] > c].index) + return loan_data + +outlier_removal(loan_data).shape + + + + + + + + + -# Write your Solution here: diff --git a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc index 5a057ffb73694628cef3ed87e03ee3a17f7410bc..8e40ab9f9f2e9ef8dcd9be8b88fea10232468e51 100644 GIT binary patch delta 56 zcmdnVxPy_yn3tF9h4Z24i5%uCsrniDxvBagrNtSEMIfA;Xkn%wo?nz*T#%TYs-K*n Jk~%RZ6aW@p6V3nt delta 57 zcmdnNxRa5?n3tDpxuj9lL=JP+H2u)x)S_bj#H5VO;*8Yn;?ks|#N5QZ%)~tXywvje Mw9K5;;)%(j00?Lk?f?J) diff --git a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc index 4c0b6c7431c6a14108ba9d55dd44059612f144a5..d28d92f985aa505d66a60b582694a4a85549f869 100644 GIT binary patch delta 72 zcmZ3@w~mj~n3tF9h4Z24lM^{taqy+5mZX*Dm-q-GbFCKV;- ZCgx=(=IQ69mdB@M=A;&HR%YU61pp127hM1V diff --git a/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0424e33a1401e2265c10017e98f5c3248fa091a6 GIT binary patch literal 180 zcmXr!<>h+rd?=a$2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CUy1q|`MIh3A*IC` ziA5lsn`mLCAD&;7U0jfuoT{IkpOUJdlb@bhT#}g_UzD0&lv-SznV%P5P?VpQnp~n^ zXkZkdl30=$pPZALn3tKC9-o+#qaPoinU`4-AFo$Xd5gm)H$SB`C)EyQbukb#001h$ BGFSir literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df9daa8be359557344fb9f4e829bad308e72a3a8 GIT binary patch literal 1138 zcmaJ>&2H2%5VoCclHKlZmrH>jdO!%Nm)!yq;(&mPqJjjyv=S)%SSEJ2**bA*dn>le zo(jiacnBn(g$rk2IrSB|z&I;J;eez0?97bqnQtb!-RpG@^w(G0Awu8Lh0%ibLlE-` zfFq72N*6Ouu|N|t2?PcVFCr(Wc=6zcZ!3+Oy>Xhr9LRRjNjr@#luJc7?P4UNNsqTq zaoQLC$uf_4d`e*56@9Pc32%Qu=|BwpJ9zhDH{M0-opXo_?YM-}{agx4*RdN|&GM33 zVJtP3%q{4a)zr#dXsSh_j+s2U#+hX!saQ!XEqI>UT$OZMW>T556%dL81j}+#fLwR=J@@M8gHRDYjg5vPfcJi z`LpL};{}{F34kc{kpT8Tl6~-*2N$%<-qvVC8`eP`@+IEoy|eIjIB(6Pc|1?NwrX0xf-#8>{q8dveAgFupxF_jIk3w7n=tej;2~vy!WV8q2IW z{|Q4cd8d|(poX$!E-au3fQkzj=2B%k@D6R)ZVKj0m&O^hG*i+A9`H+tXD)t8o7%ew z7Epc{&weXl0g5k3UxAZ;yd52=LX7sN=74E01-pA^^g`)FQvo5P3~q+z7(+&R20b&@ z!WiEml~xmx+0oI)E$YLdO~2&jm@+9x@20ur!)oSwf7u3brSX8sDjDDoULz}b1z*8> z6?E&DTwrF#5oKIhF?--5R(bYa%yV6`Lb#{`#u&I%+_lPS0XLAcN-vv3=vG}0JyDVx zAt5w~vSG_@LeMj2RUz|t!~f(mJleQP|M@(IO(q)t2FN8^Fizon&^`qha;>T+@P^6@ haJBKi2f<9GxbG`wRsaV0I!@r};x6d`CSlS`@Gl}aNHqWe literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all/build.py b/q02_data_cleaning_all/build.py index b56e2bc..0373efa 100644 --- a/q02_data_cleaning_all/build.py +++ b/q02_data_cleaning_all/build.py @@ -1,3 +1,4 @@ +# %load q02_data_cleaning_all/build.py # Default Imports import sys, os sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname('__file__')))) @@ -12,3 +13,23 @@ # Write your solution here : +def data_cleaning(loan_data): + + loan_data['LoanAmount'].fillna(loan_data['LoanAmount'].mean(), inplace = True) + + cat_col = ['Gender', 'Married', 'Dependents', 'Self_Employed', 'Loan_Amount_Term', 'Credit_History'] + for col in cat_col: + loan_data['LoanAmount'].fillna(loan_data[col].mode(), inplace = True) + + X = loan_data.iloc[:,:-1] + y = loan_data.iloc[:,-1:] + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 9) + return X, y, X_train, X_test, y_train, y_test + +data_cleaning(loan_data) +loan_data + + + + + diff --git a/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c80c374caeb9bf1aa8432a6560ad1bb5588cd940 GIT binary patch literal 186 zcmYL@F$w}P5Jj_KA;KQS#t{Xv5V5nfQLwNWLNa9IW|K9OEj*If@EEqILQNr+7M0yLNKhn%irNxYs)9s|s0@_@sy+5GF5pK6-+ zw>EQG(0&NF_yYvhD2X+XfYxI@F+77{pAnl$#j8Nuq?Oo8Y|k$Gt=LJbUX^H!3~JP# z5UO&0A%QGf&qxHCm(f zcN(oTV`6yMDS5878+))nZL3X`^MVQaC=d+V3t2AXh;g5@B-;(*Jsk!?f0G6>@WYq| zX_WS%qt$1!m&b9y`(nt#@ogN?jk6&2hn&$Ulu?%Yd0I?iv=fS5&whrlpFZ9rU^K+# z4=!BgA>85_2vz{|bV{iHPV)?EP!lwuq88{1wW$NzqE%W0ZPPk!fOcq;wm?_u0$l`M zqf6lCvRXp-9?xz+#P#vQWgWO5!7VmGNKKLn`JKGg2D&uf5=z#z32XzlF!S8gSG7s; zC1C+`i=#PigBag>@*BXAjJm)jPgp92yH4-Ao6#;yq184y#s{x}Q1w6v#^sZpJcyMM zv$U-%Q?R(FOr-j!#+xwgpS-)BC9L}*7ux~HI|&|q+5I`=Z^SSNSvSlm1MvMw$S8#G z*5^zJgg@lj05;RzxwqjT`Fws5$6X0jL?QiS13JTzx`_kk=N&lJf{h6igl0Ho+&M9 z_Uf)X%5n%ywm%jQvb!$YwpuI$@u_&43K^s!Q`So#omcvhDl^PDQ7%{vKmcUDGPE+#Dub* z7y7hFJB!%p&U+IZT~1>5=-Ao31-^0@#e1C1eY2m&_bqJL=U50rDb2GX zzX>xrrV);bm}5!9@jsG(3c|A@kwSg1tg^OJMv#uY+K)w*;X|KSpSJH8zMh0gIp37r Yzr|ulU;=dX1}T3v&< Date: Sun, 21 Oct 2018 05:04:21 +0000 Subject: [PATCH 2/4] Done --- .../__pycache__/build.cpython-36.pyc | Bin 1138 -> 1171 bytes q02_data_cleaning_all/build.py | 12 +++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc index df9daa8be359557344fb9f4e829bad308e72a3a8..857985c832fc66ce8768b71de24c18d99e85caf3 100644 GIT binary patch delta 274 zcmeywF`1Lon3tF9B*&TP0~0wbEm9a#SaW!D`J(t38B*D@_*2-L8KVSJ1+oNF*i$%~ znSgA*Q~{7!3TFye3qzDp3S%&XCilc$>KyEzxdo*qsYMg-MKdm%Y{ghUc`M@#M)t{m zOh$}6lN*_`7$qmaVJcD(WME(@761|qOj3*jj9iRz%wmi}%o2=6(m*jy;mLK(p^{Oe z#o0NjiA8yO1x2X^Mfu68#l@L<=|wD)FEG1DvH}&};wVZ@Oo>k}E-M0=c#9>aD8B$~ rM3E$rk3>j;Romp|raz{tfY$0Wum#wfv9Bn1@H6r9}396I?Mvr9A!kb8@x zC^a!9KDoH82&Cl}OG;6G0a#&?1dxwJNP<<{dYc^G+^ NxPT-F3kNR;BLJQ0GHw6> diff --git a/q02_data_cleaning_all/build.py b/q02_data_cleaning_all/build.py index 0373efa..5f1faed 100644 --- a/q02_data_cleaning_all/build.py +++ b/q02_data_cleaning_all/build.py @@ -5,6 +5,7 @@ import pandas as pd import numpy as np from sklearn.model_selection import train_test_split +from sklearn.preprocessing import Imputer from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') @@ -14,20 +15,21 @@ # Write your solution here : def data_cleaning(loan_data): - + #Impute the values with mean and mode loan_data['LoanAmount'].fillna(loan_data['LoanAmount'].mean(), inplace = True) - cat_col = ['Gender', 'Married', 'Dependents', 'Self_Employed', 'Loan_Amount_Term', 'Credit_History'] for col in cat_col: loan_data['LoanAmount'].fillna(loan_data[col].mode(), inplace = True) + #seperate the features and target variable X = loan_data.iloc[:,:-1] - y = loan_data.iloc[:,-1:] + y = loan_data.iloc[:,-1] + + #train test split for ML X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 9) return X, y, X_train, X_test, y_train, y_test -data_cleaning(loan_data) -loan_data + From 9550771ff7e147959b4b7d07a09c0afe573063aa Mon Sep 17 00:00:00 2001 From: Tusharsharma86 Date: Sun, 21 Oct 2018 07:27:33 +0000 Subject: [PATCH 3/4] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 182 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1324 bytes q02_data_cleaning_all_2/build.py | 36 ++++++++++++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 188 bytes .../q02_test_data_cleaning_2.cpython-36.pyc | Bin 0 -> 4763 bytes 5 files changed, 36 insertions(+) create mode 100644 q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc create mode 100644 q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc diff --git a/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..318489f46680ba9def580eff2d064d584ae68bad GIT binary patch literal 182 zcmXr!<>h+rd?=a$2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CU&;Cz`MIh3A*IC` ziA5lsn`mLCAD&;7U0jfuoT{IkpOUJdlb@bhT#}g_UzD0&lv-SznV%P5P?VpQnp~n^ zXkZkdl30=$pPZALn3tKC9-o+#6K|v+AD@|*SrQ+wS5SG2!zMRBr8Fni4rF;T5HkP( Dkwr4= literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca381157c51ccfd1d216a31b9aa72ee1ca0b3d94 GIT binary patch literal 1324 zcma)6&2AGh5VrS!)1+yuQvNQ8TR0?8B*XzBs&c6Wy-*2ckQR5x4I8{a*t>|J?WtV* zAiN7t;43FyffM6x8`^>p8+knb=9}@1$Iku1p!fRDx7W`DLcWu8AFjz09QptS5TKGU z+RYAgadxDa`^=}-=E@)sSxAYXvk1H+%3=}Edf-EF$3q{W<$0D zAw*vYTLt=pjN%`-7ug+AGXP%mB$a{}SusUmST?oHgi4gi%a2@szHJ#3S@I&Olz=R) zv$9B{8NVe=e*MX`R+R z@bL7PcTG_4&qP||d50OEWCMZ?x2j5JDKF}sA}w>lR<}!j3F_H%-pb3SsM-1@exg69 znr-~ybb=3mdv?GcZruY_R>?u8bggWk5c2q^Opja>NWL$mR0WZ_;h)BFZJbC64vBr~arZ}HOIPdGLSGp=S65I~8&+Jk n;+hq<%B~mZ#0|tu^e8W(k>arpt!?!YjcA|togT`Fu6w@#%8h&Y literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all_2/build.py b/q02_data_cleaning_all_2/build.py index e20ff7b..796549f 100644 --- a/q02_data_cleaning_all_2/build.py +++ b/q02_data_cleaning_all_2/build.py @@ -1,3 +1,4 @@ +# %load q02_data_cleaning_all_2/build.py # Default Imports import pandas as pd import numpy as np @@ -11,3 +12,38 @@ # Write your solution here : +def data_cleaning_2(X_train, X_test, y_train, y_test): + cat_col = (X_train.select_dtypes(include=['object']).columns) + num_col = ['ApplicantIncome','CoapplicantIncome','LoanAmount'] + + X_train['ApplicantIncome_sqrt'] = np.sqrt(X_train['ApplicantIncome'] ) + X_test['ApplicantIncome_sqrt'] = np.sqrt(X_test['ApplicantIncome'] ) + X_train['CoapplicantIncome_sqrt'] = np.sqrt(X_train['CoapplicantIncome'] ) + X_test['CoapplicantIncome_sqrt'] = np.sqrt(X_test['CoapplicantIncome'] ) + X_train['LoanAmount_sqrt'] = np.sqrt(X_train['LoanAmount'] ) + X_test['LoanAmount_sqrt'] = np.sqrt(X_test['LoanAmount'] ) + + df_cat_train = pd.get_dummies(X_train[cat_col],drop_first=True) + df_cat_test = pd.get_dummies(X_test[cat_col],drop_first=True) + + X_train = pd.concat([X_train,df_cat_train],axis =1) + X_test = pd.concat([X_test,df_cat_test],axis =1) + + drop_col = list(cat_col) + num_col + X_train.drop(labels=drop_col,axis=1,inplace=True) + X_test.drop(labels=drop_col,axis=1,inplace=True) + + return X_train, X_test, y_train, y_test + + + + + + + + + + + + + diff --git a/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a697e98170da03600824a9f617b0c288ef682e57 GIT binary patch literal 188 zcmYL@F$w}P5Jj_KA;KQS#*r1oLPW5)QLwNWLNe@T%_eImTX-a|;W2D2yn~fVA%1v| z&#(TXEGO^X<1A8MS-%viY!f_6(&8Yd#r`frp8xt7XnaR-9JrAo)5+BmPCi_?mx@4n zO<;X(I2wh4>OjoKJM4pRNkzEME7USdRFt@D>Sb9N z4=E3K9Zm-d^gzclaYDy#c+~M-ziID4S81XP#{^O6xs=(Awu6_B2VF}|8tz4&=ddR4 z&_?ep-c$7g$G1C-61Ne$fp15C3`|L-!CzY0ukr8uw+Avj+A7|r9qJS6hup4N>)*1V zLs{6hZ!zlF-?L|q-@r+aJ~-W*aB&X=jW@?qh)k5%lBMG2K&z1q(V@+dEHR+fk#=aa zq(F+$8VNO4jx3WE_?;)KWDVK^StlFN7Re^rg0@7Ck>k)Vk!>LFgjh$?Zcp;mz(wE( zCuYF43m4x35lW%lmw%RDNi8K*U&%z?mijPJm}Khrp0XwN(XJ$TgjQz=H_$-z&fNJ4 z7!4^gf!wH#1exc^Mf0BflKP;PHI{{O3w{L2kaeCSx_IPRQFg=b-kyJzO?Mz|1G8Ow zh|{2?HzL^vbm%eZYJH%>sNNT2z6srPA}=(-#3Ra05}1Avnr$a+JU5#$`;#Ne>i$l! z0$`OD27yIlODK@|M67@&PEB3L5Rh-fkc0*qXs8{ThSB|F7SN6`n8hrZpnw^IL5508 zZE39xkwYELGi7{Az4y&N1z_c7*zHhrFJLC4VZ{7|57Xu5(~jd4hnwgNcbU_sPcNF? zAOb_B!?6%U%yYb)6)TBh-sksy9)d4Zp+B~f0-&>8`wBD04WA~%K;8a}!#CoCQy z@WNCAlPTJGpPfA53@n->1(LiZD{}A4Ba``Gtapu8wgJl$Sy(7@T%T>hFLn&YaTI7o z>;#BI82soc3?lG{fRU!~`l1vf*gMK$_aKKthe+yDF7yX+`({iI;mdD@$syRg$)pam z1HVogBYGe+QOq#L|99sEw{n2N#oRf;njmnTD{zvdC<%!gQgsuO^*B#xb@vfO)xj+o z*1P&$IwUrN2(vQ@AY1IG3OdT)aUL;~L69pjLkP`-94m6M|JrdaHGi;+aTD|X6xqQN zL|P!wrEnHyW)qp|!{gtW8Fmg(M1D+6ZIK|BT2bwDz#CCBcK}NREy&H_5lmo!72#Xr z#)!i=%5$cb*uYb+B;^V9MRJUd?4LbRa!PtdZ79iM?Xgp`@p(ReA~ zKyhh3jJb0MmNZbkyetgc_Mr&2ZIQR_c0eKz^`dR#i7xqvt>Vn_pu|q1IECVU6d$0t zfZ}5mm?{|t9Cii8RTTS0bgbb~gylf2!Nsv^G-N}EZzlfghC2Gjqsk{OJ*oNQ^e&9Z zcSAHDhU*uZXOZiX_#|rW!kMw_ghAVLpH*fYB`Z^pl9dE01E|)mqa)Sq1Ft5AI9UFR ztn1^ui7-B-Sq8jUAU3{-$i=TI@H7i=4B)kckRJ=V8@mmQjRX&m>?}^DnD}*)!r6K3 o$BK@9f}(~3xtvC_Fs>)N`4u>~#1Ybvi=(fS2hET- Date: Sun, 21 Oct 2018 07:48:08 +0000 Subject: [PATCH 4/4] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 182 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1534 bytes q03_logistic_regression/build.py | 35 ++++++++++++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 188 bytes ...est_q03_logistic_regression.cpython-36.pyc | Bin 0 -> 2302 bytes 5 files changed, 35 insertions(+) create mode 100644 q03_logistic_regression/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_logistic_regression/__pycache__/build.cpython-36.pyc create mode 100644 q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc diff --git a/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..932cdd63a659db4ccd039e7e70e99c9893d122a3 GIT binary patch literal 182 zcmXr!<>h+rd?=a$2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CU&;Cz`MIh3A*IC` ziA5lsn`mLCAD&;7U0jfuoT{IkpOUJdlb@bhT#}g_UzD0&lv-SznV%P5P?VpQnp~n^ mXkZ+VMP5HXJ~J<~BtBlRpz;=nO>TZlX-=vg$ns(!W&i-FKr@E` literal 0 HcmV?d00001 diff --git a/q03_logistic_regression/__pycache__/build.cpython-36.pyc b/q03_logistic_regression/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..22dca85e669ee7ead83f2df82fecd27483f41c8f GIT binary patch literal 1534 zcmbtU&5qkP5GM6+t-apAc-JY=27B|xP8^`yqD z2lC0i=0SStQ}oy;;GvhE_7!r=ePSWk3hOyY+1#g6QfZi55Sle=WM z!J+8OJ+g-p$Mby_tuZ;^2lGSLVe#4l+~o%iw`=T4s&Pj5W@Qb zul&lHyS3l4HJW?O|KP5C%MFqBKSO|>20IOgxxl+;8{MD|Ko_?PTivkH4O_OxTivh* z4(5YDXoZa~tf;|O7dN^-3s?xNi|S||)TnmW4n}~(8n5xHQ$@8~ch=4-u4712^vv|n z%Ti=1&8sJQT1am8FADkv729B)%VLpN5O4f^_#wRZqnPB5` zX%2509l)>kDHwzUJisST7sKn^#4&uwPK?z(@EUz(g3?x!R)_FO?&#N01S%gx#9$$* zEu?(bgq$3;VvoBI!vFN${DghAQDG8g|{XPSIo*i?wElD!wr5C2OknQpb<&P66toU+8; z3!UcLus=2)Q$?wIb{sF&G4$%hnxQp!t+{87JwZ3jH}GkqGYI%X@JIGsYwH`?++xV* N=JhO!@DUz(p8yA{zC{24 literal 0 HcmV?d00001 diff --git a/q03_logistic_regression/build.py b/q03_logistic_regression/build.py index cdbd506..5d243e5 100644 --- a/q03_logistic_regression/build.py +++ b/q03_logistic_regression/build.py @@ -1,12 +1,16 @@ +# %load q03_logistic_regression/build.py # Default Imports import pandas as pd from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LogisticRegression +import matplotlib.pyplot as plt from sklearn.metrics import confusion_matrix from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal from greyatomlib.logistic_regression_project.q02_data_cleaning_all.build import data_cleaning from greyatomlib.logistic_regression_project.q02_data_cleaning_all_2.build import data_cleaning_2 + + loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') loan_data = loan_data.drop('Loan_ID', 1) loan_data = outlier_removal(loan_data) @@ -15,4 +19,35 @@ # Write your solution code here: +def logistic_regression(X_train, X_test, y_train, y_test): + std_scl = StandardScaler() + scale_df = std_scl.fit_transform(X=X_train[['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']]) + scale_df = pd.DataFrame(scale_df,columns=['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount'] , index=X_train.index) + + X_train['ApplicantIncome'] = scale_df['ApplicantIncome'] + X_train['CoapplicantIncome'] = scale_df['CoapplicantIncome'] + X_train['LoanAmount'] = scale_df['LoanAmount'] + + std_scl2 = StandardScaler() + scale_df2 = std_scl2.fit_transform(X=X_test[['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']]) + scale_df2 = pd.DataFrame(scale_df2,columns=['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount'] , index=X_test.index) + + X_test['ApplicantIncome'] = scale_df2['ApplicantIncome'] + X_test['CoapplicantIncome'] = scale_df2['CoapplicantIncome'] + X_test['LoanAmount'] = scale_df2['LoanAmount'] + + model = LogisticRegression(random_state=9) + model.fit(X_train,y_train) + + y_pred = model.predict(X_test) + + cm = confusion_matrix(y_test,y_pred) + return cm + + + + + + + diff --git a/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78e86b1ea4a54486c5225a0699806ce67be458b7 GIT binary patch literal 188 zcmXr!<>h+rd?=a$2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CUzz$D`MIh3A*IC` ziA5lsn`mLCAD&;7U0jfuoT{IkpOUJdlb@bhT#}g_UzD0&lv-SznV%P5P?VpQnp~n^ sXkZ+VMP9!owYa2MKR!M)FS8^*Uaz3?7Kcr4eoARhsvXD)#X!se0R1C1?*IS* literal 0 HcmV?d00001 diff --git a/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e899608faa80de40b8c73c5145859faa144fb500 GIT binary patch literal 2302 zcmbtVQE%He5EdobRvg=L+ca5*VbcQSK|o-)DKMd5=<|fOBZF+~U(z|qxt_x4Bio0S> ztWVt+?#$Y{w9?O>SiHsC?=0RH?$n_hocwGBoim8TIt)nRJr&yQGA%%Fb7oi=Nx{-M z9e`nF!4U4CDa(zFg$k8OvQs9{Y;XkjJ2D%@+QbnI43yA1&QdUU2ErWWQZhBrBN0tD z(PLj`EDc9W@HjFsFU-?&QNeqWKBbKV9Nz!wjDYJ1H+u2lT92UVZ4jbN0l-hV{m!Be zceo3^%WJ$2eT_G`2YsC{LBP#o9Rof1aFY?Pf)6_PpzT7_J0Of@$dtS#Z>*thoHvA% z4QmRE!9sK2NA{XEbug2#jC-ii-I)tw^3~pJ2-yhkL-ctfQltGXzU@DWPelq^;3`~C zH9!>0OlzUc-bv16;Yg7NcHwFvj|vyD``uDqnD+PE{&ALw{?l9^GleZ--+t5oB~!oY z5sO4W%D4b{KS%|joeg0#{gZnS!fVF+2GG%^3~!jy8;y%ioOa#+;Vi`4uuYBeTaHI2 z-`#+E@8gaZ5`zxQ#C!?Ld}6A!;1Qf9!8ts0VAkZ}41X?;ST2nZe9eXSdFH2?@e^jE zV}DR)V{t~{TxLeKz*k_a4s>b-1ty{rw?HetzA=<@r+OWxRge&XX&5ndZr8+)FuA zij7}x`{OJJ_KNd~P!;?w9n^|$#nyejpK8O>NECLI6t(hi+BpBcaB-c0RM1^PEw~Z2 zYENNVde8yJr!iWKG>J_nk3Si|s~#6{oH1){hU3tlLWy5w)UPl)xj%z(0XUuksgO|> zm*&TSNys0C&I>1DuWx{3>tk?~)ZGLx8tarrCO`aF7)novL8I`(FlC7d!=f36Nyc-D zdMgaE22~>pPqSJ@v4H|}F~|Qhr+5fd69g(6E1E~V8oc$=8hlRcBL2fJg~8BWxG=`dWQmMl^{y#S7sac0a&M>N#l;;JGAVnn@>p_(N1pEeFHhmlxi91d zef;lz-4Czaisyc67;LTDUs>fFaf;WVDa#A>lwdr>D{ra15s~t6f2nL#kY@!M)5f#V yC>E!*4hq0U8{-QDD2N~Os)YHvtQ6k&Ra$>Um^CI3QbSsEZxeKntdcd-vHt-_wwV