From 3f7bab549fc562becee55dc5a0959185eab98ef6 Mon Sep 17 00:00:00 2001 From: Akashdesarda Date: Wed, 5 Dec 2018 11:22:13 +0000 Subject: [PATCH 1/4] Done --- __pycache__/__init__.cpython-36.pyc | Bin 159 -> 156 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 170 -> 176 bytes .../__pycache__/build.cpython-36.pyc | Bin 655 -> 559 bytes q01_outlier_removal/build.py | 12 ++++++++++-- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 185 -> 182 bytes .../test_q01_outlier_removal.cpython-36.pyc | Bin 1835 -> 1836 bytes 6 files changed, 10 insertions(+), 2 deletions(-) diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 2e5da7da778c48a9c58d7d055bbdb642fcb66418..f6bd7256f42ec8882dd2b29d6817d668f0e55522 100644 GIT binary patch delta 54 zcmbQwIERtLn3tF9<|g)-i5%uip86U2xvBb&*@?v&DXGPYMJb8;;rT_`#RZAUsrt$J JDX9}vtO3x=5@7%U delta 57 zcmbQkIG>Tjn3tDpxuj9lL=JORAN|nc)S_bj#H5VO;*8Yn;?ks|#N5QZ%)~tXywvje Mw9K5;;)#jY0QgxGf&c&j diff --git a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc index 2f9a42a105b2b26ec10c60ac4d11fa03f7624d22..3e3482780cefe2697e5f40c9ba5bb55d1a7222b2 100644 GIT binary patch delta 54 zcmZ3*xPg(wn3tF9<|g)-i5%uivHBVLxvBb&*@?v&DXGPYMJb8;;rT_`#RZAUsrt$J JDX9~q{Q=q&5}yD7 delta 48 zcmdnMxQda(n3tF9Mtoz`L=JPAaQ%$@+*JLb%7 delta 414 zcmZ9IF-rq66vy+DT)iHx?ON;yu!}bcE{Y&Jb`W%ODd#2Bw0D=9q|o7{Lno(F>g?|1 zmvNAM2xoEfU7=Xy<^AE$FaP&K-uW_!j+@QsL%rU8(N*+3=Y)^|`x4u}pp-`+_D?e ztF69|=057{q`1iSv~ULStv&9;$7xbHl}R5=j4GVx!sv+)R3YRHc|F6(KqM~lfal&r z`bJC>!#}t2gU)4~WO5c?Njr9W5?>oVlBu(CriaQpl@5#?8fmT4#d--R2leWo+9(9t rn`%IJQ>L~4gzjm@&0Zy~N~%RWvX9n>XSo*BOrBG8wuTbY2paqs$Dm_2 diff --git a/q01_outlier_removal/build.py b/q01_outlier_removal/build.py index ec278ba..edf4dc4 100644 --- a/q01_outlier_removal/build.py +++ b/q01_outlier_removal/build.py @@ -1,8 +1,16 @@ +# %load q01_outlier_removal/build.py # Default imports import pandas as pd -loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') -loan_data = loan_data.drop('Loan_ID', 1) +df = pd.read_csv('data/loan_prediction_uncleaned.csv') +df = df.drop('Loan_ID', 1) # Write your Solution here: +def outlier_removal(df): + df = df.drop(df[(df['ApplicantIncome']>df['ApplicantIncome'].quantile(0.95)) | + (df['CoapplicantIncome']>df['CoapplicantIncome'].quantile(0.95)) | + (df['LoanAmount']>df['LoanAmount'].quantile(0.95))].index) + return df + + diff --git a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc index 5a057ffb73694628cef3ed87e03ee3a17f7410bc..9cd3e0ffea7f766abc26b97f0510a532c356e01a 100644 GIT binary patch delta 54 zcmdnVxQ&s+n3tF9)+Y9ti5%ui$@&@jxvBb&*@?v&DXGPYMJb8;;rT_`#RZAUsrt$J JDX9}vLIK~H63PGo delta 57 zcmdnSxRa5?n3tDpxuj9lL=JP+H2u)x)S_bj#H5VO;*8Yn;?ks|#N5QZ%)~tXywvje Mw9K5;;)#i&00;6D>Hq)$ diff --git a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc index 4c0b6c7431c6a14108ba9d55dd44059612f144a5..c262e9ff3be4bbb35b4c9d3ae8cb116c4458aa7b 100644 GIT binary patch delta 70 zcmZ3@w}y|?n3tF9)+Y9tlM^{taqy+5mZX*Dm-q-GbFCKV;- ZCgx=(=IQ69mdB@M=A;&HR%GI41po~Q7g+!R From 4750c88dd7f36890acaaa64d8e8a0d5efd4603ee Mon Sep 17 00:00:00 2001 From: Akashdesarda Date: Wed, 5 Dec 2018 11:44:44 +0000 Subject: [PATCH 2/4] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 178 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1366 bytes q02_data_cleaning_all/build.py | 25 +++++++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 184 bytes .../test_q02_data_cleaning.cpython-36.pyc | Bin 0 -> 3406 bytes 5 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all/__pycache__/build.cpython-36.pyc create mode 100644 q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc diff --git a/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..baa030c37ef051369b9116f305290ef085a475a1 GIT binary patch literal 178 zcmYL?F$w}P5Jj^fD8e4Z#u05r^a3^(Ho`C@!*1Mc5;L*zNVeWaTW?`ywh%wO$LBYH zRoBz!V|-F2??Jy*rECi_Dl+PzX4U?#(lGz^L1;tIumNAp-UvcKxL^Vv))(9>uCmT%{x>AB^ z!N--o@%aw{e);?s!*f*-p%pMP7|&qXdti0QVGbPdh{qqIHC_QnE5wr-K0_Nkk2ff+ zo&=emp?R{?=?3vE>HC50^SW7RN3!pQ=E9i3Imv%y#z?_Q|G8*` zT{}z4S3*tccXg|pB|w8NRom0_Ct+%G^em*+(HGLX#w<+^S3vdNKQv79rl!_`Tcg5+PvO%kREi_z6fOjRwVgJm_NievLir4eP-zAsHeC3G zo2K=#b9Pkth)+o&>z0P-J$~)+(&w+k6;k^QCQzvD3{)M9de z&FmEymKn|_KR4#uwyYA93Lb;{ok4iAf(oA*0mMQLx2Bnk%1z!rctk^y)2^fg%; z8^3+Vqtj;^c3{qkgRb literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all/build.py b/q02_data_cleaning_all/build.py index b56e2bc..5a251a4 100644 --- a/q02_data_cleaning_all/build.py +++ b/q02_data_cleaning_all/build.py @@ -1,10 +1,12 @@ +# %load q02_data_cleaning_all/build.py # Default Imports import sys, os sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname('__file__')))) import pandas as pd import numpy as np -from sklearn.model_selection import train_test_split +from sklearn.model_selection import train_test_split as tts from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal +from sklearn.preprocessing import Imputer loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') loan_data = loan_data.drop('Loan_ID', 1) @@ -12,3 +14,24 @@ # Write your solution here : +def data_cleaning(df): + + imp_mean = Imputer(missing_values = float('NaN'), strategy='mean') + df['LoanAmount'] = imp_mean.fit_transform(df[['LoanAmount']]).ravel() + + df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0]) + df['Married'] = df['Married'].fillna(df['Married'].mode()[0]) + df['Dependents'] = df['Dependents'].fillna(df['Dependents'].mode()[0]) + df['Self_Employed'] = df['Self_Employed'].fillna(df['Self_Employed'].mode()[0]) + df['Loan_Amount_Term'] = df['Loan_Amount_Term'].fillna(df['Loan_Amount_Term'].mode()[0]) + df['Credit_History'] = df['Credit_History'].fillna(df['Credit_History'].mode()[0]) + + X=df.drop(['LoanAmount'],axis=1) + y=df['LoanAmount'] + + X_train, X_test, y_train, y_test = tts(X, y, test_size = 0.25, random_state = 9) + + return (X,y,X_train, X_test, y_train, y_test) + + + diff --git a/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5475eda0aeea734086048a3aceb70bad7eeabded GIT binary patch literal 184 zcmYL?F$w}P5Jj^fD8e4Z#u05r^a3^(Ho`C@!*1Mc5;L*zNVeWaTW?`ywh%wO$LBYH zRoBz!V|-F2??Jy*rECi_Dl+PzX4U?#(lGz^VQ53ouupAy$J=YN?t{5H-VFCrP+!&n BGr#}< literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..57931fd67c90df6663bedba6c77cfc8df17fab65 GIT binary patch literal 3406 zcmd6p&2HO95P-QPMN+^1iQ~jg+OTOIKt(Dg?x8JE6bX_vFc3IE)1m_Oo21Y&bHy#iG zg9}%A0JnSwg6F^jlM!aVHv)@U%mxjpm;<`RT;_pxSeaEoyR6D;pgmS+4bWw_z!pJQ z*b=z8te4Qe$FtjyaDBXRSr6_XFhNl_IawLzs~OZPvc#lLaS*Dj1N%)p{rC%F4U8qAx*TE z@U&@aTk@o%ZKV2#A?h&fAKBi{`n>({EtT7hODY&`|B{KfazG>AjxxppdpDLUj^L|x z1(y=h4@B03t+aRUZG=ZYUPzOqt$>NlWq52rYcSF`alrh%1D6`GEop-=ERT#^$N08R zn|uU58p+{*0*zVt1$sO-hoG&kJqt#SzddBkc!v%XkSwYwg^-AI=r#tvACsVFD{6T`JmnQR&7h*5fL#bOKLs%0jHY%Xw*zg&-8tA{&UC zFjHU}5txVtmNWwYBgN++0w Date: Wed, 5 Dec 2018 11:49:19 +0000 Subject: [PATCH 3/4] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 180 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1050 bytes q02_data_cleaning_all_2/build.py | 15 +++++++++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 186 bytes .../q02_test_data_cleaning_2.cpython-36.pyc | Bin 0 -> 4761 bytes 5 files changed, 15 insertions(+) create mode 100644 q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc create mode 100644 q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc diff --git a/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7dbdc9b8d21538f48d722e9391845d9926eca53 GIT binary patch literal 180 zcmYL?F$w}P5Jj^fD8e4Z#*wuZSubE?VIvGfGVI39CTk`Z9?90*XzMMk%ogH@_xSwg zugh}&d`wR&=RN9|B9}TNlPseSYEkU(DvtACAA&aY47-k`H7pbj!7;>62GR^V7zP&_ zE6Hk%+(Z_!!Q&9anRSB8szO7FP&+1XeS_p2R)A=|O^D0k_L^<_V6Kk0i_@% literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6caaaef99eef75301e1e034ffb1aafd7a7bce828 GIT binary patch literal 1050 zcma)4Pfy!06t|N!X+uj3n+hNquBo&;>Oilsl7L^ehSBc_zaYe-81^&QRzslKw&fphQg-eL%wLWcE;pH+k-pq$P;d6B3?uOf_M(`a1dK+Et93$}Ig}3l0 z0p#00B;gw_8CN8FOO?tvBk?XT9VCq=tvyzy)FnnbM_f#jy}T5~98PuKsiNe{%#)Gg z^nKs6QCf&9m)gb*T*)L;+K}$3EJc=5Y4&AWrI+&9s5bicb~S+I6*5#f+zBc)78eJQ$3G z3XyCM&#FwYK{>bGhu+PuYjF<*@_HCPFUB!m$Iq~O0$6|Dh9#AZYSo0TZ2<3bu3~48 z;qf^&MJ}?l!9O?+{>o`kGg6+5H2VKYuaZYoeiz4gahlL*mbkHQ)l=_MwSjB4#U~ce uRU1yJjuWd+xZLYB>(bv7>)^1>?Yv->;ICcmx0&+r%O+2JBNjvCeQ=G-xOjYR?p0ut?rx1DPcYAiCf(;=I4TCaW%*tLOD_Knm&$ DJ`OZf literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..30b3b15aeb7b3231d651e651e68b4a8f38657b14 GIT binary patch literal 4761 zcmd5=O_S3$7`7b8alZC@7ZwV*v_R9L30e9QI@+N^_P2g#vL zhMvNu|DdPdd+jgjwYv6{ztB_P*G{s|Hk)n&2jcOoH?sU#dh~usx>_ve{`w>H__v~> z{H-K@I_S6H7Jq`E3MHOm5hztpZKbRfQD};4p4LiRY0wp#_ViZ9%0#;E8Lh09jr5F{ zYvrvxQ5b0!sL>}@krkUuG)r@R74$4CMs*o1AM4h#wPLMWYu388!P0D%t+92s(bpbp zdF}F9{{vN*_83olN}SQ@4~p+LZ#I&}tmv1{#Roxw}6B zqamXvklSe?LFNT|*}UhzU_NLijc1@8$Hp@62m!R+eco|Anev!lQl%!Cwwo_-7dO#V>UBeZjTZc(*a3%!KUz-O_=t|}J zl^uJ+C~U{`?43#oyol&2YKNNal-pf-cG?4bJg{}>oWOx7DTDNOrtn;z4KOhc2oz1j zSx}PG5KjWS^O*8fZIFUiz0b#NQ?}=HywC&#?=WG~!1ROAY&l_l-)uzOPqrwf4(5U9 z0js2P2rCL-LV>i$Vg)pLX6i78fMOe(#553~p?Yi@M(>YVKr6yf7O`N00!9et7^=-w zQ){Lv33W8igz2fZ-ZuvnfQ_4Bx6RDGfSa6!9qz|`n65CNv>l&1!bDH_iaRa#JOsw^_U#Om);1IL$J4#Nga9z zew{Ez^gwQ+m|=|n@6Iu9`4EGPxpRy)LEt!7U?)dW64Epz>IS6hah}j>?n8*GgI#FW zyY^i&B-Vot=4Ki|wm3)?Y?QsDJYvR;AXi|9keUZMR%RprwWF2P{J}25jm`ICWCs@@ z$^wBdfwL$xo5)NbKL6Ux@C$$Qp#uY4556I8 zj3|6V&yHZ`;;isK91u9_^#m{1=MC%!FpILnkrm8YZ-|xbh?S!ZW@ex{7*F+Pf|t32 zOaf;*F_^gHAk$Q-mIyED+Td4oB3v`}u1tg%vG2e_Y-V(U73WoWkz*Vfh8X@1=rSF> ze1xTxW?4$~BCe${dEWx8krUmV*H|Wy=BMQs_;xg~g=j(dpP*$ED?R~n1SvCDN8_P@ z14T>gVa%O7uq1(MrDbW@whxP7+m<=oZUwaCpfvJ*Xz~P^wxQ60j5glo`72#PBYj6u(H5$aw;YvrBZlp$6)T(sa(&IIMl-{M$ z@!b%OhvWEV`f11YXtWQtZo{6j>x4nebDx%HY$eN6x02-;DGjLB?(vbT_MwA`Ar3D8 zWybaKWFm| Date: Wed, 5 Dec 2018 12:04:06 +0000 Subject: [PATCH 4/4] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 180 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1210 bytes q03_logistic_regression/build.py | 15 +++++++++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 186 bytes .../test_q03_logistic_regression.cpython-36.pyc | Bin 0 -> 2300 bytes 5 files changed, 15 insertions(+) create mode 100644 q03_logistic_regression/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_logistic_regression/__pycache__/build.cpython-36.pyc create mode 100644 q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc diff --git a/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a572cced8e9a2c27d7b3123f18e0391c8bbb51cc GIT binary patch literal 180 zcmXr!<>k7$i9Ln^2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CUy1q|`MIh3j@gOD z87Zm7iA5=i`r-LS*~JBk$*KCu`6;RTIr-_C#U+`^@kOcWMXANbnfZC~1x5K;smUe! mg$Bm)SmgEN<1_OzOXB183My}L*yQG?l;)(`fvheDVg>*S+A@a# literal 0 HcmV?d00001 diff --git a/q03_logistic_regression/__pycache__/build.cpython-36.pyc b/q03_logistic_regression/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e8a6fa4a33c7d08b884c98ad9bc1be96974a09e GIT binary patch literal 1210 zcmbtTyKdVs6eaccE6&4q(zTlxyG_=jsF8FC5M*e8HXwoujL0~0s7EL|s0(FMKc!25 zpkqITYp47|r(UWyngkgNR01!L&ZRCd&%NC1beivf_-Ef8gnpuXj|cb-jN&U8h8Pxz zV3;joB{s3K;cVd~E^%w@2ru!8Ut?DUNrNZR>GJMu zoi$jqvH&-Dx8_=AKJ&;1*(6(}NBU&IJA8}xc%Kg{=ge8Lti{@25ZPw<1PwcPkSRJI zV!d{vXv(N$ClM7~0<(FPP2)nv(K|ksyeQ%<1$I5k(u=wI2otJg{0X>jHdi9%GL$^Y zZm0mN!>FQRBsfjubZXW-Ai~|x&kZpa8BN1nau!F*#GI$~Kb(!C;zs*N#(j8jhat-d zOZ#<4Z_tJh`I9 zCF2FuijF^I@~X&b#K%#_ILP^@WG2?KW)Lr%@kouYU+jfX_{Zn-Sg=vP&|6P3HE z$=qxG_P-*9yUSRk7$i9Ln^2p)q77+?f49Dul(1xTbY1T$zd`mJOr0tq9CU+MZ8`MIh3j@gOD z87Zm7iA5=i`r-LS*~JBk$*KCu`6;RTIr-_C#U+`^@kOcWMXANbnfZC~1x5K;smUe! sg$Bm)SmgCfQj1H9_2c6+^D;}~Cb>WLu@ldRZ z^{IR8&aBOAEB)lT#hbkK&f+cMP93_z$@f;beF1S;M_p3*KL~C1m=>V7I5RAaq+n^B z4#2RoU&|mWCrGcpMp+7v^cXsNlUwpHuG;hY!BHAmBQ}jb1&t)-z~&8-yrR0PquTzq6>r z9qvN!@*1y0U*jJ4p|A5L2)I$KW1xrcZ!*GF@ImK3v^{8g2ZXT@I8vltyKuFTCxwgH{c5QWO#A!i{%Mwo{_bn0Pr1-can}DaQ?K=i zMWP>NTmZQ5B?7R{hOm|X*`p`nE#rLy*yvJ*cTDMx#>FO1yY2sQ7T|r@q{iqi$0w7o z?!dhFVMhyTL5F2pJ_lt!EmczR0M1h299=jtYw~o4J{Kn}mnHzV=0XQN3)0L42{X}Y zFer1eIHT)a=S4NaSJzf;=+p`dOhYB^fmVEZXDF9W^*T(eARz$B(~pMa%FhfDWEd8t z5|Ph90fGWmjXiWYF)osJj*|A#AddR)M9#;mm&jzfD21%8oHKf&na@eIZV;CKe4LPk|w znjZruA%7G)FPwzExdV=^55ZAVcN4s5tWp-4eDhynC_No@y}}Q}lqDhzi$)kG8P6r^ z%`n6YRE;P+&1x0J1`5o@9RJIl;vrBC5U6OZXFl<3@YYLf_fuLQF`$Gl>=9RZ779O4 zV}lVZ42JH(bunfpOJw}2cS~`)Dqg*_M>`cSu5PH1N!fdq$C4{N@^t@yc?x$feIY04 z?!Wi-IJ|KuUiztFu(fJ`WsUdZ6fZ$jmKW+P!FY%l-corVBIV)sQrV~=FA6fI-iy#E x7N@ih3cy9Z@s$A-#1DB@!hBm+3jgaWt=}Td8j}a9A