From 789aa263daf46cacfc59490c7fe80ca0b1da54c5 Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Thu, 8 Jul 2021 17:23:53 +0100
Subject: [PATCH 01/10] Add files via upload

my first push
---
 oyelabi_paul_oluwadara/dsnOAU.md | 660 +++++++++++++++++++++++++++++++
 oyelabi_paul_oluwadara/knn.py    |  99 +++++
 2 files changed, 759 insertions(+)
 create mode 100644 oyelabi_paul_oluwadara/dsnOAU.md
 create mode 100644 oyelabi_paul_oluwadara/knn.py

diff --git a/oyelabi_paul_oluwadara/dsnOAU.md b/oyelabi_paul_oluwadara/dsnOAU.md
new file mode 100644
index 0000000..c7a4095
--- /dev/null
+++ b/oyelabi_paul_oluwadara/dsnOAU.md
@@ -0,0 +1,660 @@
+# A Light Implementation of K Nearest Neighbors.
+
+A light implementation of the K nearest neighbors algorithm.
+This implementation only supports the euclidean and manhattan
+distance metrics. It also provides the probability of each
+prediction.
+
+It provides the same interface as the sklearn kNN algorithm.
+Below are runs of the sklearn implementation and of mine.
+
+    parameters:
+        k: the number of neighbors.
+        method: euclidean/manhattan.
+        mode: classification/regression.
+
+`clf_1 --> sklearn KNN classifier algorithm`
+
+`clf_2 --> my algorithm`
+
+`regr_1 --> sklearn KNN regressor algorithm`
+
+`regr_2 --> my algorithm`
+
+The implementation is found in the knn.py file.
+
+
+
+```python
+from sklearn.datasets import load_iris, load_breast_cancer, load_diabetes
+from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score, mean_squared_error
+from timeit import timeit
+from knn import KNN
+```
+
+# CLASSIFICATION - iris flower classification
+
+
+```python
+#load classification data
+data, target = load_iris(True)
+data.shape, target.shape
+```
+
+
+
+
+    ((150, 4), (150,))
+
+
+
+```python
+x_train, x_test, y_train, y_test = train_test_split(data, target, test_size=0.25, random_state=0, shuffle=True)
+```
+
+### sklearn knn classifier
+
+
+```python
+clf_1 = KNeighborsClassifier(n_neighbors=3)
+```
+
+
+```python
+clf_1.fit(x_train, y_train)
+```
+
+
+
+
+    KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
+                         metric_params=None, n_jobs=None, n_neighbors=3, p=2,
+                         weights='uniform')
+
+
+
+```python
+y1_pred = clf_1.predict(x_test)
+```
+
+
+```python
+print("sklearn KNN classifier accuracy score: {:.2f}%".format(accuracy_score(y_test, y1_pred)*100))
+```
+
+    sklearn KNN classifier accuracy score: 97.37%
+
+
+```python
+clf_1.predict_proba(x_test)
+```
+
+
+
+
+    array([[0.        , 0.        , 1.        ],
+           [0.        , 1.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [0.        , 0.        , 1.        ],
+           [1.        , 0.        , 0.        ],
+           [0.        , 0.        , 1.        ],
+           [1.        , 0.        , 0.        ],
+           [0.        , 1.        , 0.        ],
+           [0.        , 1.        , 0.        ],
+           [0.        , 1.        , 0.        ],
+           [0.        , 0.        , 1.        ],
+           [0.        , 1.        , 0.        ],
+           [0.        , 1.        , 0.        ],
+           [0.        , 1.        , 0.        ],
+           [0.        , 0.66666667, 0.33333333],
+           [1.        , 0.        , 0.        ],
+           [0.        , 1.        , 0.        ],
+           [0.        , 1.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [0.        , 0.        , 1.        ],
+           [0.        , 1.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [0.        , 0.        , 1.        ],
+           [1.        , 0.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [0.        , 1.        , 0.        ],
+           [0.        , 1.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [0.        , 0.        , 1.        ],
+           [0.        , 1.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [0.        , 0.33333333, 0.66666667],
+           [0.        , 0.        , 1.        ],
+           [0.        , 1.        , 0.        ],
+           [1.        , 0.        , 0.        ],
+           [0.        , 0.        , 1.
]]) + + + +### My KNN classifier + + +```python +clf_2 = KNN() #defaults to 3 nearest neighbors +``` + + +```python +clf_2.fit(x_train, y_train) +``` + + +```python +y2_pred = clf_2.predict(x_test) +``` + + +```python +print("My KNN classifier accuracy score: {:.2f}%".format(accuracy_score(y_test, y2_pred)*100)) +``` + + My KNN classifier accuracy score: 97.37% + + + +```python +clf_2.predict_proba(x_test) +``` + + + + + array([[0. , 0. , 1. ], + [0. , 1. , 0. ], + [1. , 0. , 0. ], + [0. , 0. , 1. ], + [1. , 0. , 0. ], + [0. , 0. , 1. ], + [1. , 0. , 0. ], + [0. , 1. , 0. ], + [0. , 1. , 0. ], + [0. , 1. , 0. ], + [0. , 0. , 1. ], + [0. , 1. , 0. ], + [0. , 1. , 0. ], + [0. , 1. , 0. ], + [0. , 0.66666667, 0.33333333], + [1. , 0. , 0. ], + [0. , 1. , 0. ], + [0. , 1. , 0. ], + [1. , 0. , 0. ], + [1. , 0. , 0. ], + [0. , 0. , 1. ], + [0. , 1. , 0. ], + [1. , 0. , 0. ], + [1. , 0. , 0. ], + [0. , 0. , 1. ], + [1. , 0. , 0. ], + [1. , 0. , 0. ], + [0. , 1. , 0. ], + [0. , 1. , 0. ], + [1. , 0. , 0. ], + [0. , 0. , 1. ], + [0. , 1. , 0. ], + [1. , 0. , 0. ], + [0. , 0.33333333, 0.66666667], + [0. , 0. , 1. ], + [0. , 1. , 0. ], + [1. , 0. , 0. ], + [0. , 0. , 1. ]]) + + + +# CLASSIFICATION - breast cancer + + +```python +breast_data, breast_target = load_breast_cancer(True) +breast_data.shape, breast_target.shape +``` + + + + + ((569, 30), (569,)) + + + + +```python +x2_train, x2_test, y2_train, y2_test = train_test_split( + breast_data, breast_target, test_size=0.25, random_state=0, shuffle=True + ) +``` + +### sklearn knn classifier + + +```python +clf_1 = KNeighborsClassifier(n_neighbors=3) +``` + + +```python +clf_1.fit(x2_train, y2_train); +``` + + +```python +y1_pred = clf_1.predict(x2_test) +``` + + +```python +print("sklearn KNN classifier accuracy score: {:.2f}%".format(accuracy_score(y2_test, y1_pred)*100)) +``` + + sklearn KNN classifier accuracy score: 92.31% + + + +```python +clf_1.predict_proba(x2_test) +``` + + + + + array([[0.66666667, 0.33333333], + [0. , 1. ], + [0. , 1. ], + [0.66666667, 0.33333333], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0.66666667, 0.33333333], + [0.33333333, 0.66666667], + [0. , 1. ], + [0.66666667, 0.33333333], + [0.66666667, 0.33333333], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [1. , 0. ], + [0.66666667, 0.33333333], + [0.66666667, 0.33333333], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0.66666667, 0.33333333], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0.66666667, 0.33333333], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0.66666667, 0.33333333], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0.33333333, 0.66666667], + [1. , 0. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0.33333333, 0.66666667], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. 
, 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0.66666667, 0.33333333], + [1. , 0. ], + [0.66666667, 0.33333333], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ]]) + + + +### My KNN classifier + + +```python +clf_2 = KNN() #defaults to 3 nearest neighbors +``` + + +```python +clf_2.fit(x2_train, y2_train) +``` + + +```python +y2_pred = clf_2.predict(x2_test) +``` + + +```python +print("My KNN classifier accuracy score: {:.2f}%".format(accuracy_score(y2_test, y2_pred)*100)) +``` + + My KNN classifier accuracy score: 92.31% + + + +```python +clf_2.predict_proba(x2_test) +``` + + + + + array([[0.66666667, 0.33333333], + [0. , 1. ], + [0. , 1. ], + [0.66666667, 0.33333333], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0.66666667, 0.33333333], + [0.33333333, 0.66666667], + [0. , 1. ], + [0.66666667, 0.33333333], + [0.66666667, 0.33333333], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [1. , 0. ], + [0.66666667, 0.33333333], + [0.66666667, 0.33333333], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0.66666667, 0.33333333], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0.66666667, 0.33333333], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0.66666667, 0.33333333], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0.33333333, 0.66666667], + [1. , 0. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0.33333333, 0.66666667], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [1. , 0. ], + [0.66666667, 0.33333333], + [1. , 0. ], + [0.66666667, 0.33333333], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0.33333333, 0.66666667], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. ], + [1. , 0. ], + [0. , 1. ], + [0. , 1. ], + [0. , 1. ], + [1. , 0. 
]])
+
+
+
+# REGRESSION - diabetes dataset
+
+
+```python
+rgr_data, rgr_target = load_diabetes(True)
+rgr_data.shape, rgr_target.shape
+```
+
+
+
+
+    ((442, 10), (442,))
+
+
+
+```python
+rx_train, rx_test, ry_train, ry_test = train_test_split(rgr_data, rgr_target, test_size=0.25, random_state=0, shuffle=True)
+```
+
+### sklearn KNN regressor
+
+
+```python
+regr_1 = KNeighborsRegressor(n_neighbors=3)
+regr_1.fit(rx_train, ry_train);
+```
+
+
+```python
+ry_pred = regr_1.predict(rx_test)
+```
+
+
+```python
+print("sklearn KNN regressor percentage RMSE: {:.2f}".format(mean_squared_error(ry_test, ry_pred)))
+```
+
+    sklearn KNN regressor percentage RMSE: 4232.01
+
+
+### My KNN regressor
+
+
+```python
+regr_2 = KNN(k=3, mode="regression")
+regr_2.fit(rx_train, ry_train);
+```
+
+
+```python
+ry2_pred = regr_2.predict(rx_test)
+```
+
+
+```python
+print("My KNN regressor percentage RMSE: {:.2f}".format(mean_squared_error(ry_test, ry2_pred)))
+```
+
+    My KNN regressor percentage RMSE: 4232.01
+
+
+
+```python
+
+```
diff --git a/oyelabi_paul_oluwadara/knn.py b/oyelabi_paul_oluwadara/knn.py
new file mode 100644
index 0000000..b868281
--- /dev/null
+++ b/oyelabi_paul_oluwadara/knn.py
@@ -0,0 +1,99 @@
+import numpy as np
+import statistics as st
+
+class KNN():
+    """
+    A light implementation of the K nearest neighbors algorithm.
+    This implementation only supports the euclidean and manhattan
+    distance metrics. It also provides the probability of each
+    prediction.
+
+    parameters:
+        k: the number of neighbors.
+        method: euclidean/manhattan.
+        mode: classification/regression.
+    """
+
+    def __init__(self, k=3, mode="classification", method="euclidean"):
+        self.k = k
+        self.mode = mode
+        self.x = None
+        self.y = None
+        self.class_ = None
+        self.method = method
+
+        assert self.mode in ("classification", "regression"), "Unsupported mode."
+        assert self.method in ("euclidean", "manhattan"), "Unsupported method."
+
+    def fit(self, x, y):
+        # KNN is a lazy learner: fitting only stores the training data.
+        if isinstance(x, np.ndarray) and isinstance(y, np.ndarray):
+            self.x = x
+            self.y = y
+        else:
+            self.x = np.array(x)
+            self.y = np.array(y)
+        self.class_ = np.unique(self.y)
+
+    def predict(self, x):
+        assert isinstance(self.x, np.ndarray) and isinstance(self.y, np.ndarray),\
+            "You need to train before predicting."
+        if x.ndim < 2:
+            raise ValueError("Input is not in the right shape; expected a 2-D array.")
+        return np.apply_along_axis(self.util, 1, x)
+
+    def predict_proba(self, x):
+        assert self.mode=="classification", "Method available only for classification."
+        assert isinstance(self.x, np.ndarray) and isinstance(self.y, np.ndarray),\
+            "You need to train before predicting."
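+        # apply_along_axis feeds each row of x (one test sample at a time) to
+        # prob_util and stacks the returned per-class probabilities into a
+        # 2-D array with one row per sample.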
+        return np.apply_along_axis(self.prob_util, 1, x)
+
+    def euclidean(self, x):
+        # Indices of the k training points closest to x under the L2 norm.
+        return np.argpartition(
+            np.linalg.norm(self.x-x, axis=1), self.k)[:self.k]
+        # equivalent distance computation: np.sqrt(np.sum(np.square(self.x-x), 1))
+
+    def manhattan(self, x):
+        # Indices of the k training points closest to x under the L1 norm.
+        return np.argpartition(
+            np.linalg.norm(self.x-x, ord=1, axis=1), self.k)[:self.k]
+        # equivalent distance computation: np.sum(np.absolute(self.x-x), 1)
+
+    def prob_util(self, x):
+        idx = self.euclidean(x) if self.method=="euclidean" else self.manhattan(x)
+        cls_ = self.y[idx]
+        out = []
+        unique, count = np.unique(cls_, return_counts=True)
+        for i in self.class_:
+            try:
+                # fraction of the k neighbors belonging to class i
+                out.append(count[np.where(unique==i)][0]/len(cls_))
+            except IndexError:
+                # class i does not occur among the k neighbors
+                out.append(0)
+        return out
+
+    def util(self, x):
+        idx = self.euclidean(x) if self.method=="euclidean" else self.manhattan(x)
+        cls_ = self.y[idx]
+        if self.mode=="classification":
+            try:
+                return st.mode(cls_)
+            except st.StatisticsError:
+                raise Exception(
+                    "No unique mode; found 2 equally common values. "
+                    "You should consider the value of k with respect "
+                    "to the number of classes in your target variable."
+                ) from None
+
+        elif self.mode=="regression":
+            return st.mean(cls_)
+
+if __name__ == "__main__":
+    from sklearn.datasets import load_iris
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+    data, target = load_iris(True)
+    x_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.25, random_state=0, shuffle=True)
+    clf_2 = KNN(method="manhattan")
+    clf_2.fit(x_train, y_train)
+    print(clf_2.class_)
+    y = clf_2.predict(X_test)
+    print("My KNN classifier accuracy score: {:.2f}%".format(accuracy_score(y_test, y)*100))
+    print(clf_2.predict_proba(X_test))

From 0a47284d4c687abf8d1d8e86edfb46918e2f12f2 Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Fri, 9 Jul 2021 09:05:55 +0100
Subject: [PATCH 02/10] Update dsnOAU.md

---
 oyelabi_paul_oluwadara/dsnOAU.md | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/oyelabi_paul_oluwadara/dsnOAU.md b/oyelabi_paul_oluwadara/dsnOAU.md
index c7a4095..13668ca 100644
--- a/oyelabi_paul_oluwadara/dsnOAU.md
+++ b/oyelabi_paul_oluwadara/dsnOAU.md
@@ -12,6 +12,30 @@ Below are runs of the sklearn implementation and of mine.
         k: the number of neighbors.
         method: euclidean/manhattan.
         mode: classification/regression.
+
+
+    fit method:
+    The KNN algorithm doesn't really learn; this method only
+    captures the training dataset with which test samples will
+    be compared.
+
+    predict method:
+    The prediction is made by comparing the distance of datapoints
+    in the training set to the test sample. This method calls
+    the util method, which calculates the distance metric and then
+    assigns the class with the majority vote (the highest number of
+    occurrences) to the test sample.
+
+    predict_proba method:
+    This method works like the predict method: it calls the prob_util
+    method, which performs almost the same steps as the util method
+    but returns the probability of the test sample belonging to each
+    of the classes the predict method can output.
+
+    The argpartition method used in the distance metrics works by partitioning
+    the array into two parts, with the k smallest numbers on the left and the
+    others on the right, and then returns the indices of those k smallest
+    numbers, which are then used to index the target variable to get the
+    corresponding labels.
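+
+A minimal standalone sketch of this selection step, with made-up
+distances (the names here are illustrative, not part of knn.py):
+
+```python
+import numpy as np
+
+distances = np.array([4.2, 0.5, 3.1, 0.9, 2.7])  # hypothetical distances to 5 training points
+k = 3
+
+# argpartition places the k smallest values in the first k slots without
+# fully sorting the array, then we keep the corresponding indices.
+idx = np.argpartition(distances, k)[:k]
+
+print(idx)             # e.g. [1 3 4] -- indices of the 3 nearest points (order not guaranteed)
+print(distances[idx])  # e.g. [0.5 0.9 2.7]
+```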
 
 `clf_1 --> sklearn KNN classifier algorithm`

From ac7c8243d3cef51f3bb86d537c529f8f98d9639c Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Sat, 10 Jul 2021 21:36:49 +0100
Subject: [PATCH 03/10] Update dsnOAU.md

---
 oyelabi_paul_oluwadara/dsnOAU.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/oyelabi_paul_oluwadara/dsnOAU.md b/oyelabi_paul_oluwadara/dsnOAU.md
index 13668ca..0ee9378 100644
--- a/oyelabi_paul_oluwadara/dsnOAU.md
+++ b/oyelabi_paul_oluwadara/dsnOAU.md
@@ -3,7 +3,8 @@
 A light implementation of the K nearest neighbors algorithm.
 This implementation only supports the euclidean and manhattan
 distance metrics. It also provides the probability of each
-prediction.
+prediction. I leveraged NumPy and Python's built-in
+statistics package.
 
 It provides the same interface as the sklearn kNN algorithm.
 Below are runs of the sklearn implementation and of mine.

From 789ea4bf75e74197995e2360f0542a5e6cd282d3 Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Thu, 15 Jul 2021 17:31:26 +0100
Subject: [PATCH 04/10] Create readme.md

---
 oyelabi_paul_oluwadara/readme.md | 81 ++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 oyelabi_paul_oluwadara/readme.md

diff --git a/oyelabi_paul_oluwadara/readme.md b/oyelabi_paul_oluwadara/readme.md
new file mode 100644
index 0000000..7165b8e
--- /dev/null
+++ b/oyelabi_paul_oluwadara/readme.md
@@ -0,0 +1,81 @@
+# A Light Implementation of K Nearest Neighbors.
+
+A light implementation of the K nearest neighbors algorithm.
+This implementation only supports the euclidean and manhattan
+distance metrics. It also provides the probability of each
+prediction. I leveraged NumPy and Python's built-in
+statistics package.
+
+It provides the same interface as the sklearn kNN algorithm.
+Below are runs of the sklearn implementation and of mine.
+
+The KNN algorithm works by calculating how close datapoints are
+to one another, measuring the distance between two datapoints
+with a distance metric such as the euclidean distance, and then
+selecting the K closest datapoints. It then assigns the class with
+the majority vote among the K selected datapoints to the new sample
+the algorithm is trying to classify. In terms of regression, it returns the mean of
+the targets of the K datapoints closest to the new sample.
+
+    parameters:
+        k: the number of neighbors.
+        method: euclidean/manhattan.
+        mode: classification/regression.
+
+
+    fit method:
+    Since the KNN algorithm doesn't really learn, this method only
+    captures the predictors (X) and target (Y) with which test samples
+    will be compared.
+
+    predict method:
+    The prediction is made by comparing the distance of datapoints
+    in the training set to the test sample. This method calls
+    the util method, which calculates the distance metric and then
+    assigns the class with the majority vote (the highest number of
+    occurrences) to the test sample.
+
+    predict_proba method:
+    This method works like the predict method: it calls the prob_util
+    method, which performs almost the same steps as the util method
+    but returns the probability of the test sample belonging to each
+    of the classes the predict method can output.
+
+    euclidean method:
+    This method calculates the euclidean distance between datapoints using
+    the numpy linear algebra method. I used the numpy argpartition method to
+    partially sort the calculated distances and then return the indices of the K
+    closest (smallest value) distances.
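+
+A standalone sketch of this distance computation (the arrays here are
+made up; the `ord` argument is what switches `np.linalg.norm` from the
+euclidean distance to the manhattan distance used by the next method):
+
+```python
+import numpy as np
+
+X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 1.0]])  # hypothetical training points
+q = np.array([2.0, 2.0])                            # hypothetical query point
+
+euclidean = np.linalg.norm(X - q, axis=1)           # L2 norm of each row
+manhattan = np.linalg.norm(X - q, ord=1, axis=1)    # L1 norm of each row
+
+print(euclidean)  # [1.         2.23606798 3.16227766]
+print(manhattan)  # [1. 3. 4.]
+```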
+
+    manhattan method:
+    This method calculates the manhattan distance between datapoints using
+    the numpy linear algebra method. I used the numpy argpartition method to
+    partially sort the calculated distances and then return the indices of the K
+    closest (smallest value) distances.
+
+    util method:
+    This method calls either the euclidean or the manhattan method according to the
+    method parameter passed to the class constructor. It gets the indices of
+    the K closest datapoints which it then uses to index the target variable
+    to get the corresponding class label. It then calculates the majority vote
+    using the python built-in statistics mode function. If the mode parameter is
+    set to `regression`, this method returns the mean of the K nearest datapoints.
+
+    `I could have assigned a class randomly whenever there is a tie in the majority
+    vote, but instead I raise an exception and advise the data scientist to
+    consider selecting the K value with respect to the number of classes in the
+    target variable (i.e. an even value for K whenever the number of classes is
+    odd, and an odd value for K whenever the number of classes is even), as this
+    is a best practice to ensure good results.`
+
+    prob_util method:
+    This method is only available in classification mode. It works much like
+    the util method, but it returns the probability of a test sample belonging
+    to each class. It achieves this by dividing the number of occurrences of
+    each class among the K nearest datapoints by K.
+
+    The argpartition method used in the distance metrics works by partitioning
+    the array into two parts, with the k smallest numbers on the left and the
+    others on the right, and then returns the indices of those k smallest
+    numbers, which are then used to index the target variable to get the
+    corresponding labels.
+

From 10edb5980f0d211f3f5b17ab280f84b708dd1c03 Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Thu, 15 Jul 2021 17:35:16 +0100
Subject: [PATCH 05/10] Update readme.md

---
 oyelabi_paul_oluwadara/readme.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/oyelabi_paul_oluwadara/readme.md b/oyelabi_paul_oluwadara/readme.md
index 7165b8e..7d0c620 100644
--- a/oyelabi_paul_oluwadara/readme.md
+++ b/oyelabi_paul_oluwadara/readme.md
@@ -43,23 +43,23 @@
     euclidean method:
     This method calculates the euclidean distance between datapoints using
-    the numpy linear algebra method. I used the numpy argpartition method to
-    partially sort the calculated distances and then return the indices of the K
-    closest (smallest value) distances.
+    the numpy linear algebra method. I used the numpy argpartition method
+    to partially sort the calculated distances and then return the indices
+    of the K closest (smallest value) distances.
 
     manhattan method:
     This method calculates the manhattan distance between datapoints using
-    the numpy linear algebra method. I used the numpy argpartition method to
-    partially sort the calculated distances and then return the indices of the K
-    closest (smallest value) distances.
+    the numpy linear algebra method. I used the numpy argpartition method
+    to partially sort the calculated distances and then return the indices
+    of the K closest (smallest value) distances.
 
     util method:
    This method calls either the euclidean or the manhattan method according to the
     method parameter passed to the class constructor. It gets the indices of
     the K closest datapoints which it then uses to index the target variable
-    to get the corresponding class label. It then calculates the majority vote
-    using the python built-in statistics mode function. If the mode parameter is
-    set to `regression`, this method returns the mean of the K nearest datapoints.
+    to get the corresponding class label. It then calculates the majority
+    vote using the python built-in statistics mode function. If the mode parameter
+    is set to `regression`, this method returns the mean of the K nearest datapoints.

From afa1fddf3e9834e3ce8066092e55784c4e07bd32 Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Thu, 15 Jul 2021 17:49:47 +0100
Subject: [PATCH 06/10] Update readme.md

---
 oyelabi_paul_oluwadara/readme.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/oyelabi_paul_oluwadara/readme.md b/oyelabi_paul_oluwadara/readme.md
index 7d0c620..43b28ef 100644
--- a/oyelabi_paul_oluwadara/readme.md
+++ b/oyelabi_paul_oluwadara/readme.md
@@ -79,3 +79,4 @@ the targets of the K datapoints closest to the new sample.
     numbers, which are then used to index the target variable to get the
     corresponding labels.
 
+### Some tests are found in [dsnOAU.md](https://github.com/Yodeman/ML-Algorithm-Challenge/edit/main/oyelabi_paul_oluwadara/readme.md)

From 791c611b0ed50f7adbc3e304d5b50256ad7f60b2 Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Thu, 15 Jul 2021 17:50:37 +0100
Subject: [PATCH 07/10] Update readme.md

---
 oyelabi_paul_oluwadara/readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oyelabi_paul_oluwadara/readme.md b/oyelabi_paul_oluwadara/readme.md
index 43b28ef..58954ff 100644
--- a/oyelabi_paul_oluwadara/readme.md
+++ b/oyelabi_paul_oluwadara/readme.md
@@ -79,4 +79,4 @@
-### Some tests are found in [dsnOAU.md](https://github.com/Yodeman/ML-Algorithm-Challenge/edit/main/oyelabi_paul_oluwadara/readme.md)
+### Some tests are found in [dsnOAU.md](https://github.com/Yodeman/ML-Algorithm-Challenge/edit/main/oyelabi_paul_oluwadara/dsnOAU.md)

From e538fc66d3537e738cc8c3fae431e891086afa24 Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Thu, 15 Jul 2021 17:51:25 +0100
Subject: [PATCH 08/10] Update readme.md

---
 oyelabi_paul_oluwadara/readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oyelabi_paul_oluwadara/readme.md b/oyelabi_paul_oluwadara/readme.md
index 58954ff..22d1fb0 100644
--- a/oyelabi_paul_oluwadara/readme.md
+++ b/oyelabi_paul_oluwadara/readme.md
@@ -79,4 +79,4 @@
     numbers, which are then used to index the target variable to get the
     corresponding labels.
 
-### Some tests are found in [dsnOAU.md](https://github.com/Yodeman/ML-Algorithm-Challenge/edit/main/oyelabi_paul_oluwadara/dsnOAU.md)
+### Some tests are found in [dsnOAU.md](https://github.com/Yodeman/ML-Algorithm-Challenge/main/oyelabi_paul_oluwadara/dsnOAU.md)

From 2da5401d63e7b1c375c47c51e5ea16a54090e42f Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Thu, 15 Jul 2021 17:52:32 +0100
Subject: [PATCH 09/10] Update readme.md

---
 oyelabi_paul_oluwadara/readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oyelabi_paul_oluwadara/readme.md b/oyelabi_paul_oluwadara/readme.md
index 22d1fb0..a5278af 100644
--- a/oyelabi_paul_oluwadara/readme.md
+++ b/oyelabi_paul_oluwadara/readme.md
@@ -79,4 +79,4 @@
     numbers, which are then used to index the target variable to get the
     corresponding labels.
 
-### Some tests are found in [dsnOAU.md](https://github.com/Yodeman/ML-Algorithm-Challenge/main/oyelabi_paul_oluwadara/dsnOAU.md)
+### Some tests are found in [dsnOAU.md](https://github.com/Yodeman/ML-Algorithm-Challenge/blob/main/oyelabi_paul_oluwadara/dsnOAU.md)

From 613b825edf01a595c117ff0715e310572d2276f7 Mon Sep 17 00:00:00 2001
From: Oyelabi Paul <59335237+Yodeman@users.noreply.github.com>
Date: Fri, 16 Jul 2021 09:06:26 +0100
Subject: [PATCH 10/10] Update dsnOAU.md

---
 oyelabi_paul_oluwadara/dsnOAU.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/oyelabi_paul_oluwadara/dsnOAU.md b/oyelabi_paul_oluwadara/dsnOAU.md
index 0ee9378..5198daf 100644
--- a/oyelabi_paul_oluwadara/dsnOAU.md
+++ b/oyelabi_paul_oluwadara/dsnOAU.md
@@ -652,10 +652,10 @@ ry_pred = regr_1.predict(rx_test)
 ```
 
 
 ```python
-print("sklearn KNN regressor percentage RMSE: {:.2f}".format(mean_squared_error(ry_test, ry_pred)))
+print("sklearn KNN regressor MSE: {:.2f}".format(mean_squared_error(ry_test, ry_pred)))
 ```
 
-    sklearn KNN regressor percentage RMSE: 4232.01
+    sklearn KNN regressor MSE: 4232.01
 
 
 ### My KNN regressor
@@ -673,10 +673,10 @@ ry2_pred = regr_2.predict(rx_test)
 ```
 
 
 ```python
-print("My KNN regressor percentage RMSE: {:.2f}".format(mean_squared_error(ry_test, ry2_pred)))
+print("My KNN regressor MSE: {:.2f}".format(mean_squared_error(ry_test, ry2_pred)))
 ```
 
-    My KNN regressor percentage RMSE: 4232.01
+    My KNN regressor MSE: 4232.01