Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions ismail_nurudeen/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
## KNN Algorithm from scratch.

**K Nearest Neighbor** is a simple supervised machine learning algorithm.

K-nearest neighbors (KNN) algorithm uses ‘feature similarity’ to predict the values of new datapoints.
This means that the new data point will be assigned a value based on how closely it matches the points in the training set.


In my implementation of the algorithm, there are three major steps:

Step 1 − Initialize the classifier with the number of nearest neighbors (K).

Step 2 − Fit - Stores the training data (features and labels).

Step 3 − Predict - Takes the test data and performs the following operations to return a prediction;

* Loops over the test data and calculates the distance between test data and each row of training data using the Euclidean Distance Method.
* Sorts the distances computed in the previous step in ascending order.
* Next, It gets the position of the first K rows (neighbors).
* Then, it will assign a class to the test point based on most frequent class of these rows.
* Returns the label of the most frequent class as the prediction.

## Usage

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from knn_classifier import KNN

iris = load_iris()
data = iris.data
target = iris.target

X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=43)

clf = KNN(K = 4) # By Default K = 3
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)

print('Accuracy:', accuracy_score(y_test, predictions))
```
99 changes: 99 additions & 0 deletions ismail_nurudeen/knn_classifier.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 36,
"id": "e1b89bac",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[1]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import operator\n",
"\n",
"# calculate euclidean distance\n",
"def euc_dist(x1, x2):\n",
" return np.sqrt(np.sum((x1-x2)**2))\n",
"\n",
"class KNN():\n",
" \n",
" def __init__(self, K = 3):\n",
" if(K < 1):\n",
"            raise Exception('K must be greater than 0')\n",
" self.K = K\n",
"\n",
" def fit(self, x_train, y_train):\n",
" self.X_train = x_train\n",
" self.y_train = y_train\n",
"\n",
" def predict(self, X_test):\n",
" \n",
" # list to store all our predictions\n",
" predictions = []\n",
" \n",
" # loop over all observations\n",
" for i in range(len(X_test)): \n",
" \n",
" # calculate the distance between the test point and all other points in the training set\n",
" dist = np.array([euc_dist(X_test[i], x) for x in self.X_train])\n",
" \n",
" # sort the distances and return the positions of the first K neighbors\n",
" dist_sorted = dist.argsort()[:self.K]\n",
" \n",
" neighbor_votes = {}\n",
"\n",
" # for each neighbor find the class and return the most voted.\n",
" for d in dist_sorted:\n",
" if self.y_train[d] in neighbor_votes:\n",
" neighbor_votes[self.y_train[d]] += 1\n",
" else:\n",
" neighbor_votes[self.y_train[d]] = 1\n",
" \n",
" # get the most common class label \n",
" sorted_neighbors = sorted(neighbor_votes.items(), key=operator.itemgetter(1), reverse=True)\n",
" \n",
" predictions.append(sorted_neighbors[0][0])\n",
" return predictions"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "02b69260",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
46 changes: 46 additions & 0 deletions ismail_nurudeen/knn_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import operator
from collections import Counter

import numpy as np

# calculate euclidean distance
def euc_dist(x1, x2):
    """Return the Euclidean (L2) distance between points x1 and x2."""
    diff = x1 - x2
    return np.sqrt((diff ** 2).sum())

class KNN():
    """K-Nearest-Neighbors classifier.

    A lazy learner: ``fit`` only stores the training data; ``predict``
    labels each test point with the majority class among its K closest
    training points (Euclidean distance).
    """

    def __init__(self, K=3):
        """Initialize the classifier.

        Args:
            K: number of neighbors that vote for each prediction
               (must be at least 1). Defaults to 3.

        Raises:
            ValueError: if K < 1. (ValueError subclasses Exception, so
                callers catching the previous generic Exception still work;
                also fixes the "grater" typo in the message.)
        """
        if K < 1:
            raise ValueError('K must be greater than 0')
        self.K = K

    def fit(self, x_train, y_train):
        """Store the training features and labels; no actual training occurs."""
        self.X_train = x_train
        self.y_train = y_train

    def predict(self, X_test):
        """Predict a class label for each row of X_test.

        Args:
            X_test: iterable of test points, same feature dimension as
                the stored training data.

        Returns:
            A list with one predicted label per test point.
        """
        # Convert once so the distance computation below can be vectorized
        # even if fit() was given plain Python lists.
        X_train = np.asarray(self.X_train)
        y_train = np.asarray(self.y_train)

        predictions = []
        for point in X_test:
            # Euclidean distance from this test point to every training
            # point in one vectorized expression (replaces the original
            # per-row Python loop; identical per-row arithmetic).
            dist = np.sqrt(((X_train - point) ** 2).sum(axis=1))

            # Indices of the K nearest training points, nearest first.
            nearest = dist.argsort()[:self.K]

            # Majority vote. Counter.most_common is stable, so ties are
            # broken in favor of the label seen first (i.e. the nearer
            # neighbor), matching the original stable-sort behavior.
            votes = Counter(y_train[idx] for idx in nearest)
            predictions.append(votes.most_common(1)[0][0])
        return predictions