Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
301 changes: 301 additions & 0 deletions Lydiaaa.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "cbe3555e",
"metadata": {},
"outputs": [],
"source": [
"#importing the libraries\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import datasets\n",
"from collections import Counter\n",
"from matplotlib.colors import ListedColormap"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d7ec577",
"metadata": {},
"outputs": [],
"source": [
"#About the dataset\n",
"#this dataset contains the information of the users of social network, \n",
"#it has several business clients. one of the user works in a car company and\n",
"#just launched their brand new SUV for a token.\n",
"#we are going to build a predict model to see if other users will get or not.\n",
"#based on two columns age and the estimated salaries\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a177dc9",
"metadata": {},
"outputs": [],
"source": [
"#importing the dataset\n",
"dataset = pd.read_csv('Social_Network_Ads.csv') "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ec1ddc6",
"metadata": {},
"outputs": [],
"source": [
"cmap = ListedColormap(['red', 'blue'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "81cb85d8",
"metadata": {},
"outputs": [],
"source": [
"X = dataset.iloc[:, [2, 3]].values\n",
"y = dataset.iloc[:, 4].values"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f428132c",
"metadata": {},
"outputs": [],
"source": [
"#Splitting the dataset into train set and test set\n",
"X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.25, random_state = 124)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a544025",
"metadata": {},
"outputs": [],
"source": [
"#Feature Scaling\n",
"from sklearn.preprocessing import StandardScaler\n",
"sc_x = StandardScaler()\n",
"X_train = sc_x.fit_transform(X_train)\n",
"X_test = sc_x.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d73937aa",
"metadata": {},
"outputs": [],
"source": [
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "23d66b87",
"metadata": {},
"outputs": [],
"source": [
"dataset.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dcc5cca4",
"metadata": {},
"outputs": [],
"source": [
"dataset.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74fd6605",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import LabelEncoder\n",
"le = LabelEncoder()\n",
"dataset['Gender'] = le.fit_transform(dataset['Gender'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17ba9cfe",
"metadata": {},
"outputs": [],
"source": [
"dataset.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b98d3fa",
"metadata": {},
"outputs": [],
"source": [
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b42ebce",
"metadata": {},
"outputs": [],
"source": [
"print(X_train.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18bfb844",
"metadata": {},
"outputs": [],
"source": [
"print(X_train[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "69faeee0",
"metadata": {},
"outputs": [],
"source": [
"print(y_train.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c2237e4",
"metadata": {},
"outputs": [],
"source": [
"print(y_train)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1f4d8fc",
"metadata": {},
"outputs": [],
"source": [
"#Visualiising the dataset\n",
"plt.figure()\n",
"plt.scatter(X[:, 0], X[:,1], c=y, cmap=cmap, edgecolor ='k', s =20)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7423a20e",
"metadata": {},
"outputs": [],
"source": [
"#calculating euclidean distance\n",
"def euclidean_distance(a, b):\n",
" p = 2\n",
" return np.sqrt(np.sum((a-b)**p))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9850c7f2",
"metadata": {},
"outputs": [],
"source": [
"class KNN:\n",
" def __init__(self, k =2):\n",
" self.k = k\n",
" def fit(self, X, y):\n",
" self.X_train = X\n",
" self.y_train = y\n",
" def predict(self, X):\n",
" predicted_labels = [self._predict(x) for x in X]\n",
" return np.array(predicted_labels)\n",
" def _predict(self, x):\n",
" #Compute the distances\n",
" distances =[euclidean_distance(x, x_train) for x_train in self.X_train]\n",
" #get the nearest values of k\n",
" k_values = np.argsort(distances)[:self.k]\n",
" k_nearest_values = [self.y_train[i] for i in k_values]\n",
" #Get the most common class\n",
" most_common = Counter(k_nearest_values).most_common(1)\n",
" return most_common[0][0]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5e5f294",
"metadata": {},
"outputs": [],
"source": [
"model = KNN( k =16)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b36fcb84",
"metadata": {},
"outputs": [],
"source": [
"#Getting the prediction and accuracy\n",
"model.fit(X_train, y_train)\n",
"predictions = model.predict(X_test)\n",
"accuracy = np.sum(predictions == y_test)/len(y_test)\n",
"print (accuracy)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29fd98ca",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading