datascienceife · Lydia02 · Jul 9, 2021 · Jul 9, 2021
diff --git a/Lydiaaa.ipynb b/Lydiaaa.ipynb
@@ -0,0 +1,301 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cbe3555e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#importing the libraries\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn import datasets\n",
+    "from collections import Counter\n",
+    "from matplotlib.colors import ListedColormap"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2d7ec577",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#About the  dataset\n",
+    "#this dataset contains the information of the users of social network, \n",
+    "#it has several business clients. one of the user works in a car company and\n",
+    "#just launched their brand new SUV for a token.\n",
+    "#we are going to build a predict model to see if other users will get or not.\n",
+    "#based on  two columns age and the estimated salaries\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a177dc9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#importing the dataset\n",
+    "dataset = pd.read_csv('Social_Network_Ads.csv') "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ec1ddc6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cmap = ListedColormap(['red', 'blue'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "81cb85d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = dataset.iloc[:, [2, 3]].values\n",
+    "y = dataset.iloc[:, 4].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f428132c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Splitting the dataset into train set and test set\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.25, random_state = 124)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7a544025",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Feature Scaling\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "sc_x = StandardScaler()\n",
+    "X_train = sc_x.fit_transform(X_train)\n",
+    "X_test = sc_x.transform(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d73937aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23d66b87",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcc5cca4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "74fd6605",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.preprocessing import LabelEncoder\n",
+    "le = LabelEncoder()\n",
+    "dataset['Gender'] = le.fit_transform(dataset['Gender'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "17ba9cfe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2b98d3fa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6b42ebce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(X_train.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18bfb844",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(X_train[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "69faeee0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(y_train.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8c2237e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b1f4d8fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Visualiising the dataset\n",
+    "plt.figure()\n",
+    "plt.scatter(X[:, 0], X[:,1], c=y, cmap=cmap, edgecolor ='k', s =20)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7423a20e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#calculating euclidean distance\n",
+    "def euclidean_distance(a, b):\n",
+    "    p = 2\n",
+    "    return np.sqrt(np.sum((a-b)**p))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9850c7f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class KNN:\n",
+    "    def __init__(self, k =2):\n",
+    "        self.k = k\n",
+    "    def fit(self, X, y):\n",
+    "        self.X_train = X\n",
+    "        self.y_train = y\n",
+    "    def predict(self, X):\n",
+    "        predicted_labels = [self._predict(x) for x in X]\n",
+    "        return np.array(predicted_labels)\n",
+    "    def _predict(self, x):\n",
+    "        #Compute the distances\n",
+    "        distances =[euclidean_distance(x, x_train) for x_train in self.X_train]\n",
+    "        #get the nearest values of k\n",
+    "        k_values = np.argsort(distances)[:self.k]\n",
+    "        k_nearest_values = [self.y_train[i] for i in k_values]\n",
+    "        #Get the most common class\n",
+    "        most_common = Counter(k_nearest_values).most_common(1)\n",
+    "        return most_common[0][0]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5e5f294",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = KNN( k =16)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b36fcb84",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Getting the prediction and accuracy\n",
+    "model.fit(X_train, y_train)\n",
+    "predictions = model.predict(X_test)\n",
+    "accuracy = np.sum(predictions == y_test)/len(y_test)\n",
+    "print (accuracy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "29fd98ca",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}