From 20c6bb3a5f73d543abc07ae5a8559e5c35acc40f Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Wed, 26 Sep 2018 19:08:46 +0530 Subject: [PATCH 01/12] Created using Colaboratory --- Basic_Pandas.ipynb | 790 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 790 insertions(+) create mode 100644 Basic_Pandas.ipynb diff --git a/Basic_Pandas.ipynb b/Basic_Pandas.ipynb new file mode 100644 index 0000000..b2b8a30 --- /dev/null +++ b/Basic_Pandas.ipynb @@ -0,0 +1,790 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Basic Pandas.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "[View in Colaboratory](https://colab.research.google.com/github/ArnabG99/Assignment-3/blob/ArnabG99/Basic_Pandas.ipynb)" + ] + }, + { + "metadata": { + "id": "cGbE814_Xaf9", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Pandas\n", + "\n", + "Pandas is an open-source, BSD-licensed Python library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language. 
Python with Pandas is used in a wide range of academic and commercial fields, including finance, economics, statistics, analytics, etc. In this tutorial, we will learn the various features of Python Pandas and how to use them in practice.\n", + "\n", + "\n", + "## Import pandas and numpy" + ] + }, + { + "metadata": { + "id": "irlVYeeAXPDL", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "BI2J-zdMbGwE", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### This is your playground; feel free to explore other pandas functions\n", + "\n", + "#### Create Series from numpy array, list and dict\n", + "\n", + "Don't know what a series is?\n", + "\n", + "[Series Doc](https://pandas.pydata.org/pandas-docs/version/0.22/generated/pandas.Series.html)" + ] + }, + { + "metadata": { + "id": "GeEct691YGE3", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "outputId": "b6e56093-cc64-4332-bc7d-94f9a83a1390" + }, + "cell_type": "code", + "source": [ + "a_ascii = ord('A')\n", + "z_ascii = ord('Z')\n", + "alphabets = [chr(i) for i in range(a_ascii, z_ascii+1)]\n", + "\n", + "print(alphabets)\n", + "\n", + "numbers = np.arange(26)\n", + "\n", + "print(numbers)\n", + "\n", + "print(type(alphabets), type(numbers))\n", + "\n", + "alpha_numbers = dict(zip(alphabets, numbers))\n", + "\n", + "print(alpha_numbers)\n", + "\n", + "print(type(alpha_numbers))" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']\n", + "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23\n", + " 24 25]\n", + " \n", + "{'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 
'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "6ouDfjWab_Mc", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 476 + }, + "outputId": "6c7a68a3-6b81-4c60-c475-b6c78cce0f3a" + }, + "cell_type": "code", + "source": [ + "series1 = pd.Series(alphabets)\n", + "print(series1)" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "0 A\n", + "1 B\n", + "2 C\n", + "3 D\n", + "4 E\n", + "5 F\n", + "6 G\n", + "7 H\n", + "8 I\n", + "9 J\n", + "10 K\n", + "11 L\n", + "12 M\n", + "13 N\n", + "14 O\n", + "15 P\n", + "16 Q\n", + "17 R\n", + "18 S\n", + "19 T\n", + "20 U\n", + "21 V\n", + "22 W\n", + "23 X\n", + "24 Y\n", + "25 Z\n", + "dtype: object\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "At7nY7vVcBZ3", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 476 + }, + "outputId": "a1a2e548-1a41-4cb6-95ad-9e293dfb1011" + }, + "cell_type": "code", + "source": [ + "series2 = pd.Series(numbers)\n", + "print(series2)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "0 0\n", + "1 1\n", + "2 2\n", + "3 3\n", + "4 4\n", + "5 5\n", + "6 6\n", + "7 7\n", + "8 8\n", + "9 9\n", + "10 10\n", + "11 11\n", + "12 12\n", + "13 13\n", + "14 14\n", + "15 15\n", + "16 16\n", + "17 17\n", + "18 18\n", + "19 19\n", + "20 20\n", + "21 21\n", + "22 22\n", + "23 23\n", + "24 24\n", + "25 25\n", + "dtype: int64\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "J5z-2CWAdH6N", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 476 + }, + "outputId": "b6e96e60-a159-4840-8f6c-869f94bf4958" + }, + "cell_type": "code", + "source": [ + "series3 = pd.Series(alpha_numbers)\n", + "print(series3)" + ], 
+ "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "A 0\n", + "B 1\n", + "C 2\n", + "D 3\n", + "E 4\n", + "F 5\n", + "G 6\n", + "H 7\n", + "I 8\n", + "J 9\n", + "K 10\n", + "L 11\n", + "M 12\n", + "N 13\n", + "O 14\n", + "P 15\n", + "Q 16\n", + "R 17\n", + "S 18\n", + "T 19\n", + "U 20\n", + "V 21\n", + "W 22\n", + "X 23\n", + "Y 24\n", + "Z 25\n", + "dtype: int64\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "fYzblGGudKjO", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": "70d2e0fb-e4b1-41c1-d8d1-47f72a8c8c98" + }, + "cell_type": "code", + "source": [ + "#replace head() with head(n) where n can be any number between [0-25] and observe the output in each case \n", + "series3.head()" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "A 0\n", + "B 1\n", + "C 2\n", + "D 3\n", + "E 4\n", + "dtype: int64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 6 + } + ] + }, + { + "metadata": { + "id": "OwsJIf5feTtg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Create DataFrame from lists\n", + "\n", + "[DataFrame Doc](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)" + ] + }, + { + "metadata": { + "id": "73UTZ07EdWki", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 855 + }, + "outputId": "208099a4-81d7-41cd-9193-aca56fad1761" + }, + "cell_type": "code", + "source": [ + "data = {'alphabets': alphabets, 'values': numbers}\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "#Let's change the column `values` to `alpha_numbers`\n", + "\n", + "#df.columns = ['alphabets', 'alpha_numbers']\n", + "\n", + "df" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alphabetsvalues
0A0
1B1
2C2
3D3
4E4
5F5
6G6
7H7
8I8
9J9
10K10
11L11
12M12
13N13
14O14
15P15
16Q16
17R17
18S18
19T19
20U20
21V21
22W22
23X23
24Y24
25Z25
\n", + "
" + ], + "text/plain": [ + " alphabets values\n", + "0 A 0\n", + "1 B 1\n", + "2 C 2\n", + "3 D 3\n", + "4 E 4\n", + "5 F 5\n", + "6 G 6\n", + "7 H 7\n", + "8 I 8\n", + "9 J 9\n", + "10 K 10\n", + "11 L 11\n", + "12 M 12\n", + "13 N 13\n", + "14 O 14\n", + "15 P 15\n", + "16 Q 16\n", + "17 R 17\n", + "18 S 18\n", + "19 T 19\n", + "20 U 20\n", + "21 V 21\n", + "22 W 22\n", + "23 X 23\n", + "24 Y 24\n", + "25 Z 25" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 7 + } + ] + }, + { + "metadata": { + "id": "uaK_1EO9etGS", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 140 + }, + "outputId": "b7e765dc-4d30-4bbb-a43b-485eb592b40d" + }, + "cell_type": "code", + "source": [ + "# transpose\n", + "\n", + "df.T\n", + "\n", + "# there are many more operations which we can perform look at the documentation with the subsequent exercises we will learn more" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...16171819202122232425
alphabetsABCDEFGHIJ...QRSTUVWXYZ
values0123456789...16171819202122232425
\n", + "

2 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 23 \\\n", + "alphabets A B C D E F G H I J ... Q R S T U V W X \n", + "values 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 23 \n", + "\n", + " 24 25 \n", + "alphabets Y Z \n", + "values 24 25 \n", + "\n", + "[2 rows x 26 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 8 + } + ] + }, + { + "metadata": { + "id": "ZYonoaW8gEAJ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Extract Items from a series" + ] + }, + { + "metadata": { + "id": "tc1-KX_Bfe7U", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))\n", + "pos = [0, 4, 8, 14, 20]\n", + "\n", + "vowels = ser.take(pos)\n", + "\n", + "df = pd.DataFrame(vowels)#, columns=['vowels'])\n", + "\n", + "df.columns = ['vowels']\n", + "\n", + "#df.index = [0, 1, 2, 3, 4]\n", + "\n", + "df" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "cmDxwtDNjWpO", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Change the first character of each word to upper case in each word of ser" + ] + }, + { + "metadata": { + "id": "5KagP9PpgV2F", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "ser = pd.Series(['we', 'are', 'learning', 'pandas'])\n", + "\n", + "ser.map(lambda x : x.title())\n", + "\n", + "titles = [i.title() for i in ser]\n", + "\n", + "titles" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "qn47ee-MkZN8", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Reindexing" + ] + }, + { + "metadata": { + "id": "h5R0JL2NjuFS", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "my_index = [1, 2, 3, 4, 5]\n", + "\n", + "df1 = pd.DataFrame({'upper values': ['A', 'B', 'C', 'D', 'E'],\n", + " 'lower values': ['a', 'b', 'c', 'd', 'e']},\n", + " index = 
my_index)\n", + "\n", + "df1" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "G_Frvc3mk93k", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "new_index = [2, 5, 4, 3, 1]\n", + "\n", + "df1.reindex(index = new_index)" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From db81c390fcf3c2d3c1d0e98735a3b03c23cac088 Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Sat, 29 Sep 2018 02:57:02 +0530 Subject: [PATCH 02/12] Created using Colaboratory --- ArnabG99.ipynb | 1082 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 1052 insertions(+), 30 deletions(-) diff --git a/ArnabG99.ipynb b/ArnabG99.ipynb index 9e2543a..c702352 100644 --- a/ArnabG99.ipynb +++ b/ArnabG99.ipynb @@ -1,32 +1,1054 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "ArnabG99.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + } }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "[View in Colaboratory](https://colab.research.google.com/github/ArnabG99/Assignment-3/blob/ArnabG99/ArnabG99.ipynb)" + ] + }, + { + "metadata": { + "id": "cGbE814_Xaf9", + "colab_type": "text" + }, + 
"cell_type": "markdown", + "source": [ + "# Pandas\n", + "\n", + "Pandas is an open-source, BSD-licensed Python library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language. Python with Pandas is used in a wide range of academic and commercial fields, including finance, economics, statistics, analytics, etc. In this tutorial, we will learn the various features of Python Pandas and how to use them in practice.\n", + "\n", + "\n", + "## Import pandas and numpy" + ] + }, + { + "metadata": { + "id": "irlVYeeAXPDL", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "BI2J-zdMbGwE", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### This is your playground; feel free to explore other pandas functions\n", + "\n", + "#### Create Series from numpy array, list and dict\n", + "\n", + "Don't know what a series is?\n", + "\n", + "[Series Doc](https://pandas.pydata.org/pandas-docs/version/0.22/generated/pandas.Series.html)" + ] + }, + { + "metadata": { + "id": "GeEct691YGE3", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "a_ascii = ord('A')\n", + "z_ascii = ord('Z')\n", + "alphabets = [chr(i) for i in range(a_ascii, z_ascii+1)]\n", + "\n", + "print(alphabets)\n", + "\n", + "numbers = np.arange(26)\n", + "\n", + "print(numbers)\n", + "\n", + "print(type(alphabets), type(numbers))\n", + "\n", + "alpha_numbers = dict(zip(alphabets, numbers))\n", + "\n", + "print(alpha_numbers)\n", + "\n", + "print(type(alpha_numbers))" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "6ouDfjWab_Mc", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "series1 = pd.Series(alphabets)\n", + "print(series1)" + ], + "execution_count": 0, + "outputs": 
[] + }, + { + "metadata": { + "id": "At7nY7vVcBZ3", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "series2 = pd.Series(numbers)\n", + "print(series2)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "J5z-2CWAdH6N", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "series3 = pd.Series(alpha_numbers)\n", + "print(series3)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "fYzblGGudKjO", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "#replace head() with head(n) where n can be any number between [0-25] and observe the output in each case \n", + "series3.head()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "OwsJIf5feTtg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Create DataFrame from lists\n", + "\n", + "[DataFrame Doc](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)" + ] + }, + { + "metadata": { + "id": "73UTZ07EdWki", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "data = {'alphabets': alphabets, 'values': numbers}\n", + "\n", + "df = pd.DataFrame(data)\n", + "\n", + "#Let's change the column `values` to `alpha_numbers`\n", + "\n", + "#df.columns = ['alphabets', 'alpha_numbers']\n", + "\n", + "df" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "uaK_1EO9etGS", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# transpose\n", + "\n", + "df.T\n", + "\n", + "# there are many more operations which we can perform look at the documentation with the subsequent exercises we will learn more" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "ZYonoaW8gEAJ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Extract Items from a series" + ] + }, + { + "metadata": { + 
"id": "tc1-KX_Bfe7U", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))\n", + "pos = [0, 4, 8, 14, 20]\n", + "\n", + "vowels = ser.take(pos)\n", + "\n", + "df = pd.DataFrame(vowels)#, columns=['vowels'])\n", + "\n", + "df.columns = ['vowels']\n", + "\n", + "#df.index = [0, 1, 2, 3, 4]\n", + "\n", + "df" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "cmDxwtDNjWpO", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Change the first character of each word to upper case in each word of ser" + ] + }, + { + "metadata": { + "id": "5KagP9PpgV2F", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "ser = pd.Series(['we', 'are', 'learning', 'pandas'])\n", + "\n", + "ser.map(lambda x : x.title())\n", + "\n", + "titles = [i.title() for i in ser]\n", + "\n", + "titles" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "qn47ee-MkZN8", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Reindexing" + ] + }, + { + "metadata": { + "id": "h5R0JL2NjuFS", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "my_index = [1, 2, 3, 4, 5]\n", + "\n", + "df1 = pd.DataFrame({'upper values': ['A', 'B', 'C', 'D', 'E'],\n", + " 'lower values': ['a', 'b', 'c', 'd', 'e']},\n", + " index = my_index)\n", + "\n", + "df1" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "G_Frvc3mk93k", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "new_index = [2, 5, 4, 3, 1]\n", + "\n", + "df1.reindex(index = new_index)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "J82LU53m_OU0", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Get to know your Data\n", + "\n", + "\n", + "#### Import necessary modules\n" + ] + }, + { + "metadata": { + "id": 
"ZyO1UXL8mtSj", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "yXTzTowtnwGI", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Loading CSV Data to a DataFrame" + ] + }, + { + "metadata": { + "id": "H1Bjlb5wm9f-", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df = pd.read_csv('https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv')\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "KE-k7b_Mn5iN", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### See the top 5 rows\n" + ] + }, + { + "metadata": { + "id": "HY2Ps7xMn4ao", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df.head()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "ZQXekIodqOZu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Find number of rows and columns\n" + ] + }, + { + "metadata": { + "id": "6Y-A-lbFqR82", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "print(iris_df.shape)\n", + "\n", + "#first is the number of rows and second is the number of columns\n", + "#select either one by simple indexing\n", + "\n", + "#print(iris_df.shape[0])\n", + "#print(iris_df.shape[1])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "4ckCiGPhrC_t", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Print all columns" + ] + }, + { + "metadata": { + "id": "S6jgMyRDrF2a", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "print(iris_df.columns)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "kVav5-ACtIqS", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Check Index\n" + ] 
+ }, + { + "metadata": { + "id": "iu3I9zIGtLDX", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "print(iris_df.index)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "psCc7PborOCQ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Right now the iris dataset has all the species grouped together; let's shuffle it" + ] + }, + { + "metadata": { + "id": "Bxc8i6avrZPw", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "#generate a random permutation of the index\n", + "\n", + "print(iris_df.head())\n", + "\n", + "new_index = np.random.permutation(iris_df.index)\n", + "iris_df = iris_df.reindex(index = new_index)\n", + "\n", + "print(iris_df.head())" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "j32h8022sRT8", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### We can also apply an operation on a whole column of iris_df" + ] + }, + { + "metadata": { + "id": "seYXHXsYsYJI", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "#original\n", + "\n", + "print(iris_df.head())\n", + "\n", + "iris_df['sepal_width'] *= 10\n", + "\n", + "#changed\n", + "\n", + "print(iris_df.head())\n", + "\n", + "#let's undo the operation\n", + "\n", + "iris_df['sepal_width'] /= 10\n", + "\n", + "print(iris_df.head())" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "R-Ca-LBLzjiF", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Show all the rows where sepal_width > 3.3" + ] + }, + { + "metadata": { + "id": "WJ7W-F-d0AoZ", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df[iris_df['sepal_width']>3.3]" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "gH3DnhCq2Cbl", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Club two filters together - 
Find all samples where sepal_width > 3.3 and species is versicolor" + ] + }, + { + "metadata": { + "id": "4U7ksr_R2H7M", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df[(iris_df['sepal_width']>3.3) & (iris_df['species'] == 'versicolor')] " + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "1lmnB3ot2u7I", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Sorting a column by value" + ] + }, + { + "metadata": { + "id": "K7KIj6fv2zWP", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df.sort_values(by='sepal_width')#, ascending = False)\n", + "#pass ascending = False for descending order" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "9jg_Z4YCoMSV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### List all the unique species" + ] + }, + { + "metadata": { + "id": "M6EN78ufoJY7", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "species = iris_df['species'].unique()\n", + "\n", + "print(species)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "wG1i5nxBodmB", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Selecting a particular species using boolean mask (learnt in previous exercise)" + ] + }, + { + "metadata": { + "id": "gZvpbKBwoVUe", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "setosa = iris_df[iris_df['species'] == species[0]]\n", + "\n", + "setosa.head()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "7tumfZ3DotPG", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# do the same for other 2 species \n", + "versicolor = iris_df[iris_df['species'] == species[1]]\n", + "\n", + "versicolor.head()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": 
"cUYm5UqVpDPy", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "\n", + "\n", + "virginica = iris_df[iris_df['species'] == species[2]]\n", + "\n", + "virginica.head()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "-y1wDc8SpdQs", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Describe each created species to see the difference\n", + "\n" + ] + }, + { + "metadata": { + "id": "eHrn3ZVRpOk5", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "setosa.describe()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "GwJFT2GlpwUv", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "versicolor.describe()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Ad4qhSZLpztf", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "virginica.describe()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Vdu0ulZWtr09", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Let's plot and see the difference" + ] + }, + { + "metadata": { + "id": "PEVMzRvpttmD", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "##### import matplotlib.pyplot " + ] + }, + { + "metadata": { + "id": "rqDXuuAtt7C3", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "#hist creates a histogram there are many more plots(see the documentation) you can play with it.\n", + "\n", + "plt.hist(setosa['sepal_length'])\n", + "plt.hist(versicolor['sepal_length'])\n", + "plt.hist(virginica['sepal_length'])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "2LTtpUJEibjg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Pandas Exercise :\n", + "\n", + "\n", + "#### import 
necessary modules" + ] + }, + { + "metadata": { + "id": "c3_UBbMRhiKx", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import pandas as pd" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "tp-cTCyWi8mR", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Load url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\" to a dataframe named wine_df\n", + "\n", + "This is a wine dataset\n", + "\n" + ] + }, + { + "metadata": { + "id": "DMojQY3thrRi", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "BF9MMjoZjSlg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### print first five rows" + ] + }, + { + "metadata": { + "id": "1vSMQdnHjYNU", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Tet6P2DvjY3T", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### assign wine_df to a different variable wine_df_copy and then delete all odd rows of wine_df_copy\n", + "\n", + "[Hint](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.drop.html)" + ] + }, + { + "metadata": { + "id": "CMj3qSdJjx0u", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "o6Cs6T1Rjz71", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Assign the columns as below:\n", + "\n", + "The attributes are (donated by Riccardo Leardi, riclea '@' anchem.unige.it): \n", + "1) Alcohol \n", + "2) Malic acid \n", + "3) Ash \n", + "4) Alcalinity of ash \n", + "5) Magnesium \n", + "6) Total phenols \n", + "7) Flavanoids \n", + "8) Nonflavanoid phenols 
\n", + "9) Proanthocyanins \n", + "10) Color intensity \n", + "11) Hue \n", + "12) OD280/OD315 of diluted wines \n", + "13) Proline " + ] + }, + { + "metadata": { + "id": "my8HB4V4j779", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Zqi7hwWpkNbH", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Set the values of the first 3 rows of the alcohol column to NaN\n", + "\n", + "Hint: Use iloc to select the first 3 rows of wine_df" + ] + }, + { + "metadata": { + "id": "buyT4vX4kPMl", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "RQMNI2UHkP3o", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Create an array of 10 random numbers up to 10 and assign it to a variable named `random`" + ] + }, + { + "metadata": { + "id": "xunmCjaEmDwZ", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "hELUakyXmFSu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Use the random numbers you generated as an index and assign NaN to each of those cells of the column alcohol" + ] + }, + { + "metadata": { + "id": "zMgaNnNHmP01", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "PHyK_vRsmRwV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### How many missing values do we have? 
\n", + "\n", + "Hint: you can use isnull() and sum()" + ] + }, + { + "metadata": { + "id": "EnOYhmEqmfKp", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "-Fd4WBklmf1_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Delete the rows that contain missing values " + ] + }, + { + "metadata": { + "id": "As7IC6Ktms8-", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "DlpG8drhmz7W", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### BONUS: Play with the data set below" + ] + }, + { + "metadata": { + "id": "mD40T0Cnm5SA", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From 439240f8d0abc0bd304cf35746720262943074d9 Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Sun, 27 Jan 2019 18:57:10 +0530 Subject: [PATCH 03/12] Created using Colaboratory --- Exercise.ipynb | 309 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 309 insertions(+) create mode 100644 Exercise.ipynb diff --git a/Exercise.ipynb b/Exercise.ipynb new file mode 100644 index 0000000..2a22ee7 --- /dev/null +++ b/Exercise.ipynb @@ -0,0 +1,309 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Exercise.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "metadata": { + "id": "2LTtpUJEibjg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + 
"# Pandas Exercise :\n", + "\n", + "\n", + "#### import necessary modules" + ] + }, + { + "metadata": { + "id": "c3_UBbMRhiKx", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import pandas as pd" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "tp-cTCyWi8mR", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Load url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\" to a dataframe named wine_df\n", + "\n", + "This is a wine dataset\n", + "\n" + ] + }, + { + "metadata": { + "id": "DMojQY3thrRi", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "BF9MMjoZjSlg", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### print first five rows" + ] + }, + { + "metadata": { + "id": "1vSMQdnHjYNU", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Tet6P2DvjY3T", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### assign wine_df to a different variable wine_df_copy and then delete all odd rows of wine_df_copy\n", + "\n", + "[Hint](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.drop.html)" + ] + }, + { + "metadata": { + "id": "CMj3qSdJjx0u", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "o6Cs6T1Rjz71", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Assign the columns as below:\n", + "\n", + "The attributes are (dontated by Riccardo Leardi, riclea '@' anchem.unige.it): \n", + "1) Alcohol \n", + "2) Malic acid \n", + "3) Ash \n", + "4) Alcalinity of ash \n", + "5) Magnesium \n", + "6) Total 
phenols \n", + "7) Flavanoids \n", + "8) Nonflavanoid phenols \n", + "9) Proanthocyanins \n", + "10) Color intensity \n", + "11) Hue \n", + "12) OD280/OD315 of diluted wines \n", + "13) Proline " + ] + }, + { + "metadata": { + "id": "my8HB4V4j779", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Zqi7hwWpkNbH", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Set the values of the first 3 rows of the alcohol column to NaN\n", + "\n", + "Hint: Use iloc to select the first 3 rows of wine_df" + ] + }, + { + "metadata": { + "id": "buyT4vX4kPMl", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "RQMNI2UHkP3o", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Create an array of 10 random numbers up to 10 and assign it to a variable named `random`" + ] + }, + { + "metadata": { + "id": "xunmCjaEmDwZ", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "hELUakyXmFSu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Use the random numbers you generated as an index and assign NaN to each corresponding cell of the column alcohol" + ] + }, + { + "metadata": { + "id": "zMgaNnNHmP01", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "PHyK_vRsmRwV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### How many missing values do we have? 
\n", + "\n", + "Hint: you can use isnull() and sum()" + ] + }, + { + "metadata": { + "id": "EnOYhmEqmfKp", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "-Fd4WBklmf1_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Delete the rows that contain missing values " + ] + }, + { + "metadata": { + "id": "As7IC6Ktms8-", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "DlpG8drhmz7W", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "### BONUS: Play with the data set below" + ] + }, + { + "metadata": { + "id": "mD40T0Cnm5SA", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From 9eece543aae6f88d8eb81a81b43fd24ab67ef8fc Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Sun, 27 Jan 2019 18:59:55 +0530 Subject: [PATCH 04/12] Update Exercise.ipynb --- Exercise.ipynb | 3559 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 3521 insertions(+), 38 deletions(-) diff --git a/Exercise.ipynb b/Exercise.ipynb index 2a22ee7..4012296 100644 --- a/Exercise.ipynb +++ b/Exercise.ipynb @@ -21,7 +21,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "[View in Colaboratory](https://colab.research.google.com/github/s-bose/Assignment-3/blob/s-bose/Pandas_Exercise.ipynb)" ] }, { @@ -68,14 +68,1236 @@ "metadata": { "id": "DMojQY3thrRi", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1906 + }, + "outputId": "59abbbb2-9f4b-4970-b2d6-d3e01afd2dd3" }, "cell_type": "code", "source": [ - "" + "wine_df = 
pd.read_csv(\"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\")\n", + "wine_df\n" ], - "execution_count": 0, - "outputs": [] + "execution_count": 116, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
114.231.712.4315.61272.83.06.282.295.641.043.921065
0113.201.782.1411.21002.652.760.261.284.3800001.053.401050
1113.162.362.6718.61012.803.240.302.815.6800001.033.171185
2114.371.952.5016.81133.853.490.242.187.8000000.863.451480
3113.242.592.8721.01182.802.690.391.824.3200001.042.93735
4114.201.762.4515.21123.273.390.341.976.7500001.052.851450
5114.391.872.4514.6962.502.520.301.985.2500001.023.581290
6114.062.152.6117.61212.602.510.311.255.0500001.063.581295
7114.831.642.1714.0972.802.980.291.985.2000001.082.851045
8113.861.352.2716.0982.983.150.221.857.2200001.013.551045
9114.102.162.3018.01052.953.320.222.385.7500001.253.171510
10114.121.482.3216.8952.202.430.261.575.0000001.172.821280
11113.751.732.4116.0892.602.760.291.815.6000001.152.901320
12114.751.732.3911.4913.103.690.432.815.4000001.252.731150
13114.381.872.3812.01023.303.640.292.967.5000001.203.001547
14113.631.812.7017.21122.852.910.301.467.3000001.282.881310
15114.301.922.7220.01202.803.140.331.976.2000001.072.651280
16113.831.572.6220.01152.953.400.401.726.6000001.132.571130
17114.191.592.4816.51083.303.930.321.868.7000001.232.821680
18113.643.102.5615.21162.703.030.171.665.1000000.963.36845
19114.061.632.2816.01263.003.170.242.105.6500001.093.71780
20112.933.802.6518.61022.412.410.251.984.5000001.033.52770
21113.711.862.3616.61012.612.880.271.693.8000001.114.001035
22112.851.602.5217.8952.482.370.261.463.9300001.093.631015
23113.501.812.6120.0962.532.610.281.663.5200001.123.82845
24113.052.053.2225.01242.632.680.471.923.5800001.133.20830
25113.391.772.6216.1932.852.940.341.454.8000000.923.221195
26113.301.722.1417.0942.402.190.271.353.9500001.022.771285
27113.871.902.8019.41072.952.970.371.764.5000001.253.40915
28114.021.682.2116.0962.652.330.261.984.7000001.043.591035
29113.731.502.7022.51013.003.250.292.385.7000001.192.711285
.............................................
147313.323.242.3821.5921.930.760.451.258.4200000.551.62650
148313.083.902.3621.51131.411.390.341.149.4000000.571.33550
149313.503.122.6224.01231.401.570.221.258.6000000.591.30500
150312.792.672.4822.01121.481.360.241.2610.8000000.481.47480
151313.111.902.7525.51162.201.280.261.567.1000000.611.33425
152313.233.302.2818.5981.800.830.611.8710.5200000.561.51675
153312.581.292.1020.01031.480.580.531.407.6000000.581.55640
154313.175.192.3222.0931.740.630.611.557.9000000.601.48725
155313.844.122.3819.5891.800.830.481.569.0100000.571.64480
156312.453.032.6427.0971.900.580.631.147.5000000.671.73880
157314.341.682.7025.0982.801.310.532.7013.0000000.571.96660
158313.481.672.6422.5892.601.100.522.2911.7500000.571.78620
159312.363.832.3821.0882.300.920.501.047.6500000.561.58520
160313.693.262.5420.01071.830.560.500.805.8800000.961.82680
161312.853.272.5822.01061.650.600.600.965.5800000.872.11570
162312.963.452.3518.51061.390.700.400.945.2800000.681.75675
163313.782.762.3022.0901.350.680.411.039.5800000.701.68615
164313.734.362.2622.5881.280.470.521.156.6200000.781.75520
165313.453.702.6023.01111.700.920.431.4610.6800000.851.56695
166312.823.372.3019.5881.480.660.400.9710.2600000.721.75685
167313.582.582.6924.51051.550.840.391.548.6600000.741.80750
168313.404.602.8625.01121.980.960.271.118.5000000.671.92630
169312.203.032.3219.0961.250.490.400.735.5000000.661.83510
170312.772.392.2819.5861.390.510.480.649.8999990.571.63470
171314.162.512.4820.0911.680.700.441.249.7000000.621.71660
172313.715.652.4520.5951.680.610.521.067.7000000.641.74740
173313.403.912.4823.01021.800.750.431.417.3000000.701.56750
174313.274.282.2620.01201.590.690.431.3510.2000000.591.56835
175313.172.592.3720.01201.650.680.531.469.3000000.601.62840
176314.134.102.7424.5962.050.760.561.359.2000000.611.60560
\n", + "

177 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.380000 1.05 \n", + "1 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.680000 1.03 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.800000 0.86 \n", + "3 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.320000 1.04 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.750000 1.05 \n", + "5 1 14.39 1.87 2.45 14.6 96 2.50 2.52 0.30 1.98 5.250000 1.02 \n", + "6 1 14.06 2.15 2.61 17.6 121 2.60 2.51 0.31 1.25 5.050000 1.06 \n", + "7 1 14.83 1.64 2.17 14.0 97 2.80 2.98 0.29 1.98 5.200000 1.08 \n", + "8 1 13.86 1.35 2.27 16.0 98 2.98 3.15 0.22 1.85 7.220000 1.01 \n", + "9 1 14.10 2.16 2.30 18.0 105 2.95 3.32 0.22 2.38 5.750000 1.25 \n", + "10 1 14.12 1.48 2.32 16.8 95 2.20 2.43 0.26 1.57 5.000000 1.17 \n", + "11 1 13.75 1.73 2.41 16.0 89 2.60 2.76 0.29 1.81 5.600000 1.15 \n", + "12 1 14.75 1.73 2.39 11.4 91 3.10 3.69 0.43 2.81 5.400000 1.25 \n", + "13 1 14.38 1.87 2.38 12.0 102 3.30 3.64 0.29 2.96 7.500000 1.20 \n", + "14 1 13.63 1.81 2.70 17.2 112 2.85 2.91 0.30 1.46 7.300000 1.28 \n", + "15 1 14.30 1.92 2.72 20.0 120 2.80 3.14 0.33 1.97 6.200000 1.07 \n", + "16 1 13.83 1.57 2.62 20.0 115 2.95 3.40 0.40 1.72 6.600000 1.13 \n", + "17 1 14.19 1.59 2.48 16.5 108 3.30 3.93 0.32 1.86 8.700000 1.23 \n", + "18 1 13.64 3.10 2.56 15.2 116 2.70 3.03 0.17 1.66 5.100000 0.96 \n", + "19 1 14.06 1.63 2.28 16.0 126 3.00 3.17 0.24 2.10 5.650000 1.09 \n", + "20 1 12.93 3.80 2.65 18.6 102 2.41 2.41 0.25 1.98 4.500000 1.03 \n", + "21 1 13.71 1.86 2.36 16.6 101 2.61 2.88 0.27 1.69 3.800000 1.11 \n", + "22 1 12.85 1.60 2.52 17.8 95 2.48 2.37 0.26 1.46 3.930000 1.09 \n", + "23 1 13.50 1.81 2.61 20.0 96 2.53 2.61 0.28 1.66 3.520000 1.12 \n", + "24 1 13.05 2.05 3.22 25.0 124 2.63 2.68 0.47 1.92 3.580000 1.13 \n", + "25 1 13.39 1.77 2.62 16.1 93 2.85 2.94 0.34 1.45 4.800000 0.92 \n", + "26 1 13.30 1.72 2.14 17.0 94 2.40 2.19 0.27 1.35 
3.950000 1.02 \n", + "27 1 13.87 1.90 2.80 19.4 107 2.95 2.97 0.37 1.76 4.500000 1.25 \n", + "28 1 14.02 1.68 2.21 16.0 96 2.65 2.33 0.26 1.98 4.700000 1.04 \n", + "29 1 13.73 1.50 2.70 22.5 101 3.00 3.25 0.29 2.38 5.700000 1.19 \n", + ".. .. ... ... ... ... ... ... ... ... ... ... ... \n", + "147 3 13.32 3.24 2.38 21.5 92 1.93 0.76 0.45 1.25 8.420000 0.55 \n", + "148 3 13.08 3.90 2.36 21.5 113 1.41 1.39 0.34 1.14 9.400000 0.57 \n", + "149 3 13.50 3.12 2.62 24.0 123 1.40 1.57 0.22 1.25 8.600000 0.59 \n", + "150 3 12.79 2.67 2.48 22.0 112 1.48 1.36 0.24 1.26 10.800000 0.48 \n", + "151 3 13.11 1.90 2.75 25.5 116 2.20 1.28 0.26 1.56 7.100000 0.61 \n", + "152 3 13.23 3.30 2.28 18.5 98 1.80 0.83 0.61 1.87 10.520000 0.56 \n", + "153 3 12.58 1.29 2.10 20.0 103 1.48 0.58 0.53 1.40 7.600000 0.58 \n", + "154 3 13.17 5.19 2.32 22.0 93 1.74 0.63 0.61 1.55 7.900000 0.60 \n", + "155 3 13.84 4.12 2.38 19.5 89 1.80 0.83 0.48 1.56 9.010000 0.57 \n", + "156 3 12.45 3.03 2.64 27.0 97 1.90 0.58 0.63 1.14 7.500000 0.67 \n", + "157 3 14.34 1.68 2.70 25.0 98 2.80 1.31 0.53 2.70 13.000000 0.57 \n", + "158 3 13.48 1.67 2.64 22.5 89 2.60 1.10 0.52 2.29 11.750000 0.57 \n", + "159 3 12.36 3.83 2.38 21.0 88 2.30 0.92 0.50 1.04 7.650000 0.56 \n", + "160 3 13.69 3.26 2.54 20.0 107 1.83 0.56 0.50 0.80 5.880000 0.96 \n", + "161 3 12.85 3.27 2.58 22.0 106 1.65 0.60 0.60 0.96 5.580000 0.87 \n", + "162 3 12.96 3.45 2.35 18.5 106 1.39 0.70 0.40 0.94 5.280000 0.68 \n", + "163 3 13.78 2.76 2.30 22.0 90 1.35 0.68 0.41 1.03 9.580000 0.70 \n", + "164 3 13.73 4.36 2.26 22.5 88 1.28 0.47 0.52 1.15 6.620000 0.78 \n", + "165 3 13.45 3.70 2.60 23.0 111 1.70 0.92 0.43 1.46 10.680000 0.85 \n", + "166 3 12.82 3.37 2.30 19.5 88 1.48 0.66 0.40 0.97 10.260000 0.72 \n", + "167 3 13.58 2.58 2.69 24.5 105 1.55 0.84 0.39 1.54 8.660000 0.74 \n", + "168 3 13.40 4.60 2.86 25.0 112 1.98 0.96 0.27 1.11 8.500000 0.67 \n", + "169 3 12.20 3.03 2.32 19.0 96 1.25 0.49 0.40 0.73 5.500000 0.66 \n", + "170 3 12.77 2.39 2.28 19.5 86 
1.39 0.51 0.48 0.64 9.899999 0.57 \n", + "171 3 14.16 2.51 2.48 20.0 91 1.68 0.70 0.44 1.24 9.700000 0.62 \n", + "172 3 13.71 5.65 2.45 20.5 95 1.68 0.61 0.52 1.06 7.700000 0.64 \n", + "173 3 13.40 3.91 2.48 23.0 102 1.80 0.75 0.43 1.41 7.300000 0.70 \n", + "174 3 13.27 4.28 2.26 20.0 120 1.59 0.69 0.43 1.35 10.200000 0.59 \n", + "175 3 13.17 2.59 2.37 20.0 120 1.65 0.68 0.53 1.46 9.300000 0.60 \n", + "176 3 14.13 4.10 2.74 24.5 96 2.05 0.76 0.56 1.35 9.200000 0.61 \n", + "\n", + " 3.92 1065 \n", + "0 3.40 1050 \n", + "1 3.17 1185 \n", + "2 3.45 1480 \n", + "3 2.93 735 \n", + "4 2.85 1450 \n", + "5 3.58 1290 \n", + "6 3.58 1295 \n", + "7 2.85 1045 \n", + "8 3.55 1045 \n", + "9 3.17 1510 \n", + "10 2.82 1280 \n", + "11 2.90 1320 \n", + "12 2.73 1150 \n", + "13 3.00 1547 \n", + "14 2.88 1310 \n", + "15 2.65 1280 \n", + "16 2.57 1130 \n", + "17 2.82 1680 \n", + "18 3.36 845 \n", + "19 3.71 780 \n", + "20 3.52 770 \n", + "21 4.00 1035 \n", + "22 3.63 1015 \n", + "23 3.82 845 \n", + "24 3.20 830 \n", + "25 3.22 1195 \n", + "26 2.77 1285 \n", + "27 3.40 915 \n", + "28 3.59 1035 \n", + "29 2.71 1285 \n", + ".. ... ... 
\n", + "147 1.62 650 \n", + "148 1.33 550 \n", + "149 1.30 500 \n", + "150 1.47 480 \n", + "151 1.33 425 \n", + "152 1.51 675 \n", + "153 1.55 640 \n", + "154 1.48 725 \n", + "155 1.64 480 \n", + "156 1.73 880 \n", + "157 1.96 660 \n", + "158 1.78 620 \n", + "159 1.58 520 \n", + "160 1.82 680 \n", + "161 2.11 570 \n", + "162 1.75 675 \n", + "163 1.68 615 \n", + "164 1.75 520 \n", + "165 1.56 695 \n", + "166 1.75 685 \n", + "167 1.80 750 \n", + "168 1.92 630 \n", + "169 1.83 510 \n", + "170 1.63 470 \n", + "171 1.71 660 \n", + "172 1.74 740 \n", + "173 1.56 750 \n", + "174 1.56 835 \n", + "175 1.62 840 \n", + "176 1.60 560 \n", + "\n", + "[177 rows x 14 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 116 + } + ] }, { "metadata": { @@ -91,14 +1313,168 @@ "metadata": { "id": "1vSMQdnHjYNU", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "outputId": "78bb77f6-02ed-4cec-a277-43cb27ac0bc9" }, "cell_type": "code", "source": [ - "" + "wine_df.head(5)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 118, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
114.231.712.4315.61272.83.06.282.295.641.043.921065
0113.201.782.1411.21002.652.760.261.284.381.053.401050
1113.162.362.6718.61012.803.240.302.815.681.033.171185
2114.371.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
4114.201.762.4515.21123.273.390.341.976.751.052.851450
\n", + "
" + ], + "text/plain": [ + " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 3.92 \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 \n", + "1 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 \n", + "3 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.75 1.05 2.85 \n", + "\n", + " 1065 \n", + "0 1050 \n", + "1 1185 \n", + "2 1480 \n", + "3 735 \n", + "4 1450 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 118 + } + ] }, { "metadata": { @@ -116,14 +1492,1240 @@ "metadata": { "id": "CMj3qSdJjx0u", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1906 + }, + "outputId": "665b54d3-004e-4872-e052-034147629db6" }, "cell_type": "code", "source": [ - "" + "\n", + "wine_df_copy = wine_df.copy()\n", + "wine_df_copy.drop(index=np.arange(1, wine_df_copy.shape[0], 2), inplace = True)\n", + "wine_df_copy\n", + "\n", + "\n" ], - "execution_count": 0, - "outputs": [] + "execution_count": 119, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
114.231.712.4315.61272.83.06.282.295.641.043.921065
0113.201.782.1411.21002.652.760.261.284.3800001.053.401050
2114.371.952.5016.81133.853.490.242.187.8000000.863.451480
4114.201.762.4515.21123.273.390.341.976.7500001.052.851450
6114.062.152.6117.61212.602.510.311.255.0500001.063.581295
8113.861.352.2716.0982.983.150.221.857.2200001.013.551045
10114.121.482.3216.8952.202.430.261.575.0000001.172.821280
12114.751.732.3911.4913.103.690.432.815.4000001.252.731150
14113.631.812.7017.21122.852.910.301.467.3000001.282.881310
16113.831.572.6220.01152.953.400.401.726.6000001.132.571130
18113.643.102.5615.21162.703.030.171.665.1000000.963.36845
20112.933.802.6518.61022.412.410.251.984.5000001.033.52770
22112.851.602.5217.8952.482.370.261.463.9300001.093.631015
24113.052.053.2225.01242.632.680.471.923.5800001.133.20830
26113.301.722.1417.0942.402.190.271.353.9500001.022.771285
28114.021.682.2116.0962.652.330.261.984.7000001.043.591035
30113.581.662.3619.11062.863.190.221.956.9000001.092.881515
32113.761.532.7019.51322.952.740.501.355.4000001.253.001235
34113.481.812.4120.51002.702.980.261.865.1000001.043.47920
36113.051.652.5518.0982.452.430.291.444.2500001.122.511105
38114.223.992.5113.21283.003.040.202.085.1000000.893.53760
40113.413.842.1218.8902.452.680.271.484.2800000.913.001035
42113.243.982.2917.51032.642.630.321.664.3600000.823.00680
44114.214.042.4418.91112.852.650.301.255.2400000.873.331080
46113.901.682.1216.01013.103.390.212.146.1000000.913.33985
48113.941.732.2717.41082.883.540.322.088.9000001.123.101260
50113.831.652.6017.2942.452.990.222.295.6000001.243.371265
52113.771.902.6817.11153.002.790.391.686.3000001.132.931375
54113.561.732.4620.51162.962.780.202.456.2500000.983.031120
56113.291.972.6816.81023.003.230.311.666.0000001.072.841270
58212.370.941.3610.6881.980.570.280.421.9500001.051.82520
.............................................
118212.003.432.0019.0872.001.640.371.871.2800000.933.05564
120211.562.053.2328.51193.185.080.471.876.0000000.933.69465
122213.055.802.1321.5862.622.650.302.012.6000000.733.10380
124212.072.162.1721.0852.602.650.371.352.7600000.863.28378
126211.792.132.7828.5922.132.240.581.763.0000000.972.44466
128212.044.302.3822.0802.101.750.421.352.6000000.792.57580
130312.882.992.4020.01041.301.220.240.835.4000000.741.42530
132312.703.552.3621.51061.701.200.170.845.0000000.781.29600
134312.602.462.2018.5941.620.660.630.947.1000000.731.58695
136312.535.512.6425.0961.790.600.631.105.0000000.821.69515
138312.842.962.6124.01012.320.600.530.814.9200000.892.15590
140313.362.562.3520.0891.400.500.370.645.6000000.702.47780
142313.624.952.3520.0922.000.800.471.024.4000000.912.05550
144313.163.572.1521.01021.500.550.431.304.0000000.601.68830
146312.874.612.4821.5861.700.650.470.867.6500000.541.86625
148313.083.902.3621.51131.411.390.341.149.4000000.571.33550
150312.792.672.4822.01121.481.360.241.2610.8000000.481.47480
152313.233.302.2818.5981.800.830.611.8710.5200000.561.51675
154313.175.192.3222.0931.740.630.611.557.9000000.601.48725
156312.453.032.6427.0971.900.580.631.147.5000000.671.73880
158313.481.672.6422.5892.601.100.522.2911.7500000.571.78620
160313.693.262.5420.01071.830.560.500.805.8800000.961.82680
162312.963.452.3518.51061.390.700.400.945.2800000.681.75675
164313.734.362.2622.5881.280.470.521.156.6200000.781.75520
166312.823.372.3019.5881.480.660.400.9710.2600000.721.75685
168313.404.602.8625.01121.980.960.271.118.5000000.671.92630
170312.772.392.2819.5861.390.510.480.649.8999990.571.63470
172313.715.652.4520.5951.680.610.521.067.7000000.641.74740
174313.274.282.2620.01201.590.690.431.3510.2000000.591.56835
176314.134.102.7424.5962.050.760.561.359.2000000.611.60560
\n", + "

89 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " 1 14.23 1.71 2.43 15.6 127 2.8 3.06 .28 2.29 5.64 1.04 \\\n", + "0 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.380000 1.05 \n", + "2 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.800000 0.86 \n", + "4 1 14.20 1.76 2.45 15.2 112 3.27 3.39 0.34 1.97 6.750000 1.05 \n", + "6 1 14.06 2.15 2.61 17.6 121 2.60 2.51 0.31 1.25 5.050000 1.06 \n", + "8 1 13.86 1.35 2.27 16.0 98 2.98 3.15 0.22 1.85 7.220000 1.01 \n", + "10 1 14.12 1.48 2.32 16.8 95 2.20 2.43 0.26 1.57 5.000000 1.17 \n", + "12 1 14.75 1.73 2.39 11.4 91 3.10 3.69 0.43 2.81 5.400000 1.25 \n", + "14 1 13.63 1.81 2.70 17.2 112 2.85 2.91 0.30 1.46 7.300000 1.28 \n", + "16 1 13.83 1.57 2.62 20.0 115 2.95 3.40 0.40 1.72 6.600000 1.13 \n", + "18 1 13.64 3.10 2.56 15.2 116 2.70 3.03 0.17 1.66 5.100000 0.96 \n", + "20 1 12.93 3.80 2.65 18.6 102 2.41 2.41 0.25 1.98 4.500000 1.03 \n", + "22 1 12.85 1.60 2.52 17.8 95 2.48 2.37 0.26 1.46 3.930000 1.09 \n", + "24 1 13.05 2.05 3.22 25.0 124 2.63 2.68 0.47 1.92 3.580000 1.13 \n", + "26 1 13.30 1.72 2.14 17.0 94 2.40 2.19 0.27 1.35 3.950000 1.02 \n", + "28 1 14.02 1.68 2.21 16.0 96 2.65 2.33 0.26 1.98 4.700000 1.04 \n", + "30 1 13.58 1.66 2.36 19.1 106 2.86 3.19 0.22 1.95 6.900000 1.09 \n", + "32 1 13.76 1.53 2.70 19.5 132 2.95 2.74 0.50 1.35 5.400000 1.25 \n", + "34 1 13.48 1.81 2.41 20.5 100 2.70 2.98 0.26 1.86 5.100000 1.04 \n", + "36 1 13.05 1.65 2.55 18.0 98 2.45 2.43 0.29 1.44 4.250000 1.12 \n", + "38 1 14.22 3.99 2.51 13.2 128 3.00 3.04 0.20 2.08 5.100000 0.89 \n", + "40 1 13.41 3.84 2.12 18.8 90 2.45 2.68 0.27 1.48 4.280000 0.91 \n", + "42 1 13.24 3.98 2.29 17.5 103 2.64 2.63 0.32 1.66 4.360000 0.82 \n", + "44 1 14.21 4.04 2.44 18.9 111 2.85 2.65 0.30 1.25 5.240000 0.87 \n", + "46 1 13.90 1.68 2.12 16.0 101 3.10 3.39 0.21 2.14 6.100000 0.91 \n", + "48 1 13.94 1.73 2.27 17.4 108 2.88 3.54 0.32 2.08 8.900000 1.12 \n", + "50 1 13.83 1.65 2.60 17.2 94 2.45 2.99 0.22 2.29 5.600000 1.24 \n", + "52 1 13.77 1.90 2.68 17.1 115 3.00 2.79 0.39 
1.68 6.300000 1.13 \n", + "54 1 13.56 1.73 2.46 20.5 116 2.96 2.78 0.20 2.45 6.250000 0.98 \n", + "56 1 13.29 1.97 2.68 16.8 102 3.00 3.23 0.31 1.66 6.000000 1.07 \n", + "58 2 12.37 0.94 1.36 10.6 88 1.98 0.57 0.28 0.42 1.950000 1.05 \n", + ".. .. ... ... ... ... ... ... ... ... ... ... ... \n", + "118 2 12.00 3.43 2.00 19.0 87 2.00 1.64 0.37 1.87 1.280000 0.93 \n", + "120 2 11.56 2.05 3.23 28.5 119 3.18 5.08 0.47 1.87 6.000000 0.93 \n", + "122 2 13.05 5.80 2.13 21.5 86 2.62 2.65 0.30 2.01 2.600000 0.73 \n", + "124 2 12.07 2.16 2.17 21.0 85 2.60 2.65 0.37 1.35 2.760000 0.86 \n", + "126 2 11.79 2.13 2.78 28.5 92 2.13 2.24 0.58 1.76 3.000000 0.97 \n", + "128 2 12.04 4.30 2.38 22.0 80 2.10 1.75 0.42 1.35 2.600000 0.79 \n", + "130 3 12.88 2.99 2.40 20.0 104 1.30 1.22 0.24 0.83 5.400000 0.74 \n", + "132 3 12.70 3.55 2.36 21.5 106 1.70 1.20 0.17 0.84 5.000000 0.78 \n", + "134 3 12.60 2.46 2.20 18.5 94 1.62 0.66 0.63 0.94 7.100000 0.73 \n", + "136 3 12.53 5.51 2.64 25.0 96 1.79 0.60 0.63 1.10 5.000000 0.82 \n", + "138 3 12.84 2.96 2.61 24.0 101 2.32 0.60 0.53 0.81 4.920000 0.89 \n", + "140 3 13.36 2.56 2.35 20.0 89 1.40 0.50 0.37 0.64 5.600000 0.70 \n", + "142 3 13.62 4.95 2.35 20.0 92 2.00 0.80 0.47 1.02 4.400000 0.91 \n", + "144 3 13.16 3.57 2.15 21.0 102 1.50 0.55 0.43 1.30 4.000000 0.60 \n", + "146 3 12.87 4.61 2.48 21.5 86 1.70 0.65 0.47 0.86 7.650000 0.54 \n", + "148 3 13.08 3.90 2.36 21.5 113 1.41 1.39 0.34 1.14 9.400000 0.57 \n", + "150 3 12.79 2.67 2.48 22.0 112 1.48 1.36 0.24 1.26 10.800000 0.48 \n", + "152 3 13.23 3.30 2.28 18.5 98 1.80 0.83 0.61 1.87 10.520000 0.56 \n", + "154 3 13.17 5.19 2.32 22.0 93 1.74 0.63 0.61 1.55 7.900000 0.60 \n", + "156 3 12.45 3.03 2.64 27.0 97 1.90 0.58 0.63 1.14 7.500000 0.67 \n", + "158 3 13.48 1.67 2.64 22.5 89 2.60 1.10 0.52 2.29 11.750000 0.57 \n", + "160 3 13.69 3.26 2.54 20.0 107 1.83 0.56 0.50 0.80 5.880000 0.96 \n", + "162 3 12.96 3.45 2.35 18.5 106 1.39 0.70 0.40 0.94 5.280000 0.68 \n", + "164 3 13.73 4.36 2.26 22.5 88 
1.28 0.47 0.52 1.15 6.620000 0.78 \n", + "166 3 12.82 3.37 2.30 19.5 88 1.48 0.66 0.40 0.97 10.260000 0.72 \n", + "168 3 13.40 4.60 2.86 25.0 112 1.98 0.96 0.27 1.11 8.500000 0.67 \n", + "170 3 12.77 2.39 2.28 19.5 86 1.39 0.51 0.48 0.64 9.899999 0.57 \n", + "172 3 13.71 5.65 2.45 20.5 95 1.68 0.61 0.52 1.06 7.700000 0.64 \n", + "174 3 13.27 4.28 2.26 20.0 120 1.59 0.69 0.43 1.35 10.200000 0.59 \n", + "176 3 14.13 4.10 2.74 24.5 96 2.05 0.76 0.56 1.35 9.200000 0.61 \n", + "\n", + " 3.92 1065 \n", + "0 3.40 1050 \n", + "2 3.45 1480 \n", + "4 2.85 1450 \n", + "6 3.58 1295 \n", + "8 3.55 1045 \n", + "10 2.82 1280 \n", + "12 2.73 1150 \n", + "14 2.88 1310 \n", + "16 2.57 1130 \n", + "18 3.36 845 \n", + "20 3.52 770 \n", + "22 3.63 1015 \n", + "24 3.20 830 \n", + "26 2.77 1285 \n", + "28 3.59 1035 \n", + "30 2.88 1515 \n", + "32 3.00 1235 \n", + "34 3.47 920 \n", + "36 2.51 1105 \n", + "38 3.53 760 \n", + "40 3.00 1035 \n", + "42 3.00 680 \n", + "44 3.33 1080 \n", + "46 3.33 985 \n", + "48 3.10 1260 \n", + "50 3.37 1265 \n", + "52 2.93 1375 \n", + "54 3.03 1120 \n", + "56 2.84 1270 \n", + "58 1.82 520 \n", + ".. ... ... 
\n", + "118 3.05 564 \n", + "120 3.69 465 \n", + "122 3.10 380 \n", + "124 3.28 378 \n", + "126 2.44 466 \n", + "128 2.57 580 \n", + "130 1.42 530 \n", + "132 1.29 600 \n", + "134 1.58 695 \n", + "136 1.69 515 \n", + "138 2.15 590 \n", + "140 2.47 780 \n", + "142 2.05 550 \n", + "144 1.68 830 \n", + "146 1.86 625 \n", + "148 1.33 550 \n", + "150 1.47 480 \n", + "152 1.51 675 \n", + "154 1.48 725 \n", + "156 1.73 880 \n", + "158 1.78 620 \n", + "160 1.82 680 \n", + "162 1.75 675 \n", + "164 1.75 520 \n", + "166 1.75 685 \n", + "168 1.92 630 \n", + "170 1.63 470 \n", + "172 1.74 740 \n", + "174 1.56 835 \n", + "176 1.60 560 \n", + "\n", + "[89 rows x 14 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 119 + } + ] }, { "metadata": { @@ -154,14 +2756,176 @@ "metadata": { "id": "my8HB4V4j779", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 214 + }, + "outputId": "5312cbf0-f396-4fa4-cba7-019d3c0b5691" }, "cell_type": "code", "source": [ - "" + "wine_df.columns = ['category', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium', ' Flavanoids', 'Total phenols', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline']\n", + "wine_df.head()" ], - "execution_count": 0, - "outputs": [] + "execution_count": 120, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categoryAlcoholMalic acidAshAlcalinity of ashMagnesiumFlavanoidsTotal phenolsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
0113.201.782.1411.21002.652.760.261.284.381.053.401050
1113.162.362.6718.61012.803.240.302.815.681.033.171185
2114.371.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
4114.201.762.4515.21123.273.390.341.976.751.052.851450
\n", + "
" + ], + "text/plain": [ + " category Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n", + "0 1 13.20 1.78 2.14 11.2 100 \n", + "1 1 13.16 2.36 2.67 18.6 101 \n", + "2 1 14.37 1.95 2.50 16.8 113 \n", + "3 1 13.24 2.59 2.87 21.0 118 \n", + "4 1 14.20 1.76 2.45 15.2 112 \n", + "\n", + " Flavanoids Total phenols Nonflavanoid phenols Proanthocyanins \\\n", + "0 2.65 2.76 0.26 1.28 \n", + "1 2.80 3.24 0.30 2.81 \n", + "2 3.85 3.49 0.24 2.18 \n", + "3 2.80 2.69 0.39 1.82 \n", + "4 3.27 3.39 0.34 1.97 \n", + "\n", + " Color intensity Hue OD280/OD315 of diluted wines Proline \n", + "0 4.38 1.05 3.40 1050 \n", + "1 5.68 1.03 3.17 1185 \n", + "2 7.80 0.86 3.45 1480 \n", + "3 4.32 1.04 2.93 735 \n", + "4 6.75 1.05 2.85 1450 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 120 + } + ] }, { "metadata": { @@ -179,14 +2943,176 @@ "metadata": { "id": "buyT4vX4kPMl", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 214 + }, + "outputId": "28815039-f3ca-4068-c95f-9fe61758a12a" }, "cell_type": "code", "source": [ - "" + "wine_df.iloc[0:3, wine_df.columns.get_loc('Alcohol')] = np.nan\n", + "wine_df.head()" ], - "execution_count": 0, - "outputs": [] + "execution_count": 121, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categoryAlcoholMalic acidAshAlcalinity of ashMagnesiumFlavanoidsTotal phenolsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
01NaN1.782.1411.21002.652.760.261.284.381.053.401050
11NaN2.362.6718.61012.803.240.302.815.681.033.171185
21NaN1.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
4114.201.762.4515.21123.273.390.341.976.751.052.851450
\n", + "
" + ], + "text/plain": [ + " category Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n", + "0 1 NaN 1.78 2.14 11.2 100 \n", + "1 1 NaN 2.36 2.67 18.6 101 \n", + "2 1 NaN 1.95 2.50 16.8 113 \n", + "3 1 13.24 2.59 2.87 21.0 118 \n", + "4 1 14.20 1.76 2.45 15.2 112 \n", + "\n", + " Flavanoids Total phenols Nonflavanoid phenols Proanthocyanins \\\n", + "0 2.65 2.76 0.26 1.28 \n", + "1 2.80 3.24 0.30 2.81 \n", + "2 3.85 3.49 0.24 2.18 \n", + "3 2.80 2.69 0.39 1.82 \n", + "4 3.27 3.39 0.34 1.97 \n", + "\n", + " Color intensity Hue OD280/OD315 of diluted wines Proline \n", + "0 4.38 1.05 3.40 1050 \n", + "1 5.68 1.03 3.17 1185 \n", + "2 7.80 0.86 3.45 1480 \n", + "3 4.32 1.04 2.93 735 \n", + "4 6.75 1.05 2.85 1450 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 121 + } + ] }, { "metadata": { @@ -202,14 +3128,28 @@ "metadata": { "id": "xunmCjaEmDwZ", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "ae6865f1-e069-4149-db26-5713e2869c1f" }, "cell_type": "code", "source": [ - "" + "random = np.random.randint(10, size= 10)\n", + "random.sort()\n", + "print(random)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 122, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[0 0 0 1 4 4 5 8 9 9]\n" + ], + "name": "stdout" + } + ] }, { "metadata": { @@ -225,14 +3165,277 @@ "metadata": { "id": "zMgaNnNHmP01", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 364 + }, + "outputId": "639a4d74-d9a0-4733-f046-96a360def085" }, "cell_type": "code", "source": [ - "" + "\n", + "wine_df.iloc[random, wine_df.columns.get_loc('Alcohol')] = np.nan\n", + "wine_df.head(10)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 123, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categoryAlcoholMalic acidAshAlcalinity of ashMagnesiumFlavanoidsTotal phenolsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
01NaN1.782.1411.21002.652.760.261.284.381.053.401050
11NaN2.362.6718.61012.803.240.302.815.681.033.171185
21NaN1.952.5016.81133.853.490.242.187.800.863.451480
3113.242.592.8721.01182.802.690.391.824.321.042.93735
41NaN1.762.4515.21123.273.390.341.976.751.052.851450
51NaN1.872.4514.6962.502.520.301.985.251.023.581290
6114.062.152.6117.61212.602.510.311.255.051.063.581295
7114.831.642.1714.0972.802.980.291.985.201.082.851045
81NaN1.352.2716.0982.983.150.221.857.221.013.551045
91NaN2.162.3018.01052.953.320.222.385.751.253.171510
\n", + "
" + ], + "text/plain": [ + " category Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n", + "0 1 NaN 1.78 2.14 11.2 100 \n", + "1 1 NaN 2.36 2.67 18.6 101 \n", + "2 1 NaN 1.95 2.50 16.8 113 \n", + "3 1 13.24 2.59 2.87 21.0 118 \n", + "4 1 NaN 1.76 2.45 15.2 112 \n", + "5 1 NaN 1.87 2.45 14.6 96 \n", + "6 1 14.06 2.15 2.61 17.6 121 \n", + "7 1 14.83 1.64 2.17 14.0 97 \n", + "8 1 NaN 1.35 2.27 16.0 98 \n", + "9 1 NaN 2.16 2.30 18.0 105 \n", + "\n", + " Flavanoids Total phenols Nonflavanoid phenols Proanthocyanins \\\n", + "0 2.65 2.76 0.26 1.28 \n", + "1 2.80 3.24 0.30 2.81 \n", + "2 3.85 3.49 0.24 2.18 \n", + "3 2.80 2.69 0.39 1.82 \n", + "4 3.27 3.39 0.34 1.97 \n", + "5 2.50 2.52 0.30 1.98 \n", + "6 2.60 2.51 0.31 1.25 \n", + "7 2.80 2.98 0.29 1.98 \n", + "8 2.98 3.15 0.22 1.85 \n", + "9 2.95 3.32 0.22 2.38 \n", + "\n", + " Color intensity Hue OD280/OD315 of diluted wines Proline \n", + "0 4.38 1.05 3.40 1050 \n", + "1 5.68 1.03 3.17 1185 \n", + "2 7.80 0.86 3.45 1480 \n", + "3 4.32 1.04 2.93 735 \n", + "4 6.75 1.05 2.85 1450 \n", + "5 5.25 1.02 3.58 1290 \n", + "6 5.05 1.06 3.58 1295 \n", + "7 5.20 1.08 2.85 1045 \n", + "8 7.22 1.01 3.55 1045 \n", + "9 5.75 1.25 3.17 1510 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 123 + } + ] }, { "metadata": { @@ -250,14 +3453,32 @@ "metadata": { "id": "EnOYhmEqmfKp", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "outputId": "904d00bf-9428-4da9-c272-ed98d8eb4a2c" }, "cell_type": "code", "source": [ - "" + "\n", + "nan_index = wine_df[wine_df.isnull().any(axis = 1)].index\n", + "print(nan_index)\n", + "print('No of missing values: ' + str(nan_index.size))\n", + "\n", + "\n" ], - "execution_count": 0, - "outputs": [] + "execution_count": 124, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Int64Index([0, 1, 2, 4, 5, 8, 9], dtype='int64')\n", + "No of missing values: 7\n" + ], + "name": "stdout" + } + ] }, { "metadata": { @@ 
-273,14 +3494,276 @@ "metadata": { "id": "As7IC6Ktms8-", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 364 + }, + "outputId": "f253338d-d9dc-4089-c5e4-32194d2450f7" }, "cell_type": "code", "source": [ - "" + "wine_df.drop(index = nan_index, inplace = True)\n", + "wine_df.head(10)\n" ], - "execution_count": 0, - "outputs": [] + "execution_count": 125, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categoryAlcoholMalic acidAshAlcalinity of ashMagnesiumFlavanoidsTotal phenolsNonflavanoid phenolsProanthocyaninsColor intensityHueOD280/OD315 of diluted winesProline
3113.242.592.8721.01182.802.690.391.824.321.042.93735
6114.062.152.6117.61212.602.510.311.255.051.063.581295
7114.831.642.1714.0972.802.980.291.985.201.082.851045
10114.121.482.3216.8952.202.430.261.575.001.172.821280
11113.751.732.4116.0892.602.760.291.815.601.152.901320
12114.751.732.3911.4913.103.690.432.815.401.252.731150
13114.381.872.3812.01023.303.640.292.967.501.203.001547
14113.631.812.7017.21122.852.910.301.467.301.282.881310
15114.301.922.7220.01202.803.140.331.976.201.072.651280
16113.831.572.6220.01152.953.400.401.726.601.132.571130
\n", + "
" + ], + "text/plain": [ + " category Alcohol Malic acid Ash Alcalinity of ash Magnesium \\\n", + "3 1 13.24 2.59 2.87 21.0 118 \n", + "6 1 14.06 2.15 2.61 17.6 121 \n", + "7 1 14.83 1.64 2.17 14.0 97 \n", + "10 1 14.12 1.48 2.32 16.8 95 \n", + "11 1 13.75 1.73 2.41 16.0 89 \n", + "12 1 14.75 1.73 2.39 11.4 91 \n", + "13 1 14.38 1.87 2.38 12.0 102 \n", + "14 1 13.63 1.81 2.70 17.2 112 \n", + "15 1 14.30 1.92 2.72 20.0 120 \n", + "16 1 13.83 1.57 2.62 20.0 115 \n", + "\n", + " Flavanoids Total phenols Nonflavanoid phenols Proanthocyanins \\\n", + "3 2.80 2.69 0.39 1.82 \n", + "6 2.60 2.51 0.31 1.25 \n", + "7 2.80 2.98 0.29 1.98 \n", + "10 2.20 2.43 0.26 1.57 \n", + "11 2.60 2.76 0.29 1.81 \n", + "12 3.10 3.69 0.43 2.81 \n", + "13 3.30 3.64 0.29 2.96 \n", + "14 2.85 2.91 0.30 1.46 \n", + "15 2.80 3.14 0.33 1.97 \n", + "16 2.95 3.40 0.40 1.72 \n", + "\n", + " Color intensity Hue OD280/OD315 of diluted wines Proline \n", + "3 4.32 1.04 2.93 735 \n", + "6 5.05 1.06 3.58 1295 \n", + "7 5.20 1.08 2.85 1045 \n", + "10 5.00 1.17 2.82 1280 \n", + "11 5.60 1.15 2.90 1320 \n", + "12 5.40 1.25 2.73 1150 \n", + "13 7.50 1.20 3.00 1547 \n", + "14 7.30 1.28 2.88 1310 \n", + "15 6.20 1.07 2.65 1280 \n", + "16 6.60 1.13 2.57 1130 " + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 125 + } + ] }, { "metadata": { @@ -306,4 +3789,4 @@ "outputs": [] } ] -} \ No newline at end of file +} From 5d026d42c4e477fc3104ab61c353a16af56fed96 Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Sun, 27 Jan 2019 19:03:00 +0530 Subject: [PATCH 05/12] Created using Colaboratory --- Get_to_know_your_Data.ipynb | 485 ++++++++++++++++++++++++++++++++++++ 1 file changed, 485 insertions(+) create mode 100644 Get_to_know_your_Data.ipynb diff --git a/Get_to_know_your_Data.ipynb b/Get_to_know_your_Data.ipynb new file mode 100644 index 0000000..4384f58 --- /dev/null +++ b/Get_to_know_your_Data.ipynb @@ -0,0 +1,485 @@ +{ + "nbformat": 4, + 
"nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Get to know your Data.ipynb", + "version": "0.3.2", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "metadata": { + "id": "J82LU53m_OU0", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "# Get to know your Data\n", + "\n", + "\n", + "#### Import necessary modules\n" + ] + }, + { + "metadata": { + "id": "ZyO1UXL8mtSj", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "yXTzTowtnwGI", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Loading CSV Data to a DataFrame" + ] + }, + { + "metadata": { + "id": "H1Bjlb5wm9f-", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df = pd.read_csv('https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv')\n" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "KE-k7b_Mn5iN", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### See the top 10 rows\n" + ] + }, + { + "metadata": { + "id": "HY2Ps7xMn4ao", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df.head()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "ZQXekIodqOZu", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Find number of rows and columns\n" + ] + }, + { + "metadata": { + "id": "6Y-A-lbFqR82", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "print(iris_df.shape)\n", + "\n", + "#first is row and second is column\n", + "#select row by 
simple indexing\n", + "\n", + "#print(iris_df.shape[0])\n", + "#print(iris_df.shape[1])" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "4ckCiGPhrC_t", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Print all columns" + ] + }, + { + "metadata": { + "id": "S6jgMyRDrF2a", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "print(iris_df.columns)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "kVav5-ACtIqS", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Check Index\n" + ] + }, + { + "metadata": { + "id": "iu3I9zIGtLDX", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "print(iris_df.index)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "psCc7PborOCQ", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Right now the iris_data set has all the species grouped together let's shuffle it" + ] + }, + { + "metadata": { + "id": "Bxc8i6avrZPw", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "#generate a random permutaion on index\n", + "\n", + "print(iris_df.head())\n", + "\n", + "new_index = np.random.permutation(iris_df.index)\n", + "iris_df = iris_df.reindex(index = new_index)\n", + "\n", + "print(iris_df.head())" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "j32h8022sRT8", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### We can also apply an operation on whole column of iris_df" + ] + }, + { + "metadata": { + "id": "seYXHXsYsYJI", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "#original\n", + "\n", + "print(iris_df.head())\n", + "\n", + "iris_df['sepal_width'] *= 10\n", + "\n", + "#changed\n", + "\n", + "print(iris_df.head())\n", + "\n", + "#lets undo the operation\n", + "\n", + "iris_df['sepal_width'] 
/= 10\n", + "\n", + "print(iris_df.head())" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "R-Ca-LBLzjiF", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Show all the rows where sepal_width > 3.3" + ] + }, + { + "metadata": { + "id": "WJ7W-F-d0AoZ", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df[iris_df['sepal_width']>3.3]" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "gH3DnhCq2Cbl", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Club two filters together - Find all samples where sepal_width > 3.3 and species is versicolor" + ] + }, + { + "metadata": { + "id": "4U7ksr_R2H7M", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df[(iris_df['sepal_width']>3.3) & (iris_df['species'] == 'versicolor')] " + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "1lmnB3ot2u7I", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Sorting a column by value" + ] + }, + { + "metadata": { + "id": "K7KIj6fv2zWP", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "iris_df.sort_values(by='sepal_width')#, ascending = False)\n", + "#pass ascending = False for descending order" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "9jg_Z4YCoMSV", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### List all the unique species" + ] + }, + { + "metadata": { + "id": "M6EN78ufoJY7", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "species = iris_df['species'].unique()\n", + "\n", + "print(species)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "wG1i5nxBodmB", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Selecting a particular species using boolean mask (learnt in 
previous exercise)" + ] + }, + { + "metadata": { + "id": "gZvpbKBwoVUe", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "setosa = iris_df[iris_df['species'] == species[0]]\n", + "\n", + "setosa.head()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "7tumfZ3DotPG", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "# do the same for other 2 species \n", + "versicolor = iris_df[iris_df['species'] == species[1]]\n", + "\n", + "versicolor.head()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "cUYm5UqVpDPy", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "\n", + "\n", + "virginica = iris_df[iris_df['species'] == species[2]]\n", + "\n", + "virginica.head()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "-y1wDc8SpdQs", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Describe each created species to see the difference\n", + "\n" + ] + }, + { + "metadata": { + "id": "eHrn3ZVRpOk5", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "setosa.describe()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "GwJFT2GlpwUv", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "versicolor.describe()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Ad4qhSZLpztf", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "virginica.describe()" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "Vdu0ulZWtr09", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "#### Let's plot and see the difference" + ] + }, + { + "metadata": { + "id": "PEVMzRvpttmD", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "##### import matplotlib.pyplot " + ] + }, + { + 
"metadata": { + "id": "rqDXuuAtt7C3", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "#hist creates a histogram there are many more plots(see the documentation) you can play with it.\n", + "\n", + "plt.hist(setosa['sepal_length'])\n", + "plt.hist(versicolor['sepal_length'])\n", + "plt.hist(virginica['sepal_length'])" + ], + "execution_count": 0, + "outputs": [] + } + ] +} \ No newline at end of file From f0eb53c05d047061609b6a40b98967afc99aeeb7 Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Sun, 27 Jan 2019 19:06:39 +0530 Subject: [PATCH 06/12] Update Exercise.ipynb --- Exercise.ipynb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Exercise.ipynb b/Exercise.ipynb index 4012296..ed9305e 100644 --- a/Exercise.ipynb +++ b/Exercise.ipynb @@ -6,7 +6,7 @@ "name": "Exercise.ipynb", "version": "0.3.2", "provenance": [], - "include_colab_link": true + "include_colab_link": false }, "kernelspec": { "name": "python3", @@ -20,10 +20,7 @@ "id": "view-in-github", "colab_type": "text" }, - "source": [ - "[View in Colaboratory](https://colab.research.google.com/github/s-bose/Assignment-3/blob/s-bose/Pandas_Exercise.ipynb)" - ] - }, + { "metadata": { "id": "2LTtpUJEibjg", From e5055bd83ed739327c781d8aba6dd36657b7990b Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Mon, 28 Jan 2019 20:06:31 +0530 Subject: [PATCH 07/12] Final Commit --- Basic_Pandas.ipynb | 421 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 339 insertions(+), 82 deletions(-) diff --git a/Basic_Pandas.ipynb b/Basic_Pandas.ipynb index b2b8a30..83112c6 100644 --- a/Basic_Pandas.ipynb +++ b/Basic_Pandas.ipynb @@ -21,7 +21,7 @@ "colab_type": "text" }, "source": [ - "[View in Colaboratory](https://colab.research.google.com/github/ArnabG99/Assignment-3/blob/ArnabG99/Basic_Pandas.ipynb)" + "[View in 
Colaboratory](https://colab.research.google.com/github/ArnabG99/Assignment-3/blob/ArnabG99/ArnabG99.ipynb)" ] }, { @@ -75,9 +75,9 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 139 + "height": 143 }, - "outputId": "b6e56093-cc64-4332-bc7d-94f9a83a1390" + "outputId": "370a6f61-ad3a-4b1c-ab1f-327c4a768cce" }, "cell_type": "code", "source": [ @@ -99,7 +99,7 @@ "\n", "print(type(alpha_numbers))" ], - "execution_count": 2, + "execution_count": 34, "outputs": [ { "output_type": "stream", @@ -121,16 +121,16 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 476 + "height": 496 }, - "outputId": "6c7a68a3-6b81-4c60-c475-b6c78cce0f3a" + "outputId": "3245028d-c7b3-4e79-95fe-dd1aab01d3b7" }, "cell_type": "code", "source": [ "series1 = pd.Series(alphabets)\n", "print(series1)" ], - "execution_count": 3, + "execution_count": 33, "outputs": [ { "output_type": "stream", @@ -173,16 +173,16 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 476 + "height": 496 }, - "outputId": "a1a2e548-1a41-4cb6-95ad-9e293dfb1011" + "outputId": "764cc23b-a746-41a2-cf0c-5385fc3ce2c0" }, "cell_type": "code", "source": [ "series2 = pd.Series(numbers)\n", "print(series2)" ], - "execution_count": 4, + "execution_count": 32, "outputs": [ { "output_type": "stream", @@ -225,16 +225,16 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 476 + "height": 496 }, - "outputId": "b6e96e60-a159-4840-8f6c-869f94bf4958" + "outputId": "a666fc70-f97c-4ac6-b983-a94618e77e88" }, "cell_type": "code", "source": [ "series3 = pd.Series(alpha_numbers)\n", "print(series3)" ], - "execution_count": 5, + "execution_count": 31, "outputs": [ { "output_type": "stream", @@ -277,33 +277,41 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 119 + "height": 265 }, - "outputId": "70d2e0fb-e4b1-41c1-d8d1-47f72a8c8c98" + "outputId": 
"828b59a1-9652-4788-af97-aef6f5e51b0b" }, "cell_type": "code", "source": [ "#replace head() with head(n) where n can be any number between [0-25] and observe the output in deach case \n", - "series3.head()" + "series3.head(13)" ], - "execution_count": 6, + "execution_count": 27, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "A 0\n", - "B 1\n", - "C 2\n", - "D 3\n", - "E 4\n", + "A 0\n", + "B 1\n", + "C 2\n", + "D 3\n", + "E 4\n", + "F 5\n", + "G 6\n", + "H 7\n", + "I 8\n", + "J 9\n", + "K 10\n", + "L 11\n", + "M 12\n", "dtype: int64" ] }, "metadata": { "tags": [] }, - "execution_count": 6 + "execution_count": 27 } ] }, @@ -325,9 +333,9 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 855 + "height": 827 }, - "outputId": "208099a4-81d7-41cd-9193-aca56fad1761" + "outputId": "0019af9c-1671-4572-97ed-965199b6892b" }, "cell_type": "code", "source": [ @@ -337,11 +345,11 @@ "\n", "#Lets Change the column `values` to `alpha_numbers`\n", "\n", - "#df.columns = ['alphabets', 'alpha_numbers']\n", + "df.columns = ['alphabets', 'alpha_numbers']\n", "\n", "df" ], - "execution_count": 7, + "execution_count": 30, "outputs": [ { "output_type": "execute_result", @@ -366,7 +374,7 @@ " \n", " \n", " alphabets\n", - " values\n", + " alpha_numbers\n", " \n", " \n", " \n", @@ -505,39 +513,39 @@ "" ], "text/plain": [ - " alphabets values\n", - "0 A 0\n", - "1 B 1\n", - "2 C 2\n", - "3 D 3\n", - "4 E 4\n", - "5 F 5\n", - "6 G 6\n", - "7 H 7\n", - "8 I 8\n", - "9 J 9\n", - "10 K 10\n", - "11 L 11\n", - "12 M 12\n", - "13 N 13\n", - "14 O 14\n", - "15 P 15\n", - "16 Q 16\n", - "17 R 17\n", - "18 S 18\n", - "19 T 19\n", - "20 U 20\n", - "21 V 21\n", - "22 W 22\n", - "23 X 23\n", - "24 Y 24\n", - "25 Z 25" + " alphabets alpha_numbers\n", + "0 A 0\n", + "1 B 1\n", + "2 C 2\n", + "3 D 3\n", + "4 E 4\n", + "5 F 5\n", + "6 G 6\n", + "7 H 7\n", + "8 I 8\n", + "9 J 9\n", + "10 K 10\n", + "11 L 11\n", + "12 M 12\n", + "13 N 13\n", + 
"14 O 14\n", + "15 P 15\n", + "16 Q 16\n", + "17 R 17\n", + "18 S 18\n", + "19 T 19\n", + "20 U 20\n", + "21 V 21\n", + "22 W 22\n", + "23 X 23\n", + "24 Y 24\n", + "25 Z 25" ] }, "metadata": { "tags": [] }, - "execution_count": 7 + "execution_count": 30 } ] }, @@ -547,9 +555,9 @@ "colab_type": "code", "colab": { "base_uri": "https://localhost:8080/", - "height": 140 + "height": 136 }, - "outputId": "b7e765dc-4d30-4bbb-a43b-485eb592b40d" + "outputId": "d6a38677-52c8-4838-d59a-edc16f02c28b" }, "cell_type": "code", "source": [ @@ -559,7 +567,7 @@ "\n", "# there are many more operations which we can perform look at the documentation with the subsequent exercises we will learn more" ], - "execution_count": 8, + "execution_count": 29, "outputs": [ { "output_type": "execute_result", @@ -632,7 +640,7 @@ " Z\n", " \n", " \n", - " values\n", + " alpha_numbers\n", " 0\n", " 1\n", " 2\n", @@ -661,13 +669,13 @@ "" ], "text/plain": [ - " 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 23 \\\n", - "alphabets A B C D E F G H I J ... Q R S T U V W X \n", - "values 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 23 \n", + " 0 1 2 3 4 5 6 7 8 9 ... 16 17 18 19 20 21 22 \\\n", + "alphabets A B C D E F G H I J ... Q R S T U V W \n", + "alpha_numbers 0 1 2 3 4 5 6 7 8 9 ... 
16 17 18 19 20 21 22 \n", "\n", - " 24 25 \n", - "alphabets Y Z \n", - "values 24 25 \n", + " 23 24 25 \n", + "alphabets X Y Z \n", + "alpha_numbers 23 24 25 \n", "\n", "[2 rows x 26 columns]" ] @@ -675,7 +683,7 @@ "metadata": { "tags": [] }, - "execution_count": 8 + "execution_count": 29 } ] }, @@ -693,7 +701,11 @@ "metadata": { "id": "tc1-KX_Bfe7U", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 304 + }, + "outputId": "e96360a7-13bf-4f7d-c513-c161383b1eee" }, "cell_type": "code", "source": [ @@ -702,16 +714,94 @@ "\n", "vowels = ser.take(pos)\n", "\n", - "df = pd.DataFrame(vowels)#, columns=['vowels'])\n", + "df = pd.DataFrame(vowels, columns= ['vowels'])#, columns=['vowels'])\n", "\n", "df.columns = ['vowels']\n", + "print(df)\n", "\n", - "#df.index = [0, 1, 2, 3, 4]\n", + "df.index = [0, 1, 2, 3, 4]\n", "\n", "df" ], - "execution_count": 0, - "outputs": [] + "execution_count": 43, + "outputs": [ + { + "output_type": "stream", + "text": [ + " vowels\n", + "0 a\n", + "4 e\n", + "8 i\n", + "14 o\n", + "20 u\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
vowels
0a
1e
2i
3o
4u
\n", + "
" + ], + "text/plain": [ + " vowels\n", + "0 a\n", + "1 e\n", + "2 i\n", + "3 o\n", + "4 u" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 43 + } + ] }, { "metadata": { @@ -727,7 +817,11 @@ "metadata": { "id": "5KagP9PpgV2F", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "9627d7f3-685e-4d52-871c-429907b377a7" }, "cell_type": "code", "source": [ @@ -739,8 +833,21 @@ "\n", "titles" ], - "execution_count": 0, - "outputs": [] + "execution_count": 44, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['We', 'Are', 'Learning', 'Pandas']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 44 + } + ] }, { "metadata": { @@ -756,7 +863,11 @@ "metadata": { "id": "h5R0JL2NjuFS", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "outputId": "e34a155f-8068-49aa-f329-3bb003c741a1" }, "cell_type": "code", "source": [ @@ -768,14 +879,89 @@ "\n", "df1" ], - "execution_count": 0, - "outputs": [] + "execution_count": 45, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lower valuesupper values
1aA
2bB
3cC
4dD
5eE
\n", + "
" + ], + "text/plain": [ + " lower values upper values\n", + "1 a A\n", + "2 b B\n", + "3 c C\n", + "4 d D\n", + "5 e E" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 45 + } + ] }, { "metadata": { "id": "G_Frvc3mk93k", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "outputId": "ca5a9322-5ac9-4b64-f234-3caa4103b2d7" }, "cell_type": "code", "source": [ @@ -783,8 +969,79 @@ "\n", "df1.reindex(index = new_index)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 46, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lower valuesupper values
2bB
5eE
4dD
3cC
1aA
\n", + "
" + ], + "text/plain": [ + " lower values upper values\n", + "2 b B\n", + "5 e E\n", + "4 d D\n", + "3 c C\n", + "1 a A" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 46 + } + ] } ] -} \ No newline at end of file +} From c394be2f587798dc48907cd1ac4525946531f68e Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Mon, 28 Jan 2019 20:15:15 +0530 Subject: [PATCH 08/12] Created using Colaboratory --- Get_to_know_your_Data.ipynb | 1570 +++++++++++++++++++++++++++++++++-- 1 file changed, 1514 insertions(+), 56 deletions(-) diff --git a/Get_to_know_your_Data.ipynb b/Get_to_know_your_Data.ipynb index 4384f58..c19a255 100644 --- a/Get_to_know_your_Data.ipynb +++ b/Get_to_know_your_Data.ipynb @@ -88,14 +88,152 @@ "metadata": { "id": "HY2Ps7xMn4ao", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 359 + }, + "outputId": "829f589e-39b1-435d-a288-2ddb7df85856" }, "cell_type": "code", "source": [ - "iris_df.head()" + "iris_df.head(10)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
55.43.91.70.4setosa
64.63.41.40.3setosa
75.03.41.50.2setosa
84.42.91.40.2setosa
94.93.11.50.1setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "6 4.6 3.4 1.4 0.3 setosa\n", + "7 5.0 3.4 1.5 0.2 setosa\n", + "8 4.4 2.9 1.4 0.2 setosa\n", + "9 4.9 3.1 1.5 0.1 setosa" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 3 + } + ] }, { "metadata": { @@ -111,7 +249,11 @@ "metadata": { "id": "6Y-A-lbFqR82", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "af0b4150-fed4-46e1-fc96-5dd4d06f7799" }, "cell_type": "code", "source": [ @@ -120,11 +262,21 @@ "#first is row and second is column\n", "#select row by simple indexing\n", "\n", - "#print(iris_df.shape[0])\n", - "#print(iris_df.shape[1])" + "print(iris_df.shape[0])\n", + "print(iris_df.shape[1])" ], - "execution_count": 0, - "outputs": [] + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(150, 5)\n", + "150\n", + "5\n" + ], + "name": "stdout" + } + ] }, { "metadata": { @@ -140,14 +292,28 @@ "metadata": { "id": "S6jgMyRDrF2a", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "7576bcf3-8f63-4cbb-d1ea-a0d36092cafc" }, "cell_type": "code", "source": [ "print(iris_df.columns)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',\n", + " 'species'],\n", + " dtype='object')\n" + ], + "name": "stdout" + } + ] }, { "metadata": { @@ -163,14 +329,26 @@ "metadata": { "id": "iu3I9zIGtLDX", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "cc6d8cbf-8938-4710-8eb5-256a678bab3a" }, 
"cell_type": "code", "source": [ "print(iris_df.index)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "text": [ + "RangeIndex(start=0, stop=150, step=1)\n" + ], + "name": "stdout" + } + ] }, { "metadata": { @@ -186,7 +364,11 @@ "metadata": { "id": "Bxc8i6avrZPw", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 221 + }, + "outputId": "271b01bc-d21f-4e4d-e91c-2e964124c42a" }, "cell_type": "code", "source": [ @@ -199,8 +381,27 @@ "\n", "print(iris_df.head())" ], - "execution_count": 0, - "outputs": [] + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "text": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + " sepal_length sepal_width petal_length petal_width species\n", + "84 5.4 3.0 4.5 1.5 versicolor\n", + "126 6.2 2.8 4.8 1.8 virginica\n", + "55 5.7 2.8 4.5 1.3 versicolor\n", + "12 4.8 3.0 1.4 0.1 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n" + ], + "name": "stdout" + } + ] }, { "metadata": { @@ -216,14 +417,22 @@ "metadata": { "id": "seYXHXsYsYJI", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 323 + }, + "outputId": "cd43cfe6-8753-4434-b155-c2f2c33437d9" }, "cell_type": "code", "source": [ "#original\n", "\n", + "\n", + "reset_index = np.sort(iris_df.index)\n", + "iris_df = iris_df.reindex(index = reset_index)\n", "print(iris_df.head())\n", "\n", + "\n", "iris_df['sepal_width'] *= 10\n", "\n", "#changed\n", @@ -236,8 +445,33 @@ "\n", "print(iris_df.head())" ], - "execution_count": 0, - "outputs": [] + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 
0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 35.0 1.4 0.2 setosa\n", + "1 4.9 30.0 1.4 0.2 setosa\n", + "2 4.7 32.0 1.3 0.2 setosa\n", + "3 4.6 31.0 1.5 0.2 setosa\n", + "4 5.0 36.0 1.4 0.2 setosa\n", + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n" + ], + "name": "stdout" + } + ] }, { "metadata": { @@ -253,14 +487,386 @@ "metadata": { "id": "WJ7W-F-d0AoZ", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1165 + }, + "outputId": "dbf8ae62-6a0d-498f-8716-88d089927845" }, "cell_type": "code", "source": [ "iris_df[iris_df['sepal_width']>3.3]" ], - "execution_count": 0, - "outputs": [] + "execution_count": 9, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
45.03.61.40.2setosa
55.43.91.70.4setosa
64.63.41.40.3setosa
75.03.41.50.2setosa
105.43.71.50.2setosa
114.83.41.60.2setosa
145.84.01.20.2setosa
155.74.41.50.4setosa
165.43.91.30.4setosa
175.13.51.40.3setosa
185.73.81.70.3setosa
195.13.81.50.3setosa
205.43.41.70.2setosa
215.13.71.50.4setosa
224.63.61.00.2setosa
244.83.41.90.2setosa
265.03.41.60.4setosa
275.23.51.50.2setosa
285.23.41.40.2setosa
315.43.41.50.4setosa
325.24.11.50.1setosa
335.54.21.40.2setosa
365.53.51.30.2setosa
395.13.41.50.2setosa
405.03.51.30.3setosa
435.03.51.60.6setosa
445.13.81.90.4setosa
465.13.81.60.2setosa
485.33.71.50.2setosa
856.03.44.51.6versicolor
1097.23.66.12.5virginica
1177.73.86.72.2virginica
1317.93.86.42.0virginica
1366.33.45.62.4virginica
1486.23.45.42.3virginica
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "6 4.6 3.4 1.4 0.3 setosa\n", + "7 5.0 3.4 1.5 0.2 setosa\n", + "10 5.4 3.7 1.5 0.2 setosa\n", + "11 4.8 3.4 1.6 0.2 setosa\n", + "14 5.8 4.0 1.2 0.2 setosa\n", + "15 5.7 4.4 1.5 0.4 setosa\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "17 5.1 3.5 1.4 0.3 setosa\n", + "18 5.7 3.8 1.7 0.3 setosa\n", + "19 5.1 3.8 1.5 0.3 setosa\n", + "20 5.4 3.4 1.7 0.2 setosa\n", + "21 5.1 3.7 1.5 0.4 setosa\n", + "22 4.6 3.6 1.0 0.2 setosa\n", + "24 4.8 3.4 1.9 0.2 setosa\n", + "26 5.0 3.4 1.6 0.4 setosa\n", + "27 5.2 3.5 1.5 0.2 setosa\n", + "28 5.2 3.4 1.4 0.2 setosa\n", + "31 5.4 3.4 1.5 0.4 setosa\n", + "32 5.2 4.1 1.5 0.1 setosa\n", + "33 5.5 4.2 1.4 0.2 setosa\n", + "36 5.5 3.5 1.3 0.2 setosa\n", + "39 5.1 3.4 1.5 0.2 setosa\n", + "40 5.0 3.5 1.3 0.3 setosa\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "44 5.1 3.8 1.9 0.4 setosa\n", + "46 5.1 3.8 1.6 0.2 setosa\n", + "48 5.3 3.7 1.5 0.2 setosa\n", + "85 6.0 3.4 4.5 1.6 versicolor\n", + "109 7.2 3.6 6.1 2.5 virginica\n", + "117 7.7 3.8 6.7 2.2 virginica\n", + "131 7.9 3.8 6.4 2.0 virginica\n", + "136 6.3 3.4 5.6 2.4 virginica\n", + "148 6.2 3.4 5.4 2.3 virginica" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] }, { "metadata": { @@ -276,14 +882,71 @@ "metadata": { "id": "4U7ksr_R2H7M", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "outputId": "a448ee3a-15d6-42ef-e2db-416aee992b8a" }, "cell_type": "code", "source": [ "iris_df[(iris_df['sepal_width']>3.3) & (iris_df['species'] == 'versicolor')] " ], - "execution_count": 0, - "outputs": [] + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
856.03.44.51.6versicolor
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "85 6.0 3.4 4.5 1.6 versicolor" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] }, { "metadata": { @@ -299,15 +962,155 @@ "metadata": { "id": "K7KIj6fv2zWP", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 2193 + }, + "outputId": "50829d4f-3591-468f-c515-9507c398a9fe" }, "cell_type": "code", "source": [ - "iris_df.sort_values(by='sepal_width')#, ascending = False)\n", - "#pass ascending = False for descending order" + "print(iris_df.sort_values(by='sepal_width'))#, ascending = False)\n", + "#pass ascending = False for descending order\n", + "print(iris_df.sort_values(by='sepal_width', ascending = False))" ], - "execution_count": 0, - "outputs": [] + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "text": [ + " sepal_length sepal_width petal_length petal_width species\n", + "60 5.0 2.0 3.5 1.0 versicolor\n", + "62 6.0 2.2 4.0 1.0 versicolor\n", + "119 6.0 2.2 5.0 1.5 virginica\n", + "68 6.2 2.2 4.5 1.5 versicolor\n", + "41 4.5 2.3 1.3 0.3 setosa\n", + "53 5.5 2.3 4.0 1.3 versicolor\n", + "93 5.0 2.3 3.3 1.0 versicolor\n", + "87 6.3 2.3 4.4 1.3 versicolor\n", + "81 5.5 2.4 3.7 1.0 versicolor\n", + "80 5.5 2.4 3.8 1.1 versicolor\n", + "57 4.9 2.4 3.3 1.0 versicolor\n", + "72 6.3 2.5 4.9 1.5 versicolor\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "98 5.1 2.5 3.0 1.1 versicolor\n", + "113 5.7 2.5 5.0 2.0 virginica\n", + "108 6.7 2.5 5.8 1.8 virginica\n", + "69 5.6 2.5 3.9 1.1 versicolor\n", + "89 5.5 2.5 4.0 1.3 versicolor\n", + "106 4.9 2.5 4.5 1.7 virginica\n", + "92 5.8 2.6 4.0 1.2 versicolor\n", + "79 5.7 2.6 3.5 1.0 versicolor\n", + "90 5.5 2.6 4.4 1.2 versicolor\n", + "118 7.7 2.6 6.9 2.3 virginica\n", + "134 6.1 2.6 5.6 1.4 virginica\n", + "101 5.8 2.7 5.1 1.9 virginica\n", + "94 5.6 2.7 4.2 1.3 versicolor\n", + "59 5.2 2.7 3.9 1.4 versicolor\n", + "111 6.4 2.7 5.3 1.9 
virginica\n", + "82 5.8 2.7 3.9 1.2 versicolor\n", + "67 5.8 2.7 4.1 1.0 versicolor\n", + ".. ... ... ... ... ...\n", + "85 6.0 3.4 4.5 1.6 versicolor\n", + "39 5.1 3.4 1.5 0.2 setosa\n", + "31 5.4 3.4 1.5 0.4 setosa\n", + "20 5.4 3.4 1.7 0.2 setosa\n", + "148 6.2 3.4 5.4 2.3 virginica\n", + "26 5.0 3.4 1.6 0.4 setosa\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + "40 5.0 3.5 1.3 0.3 setosa\n", + "36 5.5 3.5 1.3 0.2 setosa\n", + "27 5.2 3.5 1.5 0.2 setosa\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "17 5.1 3.5 1.4 0.3 setosa\n", + "22 4.6 3.6 1.0 0.2 setosa\n", + "109 7.2 3.6 6.1 2.5 virginica\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "10 5.4 3.7 1.5 0.2 setosa\n", + "48 5.3 3.7 1.5 0.2 setosa\n", + "21 5.1 3.7 1.5 0.4 setosa\n", + "131 7.9 3.8 6.4 2.0 virginica\n", + "117 7.7 3.8 6.7 2.2 virginica\n", + "46 5.1 3.8 1.6 0.2 setosa\n", + "44 5.1 3.8 1.9 0.4 setosa\n", + "18 5.7 3.8 1.7 0.3 setosa\n", + "19 5.1 3.8 1.5 0.3 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "14 5.8 4.0 1.2 0.2 setosa\n", + "32 5.2 4.1 1.5 0.1 setosa\n", + "33 5.5 4.2 1.4 0.2 setosa\n", + "15 5.7 4.4 1.5 0.4 setosa\n", + "\n", + "[150 rows x 5 columns]\n", + " sepal_length sepal_width petal_length petal_width species\n", + "15 5.7 4.4 1.5 0.4 setosa\n", + "33 5.5 4.2 1.4 0.2 setosa\n", + "32 5.2 4.1 1.5 0.1 setosa\n", + "14 5.8 4.0 1.2 0.2 setosa\n", + "16 5.4 3.9 1.3 0.4 setosa\n", + "5 5.4 3.9 1.7 0.4 setosa\n", + "19 5.1 3.8 1.5 0.3 setosa\n", + "44 5.1 3.8 1.9 0.4 setosa\n", + "46 5.1 3.8 1.6 0.2 setosa\n", + "131 7.9 3.8 6.4 2.0 virginica\n", + "117 7.7 3.8 6.7 2.2 virginica\n", + "18 5.7 3.8 1.7 0.3 setosa\n", + "48 5.3 3.7 1.5 0.2 setosa\n", + "10 5.4 3.7 1.5 0.2 setosa\n", + "21 5.1 3.7 1.5 0.4 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa\n", + "22 4.6 3.6 1.0 0.2 setosa\n", + "109 7.2 3.6 6.1 2.5 virginica\n", + "36 5.5 3.5 1.3 0.2 setosa\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "17 5.1 3.5 1.4 0.3 setosa\n", + "40 5.0 3.5 1.3 0.3 setosa\n", + "43 5.0 3.5 1.6 0.6 setosa\n", + 
"27 5.2 3.5 1.5 0.2 setosa\n", + "39 5.1 3.4 1.5 0.2 setosa\n", + "28 5.2 3.4 1.4 0.2 setosa\n", + "136 6.3 3.4 5.6 2.4 virginica\n", + "31 5.4 3.4 1.5 0.4 setosa\n", + "85 6.0 3.4 4.5 1.6 versicolor\n", + "7 5.0 3.4 1.5 0.2 setosa\n", + ".. ... ... ... ... ...\n", + "83 6.0 2.7 5.1 1.6 versicolor\n", + "67 5.8 2.7 4.1 1.0 versicolor\n", + "101 5.8 2.7 5.1 1.9 virginica\n", + "82 5.8 2.7 3.9 1.2 versicolor\n", + "111 6.4 2.7 5.3 1.9 virginica\n", + "94 5.6 2.7 4.2 1.3 versicolor\n", + "79 5.7 2.6 3.5 1.0 versicolor\n", + "90 5.5 2.6 4.4 1.2 versicolor\n", + "92 5.8 2.6 4.0 1.2 versicolor\n", + "118 7.7 2.6 6.9 2.3 virginica\n", + "134 6.1 2.6 5.6 1.4 virginica\n", + "146 6.3 2.5 5.0 1.9 virginica\n", + "89 5.5 2.5 4.0 1.3 versicolor\n", + "98 5.1 2.5 3.0 1.1 versicolor\n", + "106 4.9 2.5 4.5 1.7 virginica\n", + "108 6.7 2.5 5.8 1.8 virginica\n", + "72 6.3 2.5 4.9 1.5 versicolor\n", + "69 5.6 2.5 3.9 1.1 versicolor\n", + "113 5.7 2.5 5.0 2.0 virginica\n", + "57 4.9 2.4 3.3 1.0 versicolor\n", + "80 5.5 2.4 3.8 1.1 versicolor\n", + "81 5.5 2.4 3.7 1.0 versicolor\n", + "93 5.0 2.3 3.3 1.0 versicolor\n", + "53 5.5 2.3 4.0 1.3 versicolor\n", + "41 4.5 2.3 1.3 0.3 setosa\n", + "87 6.3 2.3 4.4 1.3 versicolor\n", + "62 6.0 2.2 4.0 1.0 versicolor\n", + "68 6.2 2.2 4.5 1.5 versicolor\n", + "119 6.0 2.2 5.0 1.5 virginica\n", + "60 5.0 2.0 3.5 1.0 versicolor\n", + "\n", + "[150 rows x 5 columns]\n" + ], + "name": "stdout" + } + ] }, { "metadata": { @@ -323,7 +1126,11 @@ "metadata": { "id": "M6EN78ufoJY7", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "dd37c611-5ef4-4495-ae00-e2f584c0de2d" }, "cell_type": "code", "source": [ @@ -331,8 +1138,16 @@ "\n", "print(species)" ], - "execution_count": 0, - "outputs": [] + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['setosa' 'versicolor' 'virginica']\n" + ], + "name": "stdout" + } + ] }, { "metadata": { @@ -348,7 +1163,11 
@@ "metadata": { "id": "gZvpbKBwoVUe", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "1b9c963c-6f09-4ff9-9e60-50139a1aa564" }, "cell_type": "code", "source": [ @@ -356,14 +1175,107 @@ "\n", "setosa.head()" ], - "execution_count": 0, - "outputs": [] + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "0 5.1 3.5 1.4 0.2 setosa\n", + "1 4.9 3.0 1.4 0.2 setosa\n", + "2 4.7 3.2 1.3 0.2 setosa\n", + "3 4.6 3.1 1.5 0.2 setosa\n", + "4 5.0 3.6 1.4 0.2 setosa" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 14 + } + ] }, { "metadata": { "id": "7tumfZ3DotPG", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "1e4cbc14-51ef-4ea1-c2d6-d70e7c0aec77" }, "cell_type": "code", "source": [ @@ -372,14 +1284,107 @@ "\n", "versicolor.head()" ], - "execution_count": 0, - "outputs": [] + "execution_count": 15, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
507.03.24.71.4versicolor
516.43.24.51.5versicolor
526.93.14.91.5versicolor
535.52.34.01.3versicolor
546.52.84.61.5versicolor
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "50 7.0 3.2 4.7 1.4 versicolor\n", + "51 6.4 3.2 4.5 1.5 versicolor\n", + "52 6.9 3.1 4.9 1.5 versicolor\n", + "53 5.5 2.3 4.0 1.3 versicolor\n", + "54 6.5 2.8 4.6 1.5 versicolor" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 15 + } + ] }, { "metadata": { "id": "cUYm5UqVpDPy", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "outputId": "78cac6d9-6b0c-4235-dd35-d3e1871edd81" }, "cell_type": "code", "source": [ @@ -389,8 +1394,97 @@ "\n", "virginica.head()" ], - "execution_count": 0, - "outputs": [] + "execution_count": 16, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_widthspecies
1006.33.36.02.5virginica
1015.82.75.11.9virginica
1027.13.05.92.1virginica
1036.32.95.61.8virginica
1046.53.05.82.2virginica
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width species\n", + "100 6.3 3.3 6.0 2.5 virginica\n", + "101 5.8 2.7 5.1 1.9 virginica\n", + "102 7.1 3.0 5.9 2.1 virginica\n", + "103 6.3 2.9 5.6 1.8 virginica\n", + "104 6.5 3.0 5.8 2.2 virginica" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 16 + } + ] }, { "metadata": { @@ -407,40 +1501,373 @@ "metadata": { "id": "eHrn3ZVRpOk5", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 297 + }, + "outputId": "ba30f410-e825-4750-c131-d6dd3ad4af28" }, "cell_type": "code", "source": [ "setosa.describe()" ], - "execution_count": 0, - "outputs": [] + "execution_count": 17, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count50.0000050.00000050.00000050.00000
mean5.006003.4180001.4640000.24400
std0.352490.3810240.1735110.10721
min4.300002.3000001.0000000.10000
25%4.800003.1250001.4000000.20000
50%5.000003.4000001.5000000.20000
75%5.200003.6750001.5750000.30000
max5.800004.4000001.9000000.60000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 50.00000 50.000000 50.000000 50.00000\n", + "mean 5.00600 3.418000 1.464000 0.24400\n", + "std 0.35249 0.381024 0.173511 0.10721\n", + "min 4.30000 2.300000 1.000000 0.10000\n", + "25% 4.80000 3.125000 1.400000 0.20000\n", + "50% 5.00000 3.400000 1.500000 0.20000\n", + "75% 5.20000 3.675000 1.575000 0.30000\n", + "max 5.80000 4.400000 1.900000 0.60000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 17 + } + ] }, { "metadata": { "id": "GwJFT2GlpwUv", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 297 + }, + "outputId": "bc0b99ae-e91e-456f-d64d-c905b4a93501" }, "cell_type": "code", "source": [ "versicolor.describe()" ], - "execution_count": 0, - "outputs": [] + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count50.00000050.00000050.00000050.000000
mean5.9360002.7700004.2600001.326000
std0.5161710.3137980.4699110.197753
min4.9000002.0000003.0000001.000000
25%5.6000002.5250004.0000001.200000
50%5.9000002.8000004.3500001.300000
75%6.3000003.0000004.6000001.500000
max7.0000003.4000005.1000001.800000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 50.000000 50.000000 50.000000 50.000000\n", + "mean 5.936000 2.770000 4.260000 1.326000\n", + "std 0.516171 0.313798 0.469911 0.197753\n", + "min 4.900000 2.000000 3.000000 1.000000\n", + "25% 5.600000 2.525000 4.000000 1.200000\n", + "50% 5.900000 2.800000 4.350000 1.300000\n", + "75% 6.300000 3.000000 4.600000 1.500000\n", + "max 7.000000 3.400000 5.100000 1.800000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 18 + } + ] }, { "metadata": { "id": "Ad4qhSZLpztf", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 297 + }, + "outputId": "71653ad0-ee7b-4282-cbdb-8d399de969ce" }, "cell_type": "code", "source": [ "virginica.describe()" ], - "execution_count": 0, - "outputs": [] + "execution_count": 19, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sepal_lengthsepal_widthpetal_lengthpetal_width
count50.0000050.00000050.00000050.00000
mean6.588002.9740005.5520002.02600
std0.635880.3224970.5518950.27465
min4.900002.2000004.5000001.40000
25%6.225002.8000005.1000001.80000
50%6.500003.0000005.5500002.00000
75%6.900003.1750005.8750002.30000
max7.900003.8000006.9000002.50000
\n", + "
" + ], + "text/plain": [ + " sepal_length sepal_width petal_length petal_width\n", + "count 50.00000 50.000000 50.000000 50.00000\n", + "mean 6.58800 2.974000 5.552000 2.02600\n", + "std 0.63588 0.322497 0.551895 0.27465\n", + "min 4.90000 2.200000 4.500000 1.40000\n", + "25% 6.22500 2.800000 5.100000 1.80000\n", + "50% 6.50000 3.000000 5.550000 2.00000\n", + "75% 6.90000 3.175000 5.875000 2.30000\n", + "max 7.90000 3.800000 6.900000 2.50000" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 19 + } + ] }, { "metadata": { @@ -466,7 +1893,11 @@ "metadata": { "id": "rqDXuuAtt7C3", "colab_type": "code", - "colab": {} + "colab": { + "base_uri": "https://localhost:8080/", + "height": 398 + }, + "outputId": "07e60731-bd1d-420d-f35e-aaaae5f2ba9a" }, "cell_type": "code", "source": [ @@ -478,8 +1909,35 @@ "plt.hist(versicolor['sepal_length'])\n", "plt.hist(virginica['sepal_length'])" ], - "execution_count": 0, - "outputs": [] + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(array([ 1., 0., 5., 5., 8., 9., 10., 5., 1., 6.]),\n", + " array([4.9, 5.2, 5.5, 5.8, 6.1, 6.4, 6.7, 7. 
, 7.3, 7.6, 7.9]),\n", + " )" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 20 + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAd8AAAFKCAYAAABcq1WoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFGdJREFUeJzt3XuMlGfZwOF72ZHy7bLiFnehaItN\no2lsi4W0TaGALXKwxZ7QcgpFYr9EhAImmIIEAwmJkYY2qK1WW6R+EBIsRdgaI40IiWkBjRgUk4aC\nieHQwlKWM4RD5/vDsBELe5idfYZ997r+6r7z7jv3sw+ZHzNTZsvy+Xw+AIBkupR6AADobMQXABIT\nXwBITHwBIDHxBYDExBcAEsuluJP6+hMp7qYoqqsroqHhdKnHaFdZX6P1dXxZX6P1dXwtWWNNTdVV\nb/PM97/kcuWlHqHdZX2N1tfxZX2N1tfxtXWN4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8AJCa+AJCY\n+AJAYi2K765du2L48OGxcuXKiIh47733YsqUKTFp0qSYMmVK1NfXt+uQAJAlzcb39OnTsWjRohg4\ncGDjsaVLl8bYsWNj5cqVMWLEiFi+fHm7DgkAWdJsfLt27Rovv/xy1NbWNh5bsGBBjBo1KiIiqqur\n4+jRo+03IQBkTLPxzeVy0a1bt8uOVVRURHl5eVy8eDFWrVoVDz/8cLsNCABZU/BvNbp48WI888wz\nce+99172kvSVVFdXdKgP2m7qN1Fcix6evb7N13jjuUeLMMm1o6PtYWtlfX0R2V+j9XV8bVljwfH9\n7ne/G3379o2nn3662XM70q+Wqqmp6lC/ArFYsrTmrO9h1tcXkf01Wl/H15I1Fv1XCtbV1cXHPvax\nmDlzZiHfDgCdWrPPfHfu3BmLFy+O/fv3Ry6Xiw0bNsQHH3wQ1113XTz55JMREXHLLbfEwoUL23tW\nAMiEZuN7++23x4oVK1LMAgCdgk+4AoDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWA\nxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABIT\nXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwB\nIDHxBYDExBcAEhNfAEisRfHdtWtXDB8+PFauXBkREe+99148+eSTMXHixJg1a1acO3euXYcEgCxp\nNr6nT5+ORYsWxcCBAxuP/ehHP4qJEyfGqlWrom/fvrFmzZp2HRIAsqTZ+Hbt2jVefvnlqK2tbTy2\nbdu2+NKXvhQREQ888EBs2bKl/SYEgIzJNXtCLhe53OWnnTlzJrp27RoRET179oz6+vr2mQ4AMqjZ\n+DYnn883e051dUXkcuVtvatkamqqSj1Ccllb86X1jF39rXa9n1+N+2m7Xv9qsrZfV9Iea3zr0a8W\n/ZqFqFn/eub3MOvri2jbGguKb0VFRZw9eza6desWBw8evOwl6StpaDhd0HClUFNTFfX1J0o9RnJZ\nWnPKPSzFz60z/BntDGvM8vo6w/61ZI1Nxbmgf2o0aNCg2LBhQ0REvPnmmzFkyJBCLgMAnVKzz3x3\n7twZixcvjv3790cul4sNGzbEkiVLYu7cubF69ero06dPPPbYYylmBYBMaDa+t99+e6xYseIjx5cv\nX94uAwFA1vmEKwBITHwBIDHxBYDExBcAEhNfAEh
MfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQX\nABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgsVypBwCyb9f/Tmnd+e0zBlwzPPMFgMTEFwASE18A\nSEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx\n8QWAxMQXABITXwBILFfIN506dSrmzJkTx44di/Pnz8f06dNjyJAhxZ4NADKpoPj++te/jptvvjlm\nz54dBw8ejK9//evxu9/9rtizAUAmFfSyc3V1dRw9ejQiIo4fPx7V1dVFHQoAsqygZ76jR4+OtWvX\nxogRI+L48ePxs5/9rNhzAUBmFRTf9evXR58+fWLZsmXxzjvvxLx582Lt2rVXPb+6uiJyufKCh0yt\npqaq1CMk940f/KHN13jjuUeLMElxpNrD9r6fsau/1eZrzFp1qAiTUGxZf5zJ+voi2rbGguK7ffv2\nGDx4cERE3HrrrXHo0KG4ePFilJdfObANDacLHjC1mpqqqK8/UeoxOqRr5eeWcg+vlTXT8WT5z05n\neBxtyRqbinNB7/n27ds3duzYERER+/fvj8rKyquGFwC4XEHPfMeNGxfz5s2LSZMmxYULF2LhwoVF\nHgsAsqug+FZWVsYPf/jDYs8CAJ2CT7gCgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHx\nBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcA\nEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhM\nfAEgMfEFgMTEFwASE18ASKzg+NbV1cUjjzwSY8aMic2bNxdxJADItoLi29DQEC+++GKsWrUqXnrp\npdi4cWOx5wKAzMoV8k1btmyJgQMHRvfu3aN79+6xaNGiYs8FAJlVUHz37dsXZ8+ejalTp8bx48dj\nxowZMXDgwKueX11dEblcecFDplZTU9Xicx+evb5N9/XGc4+26fuvJa35uf2nsau/VeRJ0il0zZD1\nPzutWd9bj361HSdpufvWv96q89uyhwXFNyLi6NGj8cILL8SBAwdi8uTJsWnTpigrK7viuQ0Npwse\nMLWamqqorz+R7P5S3ld7y9JaWqozrpniyPKfndSPo8XSmplbssam4lzQe749e/aM/v37Ry6Xi5tu\nuikqKyvjyJEjhVwKADqdguI7ePDg2Lp1a3z44YfR0NAQp0+fjurq6mLPBgCZVNDLzr169YpRo0bF\n2LFjIyJi/vz50aWLfzIMAC1R8Hu+48ePj/HjxxdzFgDoFDxdBYDExBcAEhNfAEhMfAEgMfEFgMTE\nFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASy5V6\ngM7uGz/4Q6lHyJxZqw4lu69dq6a06/VntevVKZW3Hv1qqUeIiIjPvfJqqUfotDzzBYDExBcAEhNf\nAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEg\nMfEFgMTEFwASE18ASKxN8T179mwMHz481q5dW6x5ACDz2hTfn/70p9GjR49izQIAnULB8d2zZ0/s\n3r077r///iKOAwDZV3B8Fy9eHHPnzi3mLADQKeQK+aZ169bFnXfeGTfeeGOLzq+urohcrryQu7qq\nh2evb/M13nju0Sser6mpavO1U/qfe37Xrtc/86cvt+i8b/zgDwVd/3/uKejbgDZqz8e61lx7V7tN\n0Tqt/Xm05edXUHw3b94ce/fujc2bN8f7778fXbt2jd69e8egQYOueH5Dw+mCB2xP9fUnPnKspqbq\niscBsqa9Hus
66uNoa2ZuyRqbinNB8V26dGnjf//4xz+OT33qU1cNLwBwOf/OFwASK+iZ73+aMWNG\nMeYAgE7DM18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwAS\nE18ASEx8ASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8\nASAx8QWAxMQXABITXwBITHwBIDHxBYDExBcAEhNfAEhMfAEgMfEFgMTEFwASE18ASEx8ASAx8QWA\nxHKFfuOzzz4bf/nLX+LChQvxzW9+M0aOHFnMuQAgswqK79atW+Pdd9+N1atXR0NDQzz++OPiCwAt\nVFB877777ujXr19ERHz84x+PM2fOxMWLF6O8vLyowwFAFhUU3/Ly8qioqIiIiDVr1sTQoUObDG91\ndUXkctdemGtqqlp1HCBL2vOxrjXX3tVuU7ROa38ebfn5Ffyeb0TE73//+1izZk384he/aPK8hobT\nbbmbdlNff+Ijx2pqqq54HCBr2uuxrqM+jrZm5passak4FxzfP/7xj/HSSy/FK6+8ElVVnikCQEsV\nFN8TJ07Es88+G6+++mp84hOfKPZMAJBpBcX3t7/9bTQ0NMS3v/3txmOLFy+OPn36FG0wAMiqguI7\nbty4GDduXLFnAYBOwSdcAUBi4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8AJCa+AJCY+AJAYuILAImJ\nLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGLiCwCJ5Uo9QCl94wd/KPUIRTFr1aF2vof/a9FZ\nP5xY285zAMW063+ntM912+Wq2eKZLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8A\nJCa+AJCY+AJAYuILAImJLwAkJr4AkJj4AkBi4gsAiYkvACQmvgCQmPgCQGK5Qr/x+9//fuzYsSPK\nyspi3rx50a9fv2LOBQCZVVB8//SnP8W//vWvWL16dezZsyfmzZsXq1evLvZsAJBJBb3svGXLlhg+\nfHhERNxyyy1x7NixOHnyZFEHA4CsKii+hw8fjurq6savr7/++qivry/aUACQZQW/5/uf8vl8k7fX\n1FQV424u88Zzjxb9mh3XtfGzuK/UA1wyrtQDAJ1BW9pW0DPf2traOHz4cOPXhw4dipqamoKHAIDO\npKD43nfffbFhw4aIiPjHP/4RtbW10b1796IOBgBZVdDLzgMGDIjbbrstxo8fH2VlZbFgwYJizwUA\nmVWWb+4NWwCgqHzCFQAkJr4AkFhR/qlRR3b27Nn4yle+EtOmTYsxY8Y0Hh82bFj07t07ysvLIyJi\nyZIl0atXr1KN2Wrbtm2LWbNmxWc/+9mIiPjc5z4X3/ve9xpvf/vtt+P555+P8vLyGDp0aEyfPr1U\noxakufV19P27pK6uLl555ZXI5XIxc+bMuP/++xtv6+h7GNH0+rKwh6+99lrU1dU1fr1z587461//\n2vh1XV1d/PKXv4wuXbrE2LFj44knnijFmAVrbn233XZbDBgwoPHrV199tXE/O4JTp07FnDlz4tix\nY3H+/PmYPn16DBkypPH2Nu1fvpN7/vnn82PGjMm//vrrlx1/4IEH8idPnizRVG23devW/IwZM656\n+4MPPpg/cOBA/uLFi/kJEybk33333YTTtV1z6+vo+5fP5/NHjhzJjxw5Mn/ixIn8wYMH8/Pnz7/s\n9o6+h82tLwt7+J+2bduWX7hwYePXp06dyo8cOTJ//Pjx/JkzZ/KjR4/ONzQ0lHDCtvnv9eXz+fw9\n99xTommKY8WKFfklS5bk8/l8/v3338+PGjWq8ba27l+nftl5z549sXv37sv+tt0Z7N27N3r06BE3\n3HBDdOnSJb74xS/Gli1bSj0W/2XLli0xcODA6N69e9TW1saiRYsab8vCHja1v
ix68cUXY9q0aY1f\n79ixI+64446oqqqKbt26xYABA2L79u0lnLBt/nt9WVBdXR1Hjx6NiIjjx49f9smObd2/Th3fxYsX\nx9y5c696+4IFC2LChAmxZMmSZj/F61q0e/fumDp1akyYMCHeeuutxuP19fVx/fXXN37dUT8e9Grr\nu6Sj79++ffvi7NmzMXXq1Jg4ceJlcc3CHja1vks6+h5e8re//S1uuOGGyz6M6PDhwx1+Dy+50voi\nIs6dOxezZ8+O8ePHx/Lly0s0XeFGjx4dBw4ciBEjRsSkSZNizpw5jbe1df867Xu+69atizvvvDNu\nvPHGK94+c+bMGDJkSPTo0SOmT58eGzZsiC9/+cuJpyzcZz7zmXj66afjwQcfjL1798bkyZPjzTff\njK5du5Z6tKJobn0dff8uOXr0aLzwwgtx4MCBmDx5cmzatCnKyspKPVbRNLW+rOxhRMSaNWvi8ccf\nb/KcjvyXi6ut75lnnolHHnkkysrKYtKkSXHXXXfFHXfcUYIJC7N+/fro06dPLFu2LN55552YN29e\nrF279orntnb/Ou0z382bN8fGjRtj7Nix8dprr8VPfvKTePvttxtvf+yxx6Jnz56Ry+Vi6NChsWvX\nrhJO23q9evWKhx56KMrKyuKmm26KT37yk3Hw4MGI+OjHgx48eDBqa2tLNWpBmlpfRMffv4iInj17\nRv/+/SOXy8VNN90UlZWVceTIkYjIxh42tb6IbOzhJdu2bYv+/ftfduxKH9Pb0fbwkiutLyJiwoQJ\nUVlZGRUVFXHvvfd2uD3cvn17DB48OCIibr311jh06FBcvHgxItq+f502vkuXLo3XX389fvWrX8UT\nTzwR06ZNi0GDBkVExIkTJ+Kpp56Kc+fORUTEn//858b/q7ajqKuri2XLlkXEv1+i/OCDDxr/T9FP\nf/rTcfLkydi3b19cuHAhNm3aFPfdd838WoQWaWp9Wdi/iIjBgwfH1q1b48MPP4yGhoY4ffp043tO\nWdjDptaXlT2M+PdfjCorKz/yqtMXvvCF+Pvf/x7Hjx+PU6dOxfbt2+Ouu+4q0ZSFu9r6/vnPf8bs\n2bMjn8/HhQsXYvv27R1uD/v27Rs7duyIiIj9+/dHZWVl4/+t3db967QvO1/J2rVro6qqKkaMGBFD\nhw6NcePGxXXXXRef//znO9zLXcOGDYvvfOc7sXHjxjh//nwsXLgwfvOb3zSub+HChTF79uyIiHjo\noYfi5ptvLvHErdPc+jr6/kX8+9n9qFGjYuzYsRERMX/+/Fi3bl1m9rC59WVhDyM++v78z3/+87j7\n7rujf//+MXv27HjqqaeirKwspk+fHlVVxf8NcO2tqfX17t07vva1r0WXLl1i2LBh0a9fvxJO2nrj\nxo2LefPmxaRJk+LChQuxcOHCou2fj5cEgMQ67cvOAFAq4gsAiYkvACQmvgCQmPgCQGLiCwCJiS8A\nJCa+AJDY/wP18IOmMPyNQgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] } ] } \ No newline at end of file From 81f582d60e67d7f0c69a2b369cf92bec65829250 Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Mon, 28 Jan 2019 20:17:37 +0530 Subject: [PATCH 09/12] Final Commit From 410d57e8cc0a62b90206f0c0432575523c131293 Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Mon, 28 Jan 2019 20:19:11 +0530 Subject: [PATCH 10/12] Final Commit --- Get_to_know_your_Data.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Get_to_know_your_Data.ipynb b/Get_to_know_your_Data.ipynb index c19a255..af47857 100644 --- a/Get_to_know_your_Data.ipynb +++ b/Get_to_know_your_Data.ipynb @@ -1392,7 +1392,7 @@ "\n", "virginica = iris_df[iris_df['species'] == species[2]]\n", "\n", - "virginica.head()" + "virginica.head() " ], "execution_count": 16, "outputs": [ From 831ceb44a4e4088b079da61778ce05c03e8a8e0e Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Mon, 28 Jan 2019 20:24:52 +0530 Subject: [PATCH 11/12] Final Commit --- Exercise.ipynb | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Exercise.ipynb b/Exercise.ipynb index ed9305e..2f887b0 100644 --- a/Exercise.ipynb +++ b/Exercise.ipynb @@ -6,7 +6,7 @@ "name": "Exercise.ipynb", "version": "0.3.2", "provenance": [], - "include_colab_link": false + "include_colab_link": true }, "kernelspec": { "name": "python3", @@ -20,7 +20,10 @@ "id": "view-in-github", "colab_type": "text" }, - + "source": [ + "[View in Colaboratory](https://colab.research.google.com/github/ArnabG99/Assignment-3/blob/ArnabG99/Exercise.ipynb)" + ] + }, { "metadata": { "id": "2LTtpUJEibjg", From 0fa5fd868f67fef903f854888f8dabce2d4f0873 Mon Sep 17 00:00:00 2001 From: Arnab Ghosh <43007068+ArnabG99@users.noreply.github.com> Date: Mon, 28 Jan 2019 20:30:01 +0530 Subject: [PATCH 12/12] Created using Colaboratory --- 
ArnabG99.ipynb | 1016 +----------------------------------------------- 1 file changed, 2 insertions(+), 1014 deletions(-) diff --git a/ArnabG99.ipynb b/ArnabG99.ipynb index c702352..51e5dfe 100644 --- a/ArnabG99.ipynb +++ b/ArnabG99.ipynb @@ -22,1024 +22,12 @@ "colab_type": "text" }, "source": [ - "[View in Colaboratory](https://colab.research.google.com/github/ArnabG99/Assignment-3/blob/ArnabG99/ArnabG99.ipynb)" + "\"Open" ] }, { "metadata": { - "id": "cGbE814_Xaf9", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "# Pandas\n", - "\n", - "Pandas is an open-source, BSD-licensed Python library providing high-performance, easy-to-use data structures and data analysis tools for the Python programming language. Python with Pandas is used in a wide range of fields including academic and commercial domains including finance, economics, Statistics, analytics, etc.In this tutorial, we will learn the various features of Python Pandas and how to use them in practice.\n", - "\n", - "\n", - "## Import pandas and numpy" - ] - }, - { - "metadata": { - "id": "irlVYeeAXPDL", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "import pandas as pd\n", - "import numpy as np" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "BI2J-zdMbGwE", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "### This is your playground feel free to explore other functions on pandas\n", - "\n", - "#### Create Series from numpy array, list and dict\n", - "\n", - "Don't know what a series is?\n", - "\n", - "[Series Doc](https://pandas.pydata.org/pandas-docs/version/0.22/generated/pandas.Series.html)" - ] - }, - { - "metadata": { - "id": "GeEct691YGE3", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "a_ascii = ord('A')\n", - "z_ascii = ord('Z')\n", - "alphabets = [chr(i) for i in range(a_ascii, z_ascii+1)]\n", - "\n", - "print(alphabets)\n", - "\n", - "numbers = 
np.arange(26)\n", - "\n", - "print(numbers)\n", - "\n", - "print(type(alphabets), type(numbers))\n", - "\n", - "alpha_numbers = dict(zip(alphabets, numbers))\n", - "\n", - "print(alpha_numbers)\n", - "\n", - "print(type(alpha_numbers))" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "6ouDfjWab_Mc", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "series1 = pd.Series(alphabets)\n", - "print(series1)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "At7nY7vVcBZ3", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "series2 = pd.Series(numbers)\n", - "print(series2)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "J5z-2CWAdH6N", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "series3 = pd.Series(alpha_numbers)\n", - "print(series3)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "fYzblGGudKjO", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "#replace head() with head(n) where n can be any number between [0-25] and observe the output in deach case \n", - "series3.head()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "OwsJIf5feTtg", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Create DataFrame from lists\n", - "\n", - "[DataFrame Doc](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html)" - ] - }, - { - "metadata": { - "id": "73UTZ07EdWki", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "data = {'alphabets': alphabets, 'values': numbers}\n", - "\n", - "df = pd.DataFrame(data)\n", - "\n", - "#Lets Change the column `values` to `alpha_numbers`\n", - "\n", - "#df.columns = ['alphabets', 'alpha_numbers']\n", - "\n", - "df" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": 
"uaK_1EO9etGS", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "# transpose\n", - "\n", - "df.T\n", - "\n", - "# there are many more operations which we can perform look at the documentation with the subsequent exercises we will learn more" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "ZYonoaW8gEAJ", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Extract Items from a series" - ] - }, - { - "metadata": { - "id": "tc1-KX_Bfe7U", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))\n", - "pos = [0, 4, 8, 14, 20]\n", - "\n", - "vowels = ser.take(pos)\n", - "\n", - "df = pd.DataFrame(vowels)#, columns=['vowels'])\n", - "\n", - "df.columns = ['vowels']\n", - "\n", - "#df.index = [0, 1, 2, 3, 4]\n", - "\n", - "df" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "cmDxwtDNjWpO", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Change the first character of each word to upper case in each word of ser" - ] - }, - { - "metadata": { - "id": "5KagP9PpgV2F", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "ser = pd.Series(['we', 'are', 'learning', 'pandas'])\n", - "\n", - "ser.map(lambda x : x.title())\n", - "\n", - "titles = [i.title() for i in ser]\n", - "\n", - "titles" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "qn47ee-MkZN8", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Reindexing" - ] - }, - { - "metadata": { - "id": "h5R0JL2NjuFS", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "my_index = [1, 2, 3, 4, 5]\n", - "\n", - "df1 = pd.DataFrame({'upper values': ['A', 'B', 'C', 'D', 'E'],\n", - " 'lower values': ['a', 'b', 'c', 'd', 'e']},\n", - " index = my_index)\n", - "\n", - "df1" - ], - "execution_count": 0, 
- "outputs": [] - }, - { - "metadata": { - "id": "G_Frvc3mk93k", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "new_index = [2, 5, 4, 3, 1]\n", - "\n", - "df1.reindex(index = new_index)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "J82LU53m_OU0", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "# Get to know your Data\n", - "\n", - "\n", - "#### Import necessary modules\n" - ] - }, - { - "metadata": { - "id": "ZyO1UXL8mtSj", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "import pandas as pd\n", - "import numpy as np" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "yXTzTowtnwGI", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Loading CSV Data to a DataFrame" - ] - }, - { - "metadata": { - "id": "H1Bjlb5wm9f-", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "iris_df = pd.read_csv('https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv')\n" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "KE-k7b_Mn5iN", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### See the top 10 rows\n" - ] - }, - { - "metadata": { - "id": "HY2Ps7xMn4ao", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "iris_df.head()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "ZQXekIodqOZu", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Find number of rows and columns\n" - ] - }, - { - "metadata": { - "id": "6Y-A-lbFqR82", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "print(iris_df.shape)\n", - "\n", - "#first is row and second is column\n", - "#select row by simple indexing\n", - "\n", - "#print(iris_df.shape[0])\n", - "#print(iris_df.shape[1])" - ], - "execution_count": 0, - 
"outputs": [] - }, - { - "metadata": { - "id": "4ckCiGPhrC_t", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Print all columns" - ] - }, - { - "metadata": { - "id": "S6jgMyRDrF2a", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "print(iris_df.columns)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "kVav5-ACtIqS", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Check Index\n" - ] - }, - { - "metadata": { - "id": "iu3I9zIGtLDX", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "print(iris_df.index)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "psCc7PborOCQ", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Right now the iris_data set has all the species grouped together let's shuffle it" - ] - }, - { - "metadata": { - "id": "Bxc8i6avrZPw", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "#generate a random permutaion on index\n", - "\n", - "print(iris_df.head())\n", - "\n", - "new_index = np.random.permutation(iris_df.index)\n", - "iris_df = iris_df.reindex(index = new_index)\n", - "\n", - "print(iris_df.head())" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "j32h8022sRT8", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### We can also apply an operation on whole column of iris_df" - ] - }, - { - "metadata": { - "id": "seYXHXsYsYJI", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "#original\n", - "\n", - "print(iris_df.head())\n", - "\n", - "iris_df['sepal_width'] *= 10\n", - "\n", - "#changed\n", - "\n", - "print(iris_df.head())\n", - "\n", - "#lets undo the operation\n", - "\n", - "iris_df['sepal_width'] /= 10\n", - "\n", - "print(iris_df.head())" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - 
"id": "R-Ca-LBLzjiF", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Show all the rows where sepal_width > 3.3" - ] - }, - { - "metadata": { - "id": "WJ7W-F-d0AoZ", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "iris_df[iris_df['sepal_width']>3.3]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "gH3DnhCq2Cbl", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Club two filters together - Find all samples where sepal_width > 3.3 and species is versicolor" - ] - }, - { - "metadata": { - "id": "4U7ksr_R2H7M", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "iris_df[(iris_df['sepal_width']>3.3) & (iris_df['species'] == 'versicolor')] " - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "1lmnB3ot2u7I", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Sorting a column by value" - ] - }, - { - "metadata": { - "id": "K7KIj6fv2zWP", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "iris_df.sort_values(by='sepal_width')#, ascending = False)\n", - "#pass ascending = False for descending order" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "9jg_Z4YCoMSV", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### List all the unique species" - ] - }, - { - "metadata": { - "id": "M6EN78ufoJY7", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "species = iris_df['species'].unique()\n", - "\n", - "print(species)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "wG1i5nxBodmB", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Selecting a particular species using boolean mask (learnt in previous exercise)" - ] - }, - { - "metadata": { - "id": "gZvpbKBwoVUe", - "colab_type": "code", - "colab": {} - }, - 
"cell_type": "code", - "source": [ - "setosa = iris_df[iris_df['species'] == species[0]]\n", - "\n", - "setosa.head()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "7tumfZ3DotPG", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "# do the same for other 2 species \n", - "versicolor = iris_df[iris_df['species'] == species[1]]\n", - "\n", - "versicolor.head()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "cUYm5UqVpDPy", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "\n", - "\n", - "virginica = iris_df[iris_df['species'] == species[2]]\n", - "\n", - "virginica.head()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "-y1wDc8SpdQs", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Describe each created species to see the difference\n", - "\n" - ] - }, - { - "metadata": { - "id": "eHrn3ZVRpOk5", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "setosa.describe()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "GwJFT2GlpwUv", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "versicolor.describe()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "Ad4qhSZLpztf", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "virginica.describe()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "Vdu0ulZWtr09", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Let's plot and see the difference" - ] - }, - { - "metadata": { - "id": "PEVMzRvpttmD", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "##### import matplotlib.pyplot " - ] - }, - { - "metadata": { - "id": "rqDXuuAtt7C3", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - 
"import matplotlib.pyplot as plt\n", - "\n", - "#hist creates a histogram there are many more plots(see the documentation) you can play with it.\n", - "\n", - "plt.hist(setosa['sepal_length'])\n", - "plt.hist(versicolor['sepal_length'])\n", - "plt.hist(virginica['sepal_length'])" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "2LTtpUJEibjg", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "# Pandas Exercise :\n", - "\n", - "\n", - "#### import necessary modules" - ] - }, - { - "metadata": { - "id": "c3_UBbMRhiKx", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "import numpy as np\n", - "import pandas as pd" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "tp-cTCyWi8mR", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Load url = \"https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\" to a dataframe named wine_df\n", - "\n", - "This is a wine dataset\n", - "\n" - ] - }, - { - "metadata": { - "id": "DMojQY3thrRi", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "BF9MMjoZjSlg", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### print first five rows" - ] - }, - { - "metadata": { - "id": "1vSMQdnHjYNU", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "Tet6P2DvjY3T", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### assign wine_df to a different variable wine_df_copy and then delete all odd rows of wine_df_copy\n", - "\n", - "[Hint](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.drop.html)" - ] - }, - { - "metadata": { - "id": "CMj3qSdJjx0u", - "colab_type": "code", - "colab": {} - }, - "cell_type": 
"code", - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "o6Cs6T1Rjz71", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Assign the columns as below:\n", - "\n", - "The attributes are (dontated by Riccardo Leardi, riclea '@' anchem.unige.it): \n", - "1) Alcohol \n", - "2) Malic acid \n", - "3) Ash \n", - "4) Alcalinity of ash \n", - "5) Magnesium \n", - "6) Total phenols \n", - "7) Flavanoids \n", - "8) Nonflavanoid phenols \n", - "9) Proanthocyanins \n", - "10)Color intensity \n", - "11)Hue \n", - "12)OD280/OD315 of diluted wines \n", - "13)Proline " - ] - }, - { - "metadata": { - "id": "my8HB4V4j779", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "Zqi7hwWpkNbH", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Set the values of the first 3 rows from alcohol as NaN\n", - "\n", - "Hint- Use iloc to select 3 rows of wine_df" - ] - }, - { - "metadata": { - "id": "buyT4vX4kPMl", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "RQMNI2UHkP3o", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Create an array of 10 random numbers uptill 10 and assign it to a variable named `random`" - ] - }, - { - "metadata": { - "id": "xunmCjaEmDwZ", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "hELUakyXmFSu", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Use random numbers you generated as an index and assign NaN value to each of cell of the column alcohol" - ] - }, - { - "metadata": { - "id": "zMgaNnNHmP01", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - 
"source": [ - "" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "PHyK_vRsmRwV", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### How many missing values do we have? \n", - "\n", - "Hint: you can use isnull() and sum()" - ] - }, - { - "metadata": { - "id": "EnOYhmEqmfKp", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "-Fd4WBklmf1_", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "#### Delete the rows that contain missing values " - ] - }, - { - "metadata": { - "id": "As7IC6Ktms8-", - "colab_type": "code", - "colab": {} - }, - "cell_type": "code", - "source": [ - "" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "DlpG8drhmz7W", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "### BONUS: Play with the data set below" - ] - }, - { - "metadata": { - "id": "mD40T0Cnm5SA", + "id": "dQmdW6EN-S2E", "colab_type": "code", "colab": {} },