From d615d11243e928840530656692b2b01dd1cc288c Mon Sep 17 00:00:00 2001 From: Foton Date: Tue, 4 Jun 2024 13:44:59 +0300 Subject: [PATCH 1/3] Release Data Science Libraries Lesson4 --- Lesson4/Task2.ipynb | 6815 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 6815 insertions(+) create mode 100644 Lesson4/Task2.ipynb diff --git a/Lesson4/Task2.ipynb b/Lesson4/Task2.ipynb new file mode 100644 index 0000000..ac6e6bf --- /dev/null +++ b/Lesson4/Task2.ipynb @@ -0,0 +1,6815 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b27e41e4", + "metadata": {}, + "source": [ + "## Тема “Визуализация данных в Matplotlib”" + ] + }, + { + "cell_type": "markdown", + "id": "f324024b", + "metadata": {}, + "source": [ + "### Задание 1\n", + "Загрузите модуль pyplot библиотеки matplotlib с псевдонимом plt, а также библиотеку numpy с\n", + "псевдонимом np.\n", + "Примените магическую функцию %matplotlib inline для отображения графиков в Jupyter Notebook и\n", + "настройки конфигурации ноутбука со значением 'svg' для более четкого отображения графиков.\n", + "Создайте список под названием x с числами 1, 2, 3, 4, 5, 6, 7 и список y с числами 3.5, 3.8, 4.2, 4.5, 5,\n", + "5.5, 7.\n", + "С помощью функции plot постройте график, соединяющий линиями точки с горизонтальными\n", + "координатами из списка x и вертикальными - из списка y.\n", + "Затем в следующей ячейке постройте диаграмму рассеяния (другие названия - диаграмма разброса,\n", + "scatter plot)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "99e11a55", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from random import seed\n", + "from matplotlib import pyplot as plt\n", + "\n", + "plt.style.use('fivethirtyeight')\n", + "\n", + "%matplotlib inline\n", + "%config InlineBackend.figure_format = 'svg'" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "bd0d7835", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2024-06-04T11:17:30.715908\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.9.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x = [1, 2, 3, 4, 5, 6, 7]\n", + "y = [3.5, 3.8, 4.2, 4.5, 5, 5.5, 7]\n", + "\n", + "plt.plot(x, y)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "91fc998c", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2024-06-03T11:42:36.642755\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.9.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(x, y)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "fbf3b995", + "metadata": {}, + "source": [ + "### Задание 2\n", + "С помощью функции linspace из библиотеки Numpy создайте массив t из 51 числа от 0 до 10\n", + "включительно.\n", + "© geekbrains.ru\n", + "Создайте массив Numpy под названием f, содержащий косинусы элементов массива t.\n", + "Постройте линейную диаграмму, используя массив t для координат по горизонтали,а массив f - для\n", + "координат по вертикали. Линия графика должна быть зеленого цвета.\n", + "Выведите название диаграммы - 'График f(t)'. Также добавьте названия для горизонтальной оси -\n", + "'Значения t' и для вертикальной - 'Значения f'.\n", + "Ограничьте график по оси x значениями 0.5 и 9.5, а по оси y - значениями -2.5 и 2.5." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f19e1fe4", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2024-06-03T11:42:36.735077\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.9.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "t = np.linspace(0, 10, 51)\n", + "\n", + "f = np.cos(t)\n", + "\n", + "plt.plot(t, f, color=\"green\")\n", + "\n", + "plt.title(\"График f(t)\")\n", + "plt.xlabel(\"Значения t\")\n", + "plt.ylabel(\"Значения f\")\n", + "\n", + "plt.axis([0.5, 9.5, -2.5, 2.5])\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "73611d04", + "metadata": {}, + "source": [ + "### Задание 3\n", + "С помощью функции linspace библиотеки Numpy создайте массив x из 51 числа от -3 до 3\n", + "включительно.\n", + "Создайте массивы y1, y2, y3, y4 по следующим формулам:\n", + "y1 = x**2\n", + "y2 = 2 * x + 0.5\n", + "y3 = -3 * x - 1.5\n", + "y4 = sin(x)\n", + "Используя функцию subplots модуля matplotlib.pyplot, создайте объект matplotlib.figure.Figure с\n", + "названием fig и массив объектов Axes под названием ax, причем так, чтобы у вас было 4 отдельных\n", + "графика в сетке, состоящей из двух строк и двух столбцов. 
В каждом графике массив x используется\n", + "для координат по горизонтали.В левом верхнем графике для координат по вертикали используйте\n", + "y1,в правом верхнем - y2, в левом нижнем - y3, в правом нижнем - y4.Дайте название графикам:\n", + "'График y1', 'График y2' и т.д.\n", + "Для графика в левом верхнем углу установите границы по оси x от -5 до 5.\n", + "Установите размеры фигуры 8 дюймов по горизонтали и 6 дюймов по вертикали.\n", + "Вертикальные и горизонтальные зазоры между графиками должны составлять 0.3" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ea585c38", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'График y4')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2024-06-03T11:42:37.042700\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.9.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x = np.linspace(-3, 3, 51)\n", + "\n", + "y1 = x**2\n", + "y2 = 2*x + 0.5\n", + "y3 = -3*x - 1.5\n", + "y4 = np.sin(x)\n", + "\n", + "fig, ax = plt.subplots(nrows=2, ncols=2)\n", + "\n", + "fig.set_size_inches(8, 6)\n", + "fig.subplots_adjust(wspace=0.3, hspace=0.3)\n", + "\n", + "ax1, ax2, ax3, ax4 = ax.flatten()\n", + "\n", + "ax1.plot(x, y1)\n", + "ax1.set_title(\"График y1\")\n", + "ax1.set_xlim([-5, 5])\n", + "ax2.plot(x, y2)\n", + "ax2.set_title(\"График y2\")\n", + "ax3.plot(x, y3)\n", + "ax3.set_title(\"График y3\")\n", + "ax4.plot(x, y4)\n", + "ax4.set_title(\"График y4\")" + ] + }, + { + "cell_type": "markdown", + "id": "c77b9177", + "metadata": {}, + "source": [ + "### Задание 4\n", + "В этом задании мы будем работать с датасетом, в котором приведены данные по мошенничеству с\n", + "кредитными данными: Credit Card Fraud Detection (информация об авторах: Andrea Dal Pozzolo, Olivier\n", + "Caelen, Reid A. Johnson and Gianluca Bontempi. Calibrating Probability with Undersampling for Unbalanced\n", + "Classification. In Symposium on Computational Intelligence and Data Mining (CIDM), IEEE, 2015).\n", + "Ознакомьтесь с описанием и скачайте датасет creditcard.csv с сайта Kaggle.com по ссылке:\n", + "Credit Card Fraud Detection\n", + "Данный датасет является примером несбалансированных данных, так как мошеннические операции с\n", + "картами встречаются реже обычных.\n", + "Импортируйте библиотеку Pandas, а также используйте для графиков стиль “fivethirtyeight”.\n", + "© geekbrains.ru 1\n", + "Посчитайте с помощью метода value_counts количество наблюдений для каждого значения целевой\n", + "переменной Class и примените к полученным данным метод plot, чтобы построить столбчатую\n", + "диаграмму. 
Затем постройте такую же диаграмму, используя логарифмический масштаб.\n", + "На следующем графике постройте две гистограммы по значениям признака V1 - одну для\n", + "мошеннических транзакций (Class равен 1) и другую - для обычных (Class равен 0). Подберите\n", + "значение аргумента density так, чтобы по вертикали графика было расположено не число\n", + "наблюдений, а плотность распределения. Число бинов должно равняться 20 для обеих гистограмм, а\n", + "коэффициент alpha сделайте равным 0.5, чтобы гистограммы были полупрозрачными и не\n", + "загораживали друг друга. Создайте легенду с двумя значениями: “Class 0” и “Class 1”. Гистограмма\n", + "обычных транзакций должна быть серого цвета, а мошеннических - красного. Горизонтальной оси\n", + "дайте название “V1”." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "65f2dc4a-f788-4f45-b8d7-7bd6de270815", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset URL: https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud\n", + "License(s): DbCL-1.0\n", + "Downloading creditcardfraud.zip to W:\\Projects\\GB\\Python\\MLearning\\DSLibraries\\Lesson4\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " 0%| | 0.00/66.0M [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.363787...-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425...-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.514654...0.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024...-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.817739...-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
\n", + "

5 rows × 31 columns

\n", + "" + ], + "text/plain": [ + " Time V1 V2 V3 V4 V5 V6 V7 \\\n", + "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n", + "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n", + "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n", + "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n", + "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n", + "\n", + " V8 V9 ... V21 V22 V23 V24 V25 \\\n", + "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n", + "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n", + "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n", + "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n", + "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n", + "\n", + " V26 V27 V28 Amount Class \n", + "0 -0.189115 0.133558 -0.021053 149.62 0 \n", + "1 0.125895 -0.008983 0.014724 2.69 0 \n", + "2 -0.139097 -0.055353 -0.059752 378.66 0 \n", + "3 -0.221929 0.062723 0.061458 123.50 0 \n", + "4 0.502292 0.219422 0.215153 69.99 0 \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "d4fae789", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
count284807.0000002.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05...2.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05284807.000000284807.000000
mean94813.8595751.168375e-153.416908e-16-1.379537e-152.074095e-159.604066e-161.487313e-15-5.556467e-161.213481e-16-2.406331e-15...1.654067e-16-3.568593e-162.578648e-164.473266e-155.340915e-161.683437e-15-3.660091e-16-1.227390e-1688.3496190.001727
std47488.1459551.958696e+001.651309e+001.516255e+001.415869e+001.380247e+001.332271e+001.237094e+001.194353e+001.098632e+00...7.345240e-017.257016e-016.244603e-016.056471e-015.212781e-014.822270e-014.036325e-013.300833e-01250.1201090.041527
min0.000000-5.640751e+01-7.271573e+01-4.832559e+01-5.683171e+00-1.137433e+02-2.616051e+01-4.355724e+01-7.321672e+01-1.343407e+01...-3.483038e+01-1.093314e+01-4.480774e+01-2.836627e+00-1.029540e+01-2.604551e+00-2.256568e+01-1.543008e+010.0000000.000000
25%54201.500000-9.203734e-01-5.985499e-01-8.903648e-01-8.486401e-01-6.915971e-01-7.682956e-01-5.540759e-01-2.086297e-01-6.430976e-01...-2.283949e-01-5.423504e-01-1.618463e-01-3.545861e-01-3.171451e-01-3.269839e-01-7.083953e-02-5.295979e-025.6000000.000000
50%84692.0000001.810880e-026.548556e-021.798463e-01-1.984653e-02-5.433583e-02-2.741871e-014.010308e-022.235804e-02-5.142873e-02...-2.945017e-026.781943e-03-1.119293e-024.097606e-021.659350e-02-5.213911e-021.342146e-031.124383e-0222.0000000.000000
75%139320.5000001.315642e+008.037239e-011.027196e+007.433413e-016.119264e-013.985649e-015.704361e-013.273459e-015.971390e-01...1.863772e-015.285536e-011.476421e-014.395266e-013.507156e-012.409522e-019.104512e-027.827995e-0277.1650000.000000
max172792.0000002.454930e+002.205773e+019.382558e+001.687534e+013.480167e+017.330163e+011.205895e+022.000721e+011.559499e+01...2.720284e+011.050309e+012.252841e+014.584549e+007.519589e+003.517346e+003.161220e+013.384781e+0125691.1600001.000000
\n", + "

8 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " Time V1 V2 V3 V4 \\\n", + "count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean 94813.859575 1.168375e-15 3.416908e-16 -1.379537e-15 2.074095e-15 \n", + "std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 \n", + "min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 \n", + "25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 \n", + "50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 \n", + "75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 \n", + "max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 \n", + "\n", + " V5 V6 V7 V8 V9 \\\n", + "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean 9.604066e-16 1.487313e-15 -5.556467e-16 1.213481e-16 -2.406331e-15 \n", + "std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00 \n", + "min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01 \n", + "25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01 \n", + "50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02 \n", + "75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01 \n", + "max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01 \n", + "\n", + " ... V21 V22 V23 V24 \\\n", + "count ... 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", + "mean ... 1.654067e-16 -3.568593e-16 2.578648e-16 4.473266e-15 \n", + "std ... 7.345240e-01 7.257016e-01 6.244603e-01 6.056471e-01 \n", + "min ... -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00 \n", + "25% ... -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01 \n", + "50% ... -2.945017e-02 6.781943e-03 -1.119293e-02 4.097606e-02 \n", + "75% ... 1.863772e-01 5.285536e-01 1.476421e-01 4.395266e-01 \n", + "max ... 
2.720284e+01 1.050309e+01 2.252841e+01 4.584549e+00 \n", + "\n", + " V25 V26 V27 V28 Amount \\\n", + "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 284807.000000 \n", + "mean 5.340915e-16 1.683437e-15 -3.660091e-16 -1.227390e-16 88.349619 \n", + "std 5.212781e-01 4.822270e-01 4.036325e-01 3.300833e-01 250.120109 \n", + "min -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01 0.000000 \n", + "25% -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 5.600000 \n", + "50% 1.659350e-02 -5.213911e-02 1.342146e-03 1.124383e-02 22.000000 \n", + "75% 3.507156e-01 2.409522e-01 9.104512e-02 7.827995e-02 77.165000 \n", + "max 7.519589e+00 3.517346e+00 3.161220e+01 3.384781e+01 25691.160000 \n", + "\n", + " Class \n", + "count 284807.000000 \n", + "mean 0.001727 \n", + "std 0.041527 \n", + "min 0.000000 \n", + "25% 0.000000 \n", + "50% 0.000000 \n", + "75% 0.000000 \n", + "max 1.000000 \n", + "\n", + "[8 rows x 31 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ab04cf33-e36b-4991-917b-e91a2dfa7fcd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 284807 entries, 0 to 284806\n", + "Data columns (total 31 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Time 284807 non-null float64\n", + " 1 V1 284807 non-null float64\n", + " 2 V2 284807 non-null float64\n", + " 3 V3 284807 non-null float64\n", + " 4 V4 284807 non-null float64\n", + " 5 V5 284807 non-null float64\n", + " 6 V6 284807 non-null float64\n", + " 7 V7 284807 non-null float64\n", + " 8 V8 284807 non-null float64\n", + " 9 V9 284807 non-null float64\n", + " 10 V10 284807 non-null float64\n", + " 11 V11 284807 non-null float64\n", + " 12 V12 284807 non-null float64\n", + " 13 V13 284807 non-null float64\n", + " 14 V14 284807 
non-null float64\n", + " 15 V15 284807 non-null float64\n", + " 16 V16 284807 non-null float64\n", + " 17 V17 284807 non-null float64\n", + " 18 V18 284807 non-null float64\n", + " 19 V19 284807 non-null float64\n", + " 20 V20 284807 non-null float64\n", + " 21 V21 284807 non-null float64\n", + " 22 V22 284807 non-null float64\n", + " 23 V23 284807 non-null float64\n", + " 24 V24 284807 non-null float64\n", + " 25 V25 284807 non-null float64\n", + " 26 V26 284807 non-null float64\n", + " 27 V27 284807 non-null float64\n", + " 28 V28 284807 non-null float64\n", + " 29 Amount 284807 non-null float64\n", + " 30 Class 284807 non-null int64 \n", + "dtypes: float64(30), int64(1)\n", + "memory usage: 67.4 MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "6c31fc17", + "metadata": {}, + "outputs": [], + "source": [ + "class_counts = df.Class.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "29b823c1", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2024-06-03T12:53:10.650385\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.9.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "class_counts.plot(kind=\"bar\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "918cf628", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2024-06-03T12:53:11.836204\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.9.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "class_counts.plot(kind=\"bar\", logy=True)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b6af24e9", + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " 2024-06-03T12:53:14.097839\n", + " image/svg+xml\n", + " \n", + " \n", + " Matplotlib v3.9.0, https://matplotlib.org/\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots()\n", + "\n", + "V1_0 = df[df[\"Class\"]==0].V1\n", + "\n", + "ax.set_xlabel(\"V1\")\n", + "ax.axis([-30, 5, 0, 0.2])\n", + "\n", + "df[df[\"Class\"]==0].V1.hist(ax=ax, label = \"Class 0\", bins=20, alpha=0.5, color='gray', density=True)\n", + "df[df[\"Class\"]==1].V1.hist(ax=ax, label = \"Class 1\", bins=20, alpha=0.5, color='red', density=True)\n", + "\n", + "legend = fig.legend(loc=\"upper right\", frameon=False)" + ] + }, + { + "cell_type": "markdown", + "id": "af2c4346", + "metadata": {}, + "source": [ + "## Задание на повторение материала\n", + "### 1. Создать одномерный массив Numpy под названием a из 12 последовательных целых чисел чисел от 12 до 24 невключительно" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "557a8256-9c01-4f25-a014-0d228c69024c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[12 13 14 15 16 17 18 19 20 21 22 23]\n" + ] + } + ], + "source": [ + "a = np.arange(12, 24, dtype=int)\n", + " \n", + "print(a)" + ] + }, + { + "cell_type": "markdown", + "id": "1c77f2b4-a4a1-4758-ab20-9c95b50a7228", + "metadata": {}, + "source": [ + "### 2. Создать 5 двумерных массивов разной формы из массива a. Не использовать в аргументах метода reshape число -1." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "c532a55c-a2e4-41b8-88fc-57202937c2df", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12],\n", + " [13],\n", + " [14],\n", + " [15],\n", + " [16],\n", + " [17],\n", + " [18],\n", + " [19],\n", + " [20],\n", + " [21],\n", + " [22],\n", + " [23]])" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (12, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "fdd1014d-c6f5-48bc-b047-81038d8fd827", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 13, 14, 15, 16, 17],\n", + " [18, 19, 20, 21, 22, 23]])" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (2, 6))" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "0ad59875-d3b2-4e5e-acfe-574669e8a068", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 13],\n", + " [14, 15],\n", + " [16, 17],\n", + " [18, 19],\n", + " [20, 21],\n", + " [22, 23]])" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (6, 2))" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "fc9a8aaa-d246-4876-a818-3bbb268a4a06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 13, 14, 15],\n", + " [16, 17, 18, 19],\n", + " [20, 21, 22, 23]])" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (3, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "f58c5c92-4e45-4b11-8ee9-03930d40213e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 13, 14],\n", + " [15, 16, 17],\n", + " [18, 19, 20],\n", + " [21, 22, 23]])" + ] + }, + "execution_count": 47, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (4, 3))" + ] + }, + { + "cell_type": "markdown", + "id": "dda06888-4535-4b11-a8ac-8366cb665662", + "metadata": {}, + "source": [ + "### 3. Создать 5 двумерных массивов разной формы из массива a. Использовать в аргументах метода reshape число -1 (в трех примерах - для обозначения числа столбцов, в двух - для строк)." + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "5d2eceed-d468-43e2-a896-03db80347de7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 13, 14, 15, 16, 17],\n", + " [18, 19, 20, 21, 22, 23]])" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (2, -1))" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "5d4e33c6-9991-48f1-92dc-fe039f1f1403", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 13, 14, 15],\n", + " [16, 17, 18, 19],\n", + " [20, 21, 22, 23]])" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (3, -1))" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "7c5ee813-ab4a-44eb-af9b-0c6f086ff275", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 13, 14],\n", + " [15, 16, 17],\n", + " [18, 19, 20],\n", + " [21, 22, 23]])" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (4, -1))" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "19305afe-f66e-439d-86fb-4f1a8e9289fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 13],\n", + " [14, 15],\n", + " [16, 17],\n", + " [18, 19],\n", + " [20, 21],\n", + " [22, 23]])" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (-1, 
2))" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "021fef61-9517-4ed0-9a49-f0dd68416dff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[12, 13, 14, 15, 16, 17],\n", + " [18, 19, 20, 21, 22, 23]])" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.reshape(a, (-1, 6))" + ] + }, + { + "cell_type": "markdown", + "id": "aae22acf-9657-4492-b4d8-d6707dd79d6e", + "metadata": {}, + "source": [ + "### 4. Можно ли массив Numpy, состоящий из одного столбца и 12 строк, назвать одномерным?" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "a039beed-b1fb-428a-995b-f4788b8ba5c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "shape=(12, 1)=>размерность=2, следоватьельно одномерным может быть только вектор\n" + ] + } + ], + "source": [ + "b = np.resize(a, (12, 1))\n", + "\n", + "print(f\"shape={b.shape}=>размерность={b.ndim}, следоватьельно одномерным может быть только вектор\")" + ] + }, + { + "cell_type": "markdown", + "id": "65303edd-4937-4b7e-85f0-82412f9e320b", + "metadata": {}, + "source": [ + "### 5. Создать массив из 3 строк и 4 столбцов, состоящий из случайных чисел с плавающей запятой из нормального распределения со средним, равным 0 и среднеквадратичным отклонением, равным 1.0. Получить из этого массива одномерный массив с таким же атрибутом size, как и исходный массив." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "6f9c7bba-d9b2-4e26-8630-8322e6d1f329", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 5.30087681e-01 -1.00730270e+00 3.49023257e-01 1.17610515e-01]\n", + " [-3.73873348e-05 -4.65003983e-01 1.17452549e+00 3.80377532e-02]\n", + " [-4.71755245e-02 3.99082256e-01 -1.15223721e-01 -2.65055731e+00]]\n", + "[[ 5.30087681e-01 -1.00730270e+00 3.49023257e-01 1.17610515e-01\n", + " -3.73873348e-05 -4.65003983e-01 1.17452549e+00 3.80377532e-02\n", + " -4.71755245e-02 3.99082256e-01 -1.15223721e-01 -2.65055731e+00]]\n", + "a.size = 12 <=> 12 = b.size\n" + ] + } + ], + "source": [ + "a = np.random.randn(3, 4)\n", + "b = a.reshape(1, 12)\n", + "print(a)\n", + "print(b)\n", + "print(f\"a.size = {a.size} <=> {b.size} = b.size\")" + ] + }, + { + "cell_type": "markdown", + "id": "6b177aff-4007-4af0-a927-82cb03183362", + "metadata": {}, + "source": [ + "### 6. Создать массив a, состоящий из целых чисел, убывающих от 20 до 0 невключительно с интервалом 2." + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "a78dbbed-cbf3-4536-8e0a-f19586aa9218", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[20 18 16 14 12 10 8 6 4 2]\n" + ] + } + ], + "source": [ + "a = np.arange(20, 0, -2, dtype=int)\n", + "\n", + "print(a)" + ] + }, + { + "cell_type": "markdown", + "id": "3edb8cc8-ca5a-414f-9928-0c3be3fae6a1", + "metadata": {}, + "source": [ + "### 7. Создать массив b, состоящий из 1 строки и 10 столбцов: целых чисел, убывающих от 20 до 1 невключительно с интервалом 2. В чем разница между массивами a и b?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "de8d49ea-a843-46ff-ab8f-f2b967cebeb2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[20 18 16 14 12 10 8 6 4 2] = [20 18 16 14 12 10 8 6 4 2] => разницы нет\n" + ] + } + ], + "source": [ + "b = np.arange(20, 1, -2, dtype=int)\n", + "\n", + "print(f\"{a} = {b} => разницы нет\")" + ] + }, + { + "cell_type": "markdown", + "id": "48add411-602f-46b7-9cb5-98388a6d742c", + "metadata": {}, + "source": [ + "### 8. Вертикально соединить массивы a и b. a - двумерный массив из нулей, число строк которого больше 1 и на 1 меньше, чем число строк двумерного массива b, состоящего из единиц. Итоговый массив v должен иметь атрибут size, равный 10." + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "e64df321-366c-4400-a406-b775e19a6870", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0. 0.]\n", + " [0. 0.]\n", + " [1. 1.]\n", + " [1. 1.]\n", + " [1. 1.]] \n", + " size=10\n" + ] + } + ], + "source": [ + "a = np.zeros((2, 2))\n", + "b = np.ones((3,2))\n", + "c = np.vstack((a, b))\n", + "print(f\"{c} \\n size={c.size}\")" + ] + }, + { + "cell_type": "markdown", + "id": "85da760f-5076-4e9a-acb4-4e756ac72407", + "metadata": {}, + "source": [ + "### 9. Создать одномерный массив а, состоящий из последовательности целых чисел от 0 до 12. Поменять форму этого массива, чтобы получилась матрица A (двумерный массив Numpy), состоящая из 4 строк и 3 столбцов. Получить матрицу At путем транспонирования матрицы A. Получить матрицу B, умножив матрицу A на матрицу At с помощью матричного умножения. Какой размер имеет матрица B? Получится ли вычислить обратную матрицу для матрицы B и почему?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "bc97b82a-eaec-4f6c-b9c8-4159dc45c44f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "матрица B =\n", + "[[ 5 14 23 32]\n", + " [ 14 50 86 122]\n", + " [ 23 86 149 212]\n", + " [ 32 122 212 302]]\n", + "размер = (4, 4)\n", + "определитель = 0 => обратная матрица не существует\n" + ] + } + ], + "source": [ + "a = np.arange(0, 12, dtype=int)\n", + "A = np.reshape(a, (4, -1))\n", + "At = A.T\n", + "B = np.dot(A, At)\n", + "print(f\"матрица B =\\n{B}\\nразмер = {B.shape}\\nопределитель = {np.linalg.det(B):1.0f} => обратная матрица не существует\")" + ] + }, + { + "cell_type": "markdown", + "id": "957e860b-9d20-46cf-91b0-2662c2faeebb", + "metadata": {}, + "source": [ + "### 10. Инициализируйте генератор случайных чисел с помощью объекта seed, равного 42." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "97c0fa11-6530-461b-bb89-65cb6d38b83f", + "metadata": {}, + "outputs": [], + "source": [ + "seed(42)" + ] + }, + { + "cell_type": "markdown", + "id": "7d7b04ea-a205-4a64-bf93-f68c62b69b5f", + "metadata": {}, + "source": [ + "### 11. Создайте одномерный массив c, составленный из последовательности 16-ти случайных равномерно распределенных целых чисел от 0 до 16 невключительно." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "b307b5d5-e3aa-44b8-94f3-2a9b8877fd5b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 16]\n" + ] + } + ], + "source": [ + "c = np.linspace(start=0, stop=16, num=16, dtype=int)\n", + "print(c)" + ] + }, + { + "cell_type": "markdown", + "id": "3107f89b-c513-4eeb-b21f-d091920b51b3", + "metadata": {}, + "source": [ + "### 12. Поменяйте его форму так, чтобы получилась квадратная матрица C. Получите матрицу D, поэлементно прибавив матрицу B из предыдущего вопроса к матрице C, умноженной на 10. 
Вычислите определитель, ранг и обратную матрицу D_inv для D." + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "721efedc-bcdd-4aa0-b7bf-58469b31fbf2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "C =\n", + "[[ 0 1 2 3]\n", + " [ 4 5 6 7]\n", + " [ 8 9 10 11]\n", + " [12 13 14 16]]\n", + "D =\n", + "[[ 5 24 43 62]\n", + " [ 54 100 146 192]\n", + " [103 176 249 322]\n", + " [152 252 352 462]]\n", + "определитель = 2.2623680706601445e-10\n", + "ранг = 3\n", + "D_inv =\n", + "[[-3.51843721e+13 7.03687442e+13 -3.51843721e+13 -3.12500000e-03]\n", + " [ 7.03687442e+13 -1.40737488e+14 7.03687442e+13 1.06250000e-01]\n", + " [-3.51843721e+13 7.03687442e+13 -3.51843721e+13 -2.03125000e-01]\n", + " [ 2.00000000e-01 -3.00000000e-01 -0.00000000e+00 1.00000000e-01]]\n" + ] + } + ], + "source": [ + "C = np.reshape(c, (4, -1))\n", + "print(f\"C =\\n{C}\")\n", + "D = B + C * 10\n", + "D_inv = np.linalg.inv(D)\n", + "print(f\"D =\\n{D}\\nопределитель = {np.linalg.det(D)}\\nранг = {np.linalg.matrix_rank(D)}\\nD_inv =\\n{D_inv}\")" + ] + }, + { + "cell_type": "markdown", + "id": "05de35ef-439f-4273-8902-9fbf5d1dd878", + "metadata": {}, + "source": [ + "### 13. Приравняйте к нулю отрицательные числа в матрице D_inv, а положительные - к единице. Убедитесь, что в матрице D_inv остались только нули и единицы. С помощью функции numpy.where, используя матрицу D_inv в качестве маски, а матрицы B и C - в качестве источников данных, получите матрицу E размером 4x4. Элементы матрицы E, для которых соответствующий элемент матрицы D_inv равен 1, должны быть равны соответствующему элементу матрицы B, а элементы матрицы E, для которых соответствующий элемент матрицы D_inv равен 0, должны быть равны соответствующему элементу матрицы C." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "ae14bda8-3197-4ce6-8ed7-9227f40ee440", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "D_inv =\n", + "[[ 0. 1. 0. 0.]\n", + " [ 1. 0. 1. 1.]\n", + " [ 0. 1. 0. 0.]\n", + " [ 1. 0. -0. 1.]]\n", + "E =\n", + "[[ 0. 14. 2. 3.]\n", + " [ 14. 5. 86. 122.]\n", + " [ 8. 86. 10. 11.]\n", + " [ 32. 13. 14. 302.]]\n" + ] + } + ], + "source": [ + "D_inv[np.where(D_inv < 0)] = 0\n", + "D_inv[np.where(D_inv > 0)] = 1\n", + "print(f\"D_inv =\\n{D_inv}\")\n", + "\n", + "E = np.zeros((4, 4))\n", + "E[np.where(D_inv == 0)] = C[np.where(D_inv == 0)]\n", + "E[np.where(D_inv == 1)] = B[np.where(D_inv == 1)]\n", + "print(f\"E =\\n{E}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8ecf6a0-078c-4d26-9aa0-8d84514df3e7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 6fc5beff16a37111bdea3e6ad063953403ed4c89 Mon Sep 17 00:00:00 2001 From: Foton Date: Mon, 10 Jun 2024 18:33:48 +0300 Subject: [PATCH 2/3] Release Home Work Machine Learning DScience Lesson6 --- Lesson6/Task2.ipynb | 1979 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1979 insertions(+) create mode 100644 Lesson6/Task2.ipynb diff --git a/Lesson6/Task2.ipynb b/Lesson6/Task2.ipynb new file mode 100644 index 0000000..2ebd9ce --- /dev/null +++ b/Lesson6/Task2.ipynb @@ -0,0 +1,1979 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b2f2e5e2", + "metadata": {}, + "source": [ + "## Тема “Обучение с учителем”" + ] + }, + { + 
"cell_type": "markdown", + "id": "16595a84", + "metadata": {}, + "source": [ + "### Задание 1\n", + "Импортируйте библиотеки pandas и numpy.\n", + "Загрузите \"Boston House Prices dataset\" из встроенных наборов данных библиотеки sklearn..\n", + "Разбейте эти датафреймы на тренировочные (X_train, y_train) и тестовые (X_test, y_test) с помощью\n", + "функции train_test_split так, чтобы размер тестовой выборки\n", + "составлял 30% от всех данных, при этом аргумент random state должен быть равен 42.\n", + "Создайте модель линейной регрессии под названием lr с помощью класса LinearRegression из модуля\n", + "sklearn.linear_model.\n", + "Обучите модель на тренировочных данных (используйте все признаки) и сделайте предсказание на\n", + "тестовых." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "274303e6", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "693a9c36", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "pd.options.display.max_columns = 100\n", + "\n", + "from sklearn.datasets import load_boston" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a37f783d", + "metadata": {}, + "outputs": [], + "source": [ + "boston = load_boston()\n", + "\n", + "feature_names = boston[\"feature_names\"]\n", + "\n", + "X = pd.DataFrame(boston[\"data\"], columns=feature_names)\n", + "y = pd.DataFrame(boston[\"target\"], columns=[\"price\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5a2e0780", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((354, 13), (152, 13))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, 
test_size=0.3, random_state=42)\n", + "\n", + "X_train.shape, X_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "96164976", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "\n", + "lr = LinearRegression()\n", + "\n", + "lr.fit(X_train, y_train)\n", + "\n", + "lr_pred = lr.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "id": "a07cffa5", + "metadata": {}, + "source": [ + "### Задание 2\n", + "Создайте модель под названием model с помощью класса RandomForestRegressor из модуля\n", + "sklearn.ensemble.\n", + "Сделайте аргумент n_estimators равным 1000,\n", + "max_depth должен быть равен 12 и random_state сделайте равным 42.\n", + "Обучите модель на тренировочных данных аналогично тому, как вы обучали модель LinearRegression,\n", + "но при этом в метод fit вместо датафрейма y_train поставьте y_train.values[:, 0],\n", + "чтобы получить из датафрейма одномерный массив Numpy,\n", + "так как для класса RandomForestRegressor в данном методе для аргумента y предпочтительно\n", + "применение массивов вместо датафрейма.\n", + "Сделайте предсказание на тестовых данных и посчитайте R2. 
Сравните с результатом из\n", + "предыдущего задания.\n", + "Напишите в комментариях к коду, какая модель в данном случае работает лучше" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "64042e74", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "оценка R2 модели ансамбля случайного леса выше чем у линейной регрессии\n", + "RandomForestRegressor=0.8747\n", + "LinearRegression=0.7112\n" + ] + } + ], + "source": [ + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.metrics import r2_score\n", + "\n", + "model = RandomForestRegressor(max_depth=12, n_estimators=1000, random_state=42)\n", + "\n", + "model.fit(X_train, y_train.values[:,0])\n", + "\n", + "rf_pred = model.predict(X_test)\n", + "\n", + "r2_lr = r2_score(y_test, lr_pred)\n", + "r2_rf = r2_score(y_test, rf_pred)\n", + "\n", + "print(f\"оценка R2 модели ансамбля случайного леса {'выше' if r2_rf > r2_lr else 'ниже' } \" \n", + " f\"чем у линейной регрессии\\nRandomForestRegressor={r2_rf:.4f}\\nLinearRegression={r2_lr:.4f}\")" + ] + }, + { + "cell_type": "markdown", + "id": "94b0548e", + "metadata": {}, + "source": [ + "### *Задание 3\n", + "Вызовите документацию для класса RandomForestRegressor,\n", + "найдите информацию об атрибуте feature_importances_.\n", + "С помощью этого атрибута найдите сумму всех показателей важности,\n", + "установите, какие два признака показывают наибольшую важность." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1327004d", + "metadata": {}, + "outputs": [], + "source": [ + "?RandomForestRegressor" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c8e1fab0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "сумма показателей важности признаков модели = 1.0\n" + ] + } + ], + "source": [ + "print(f\"сумма показателей важности признаков модели = {np.sum(model.feature_importances_)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "744deb30", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "первые два наиболее важные признака\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
featureimportance
12LSTAT0.415847
5RM0.402682
\n", + "
" + ], + "text/plain": [ + " feature importance\n", + "12 LSTAT 0.415847\n", + "5 RM 0.402682" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(\"первые два наиболее важные признака\")\n", + "pd.DataFrame({'feature': feature_names,'importance': model.feature_importances_}) \\\n", + ".sort_values('importance',ascending = False) \\\n", + ".head(2)" + ] + }, + { + "cell_type": "markdown", + "id": "b9004884", + "metadata": {}, + "source": [ + "## *Задание 4\n", + "В этом задании мы будем работать с датасетом, с которым мы уже знакомы по домашнему заданию\n", + "по библиотеке Matplotlib, это датасет Credit Card Fraud Detection. Для этого датасета мы будем решать\n", + "задачу классификации - будем определять, какие из транзакций по кредитной карте являются\n", + "мошенническими. Данный датасет сильно несбалансирован (так как случаи мошенничества\n", + "относительно редки), так что применение метрики accuracy не принесет пользы и не поможет выбрать\n", + "лучшую модель. Мы будем вычислять AUC, то есть площадь под кривой ROC.\n", + "Импортируйте из соответствующих модулей RandomForestClassifier, GridSearchCV и train_test_split.\n", + "Загрузите датасет creditcard.csv и создайте датафрейм df." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a65b98a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset URL: https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud\n", + "License(s): DbCL-1.0\n", + "Downloading creditcardfraud.zip to W:\\Projects\\GB\\Python\\MLearning\\DSLibraries\\Lesson6\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " 0%| | 0.00/66.0M [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TimeV1V2V3V4V5V6V7V8V9V10V11V12V13V14V15V16V17V18V19V20V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.3637870.090794-0.551600-0.617801-0.991390-0.3111691.468177-0.4704010.2079710.0257910.4039930.251412-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425-0.1669741.6127271.0652350.489095-0.1437720.6355580.463917-0.114805-0.183361-0.145783-0.069083-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.5146540.2076430.6245010.0660840.717293-0.1659462.345865-2.8900831.109969-0.121359-2.2618570.5249800.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024-0.054952-0.2264870.1782280.507757-0.287924-0.631418-1.059647-0.6840931.965775-1.232622-0.208038-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.8177390.753074-0.8228430.5381961.345852-1.1196700.175121-0.451449-0.237033-0.0381950.8034870.408542-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
52.0-0.4259660.9605231.141109-0.1682520.420987-0.0297280.4762010.260314-0.568671-0.3714071.3412620.359894-0.358091-0.1371340.5176170.401726-0.0581330.068653-0.0331940.084968-0.208254-0.559825-0.026398-0.371427-0.2327940.1059150.2538440.0810803.670
64.01.2296580.1410040.0453711.2026130.1918810.272708-0.0051590.0812130.464960-0.099254-1.416907-0.153826-0.7510630.1673720.050144-0.4435870.002821-0.611987-0.045575-0.219633-0.167716-0.270710-0.154104-0.7800550.750137-0.2572370.0345070.0051684.990
77.0-0.6442691.4179641.074380-0.4921990.9489340.4281181.120631-3.8078640.6153751.249376-0.6194680.2914741.757964-1.3238650.686133-0.076127-1.222127-0.3582220.324505-0.1567421.943465-1.0154550.057504-0.649709-0.415267-0.051634-1.206921-1.08533940.800
87.0-0.8942860.286157-0.113192-0.2715262.6695993.7218180.3701450.851084-0.392048-0.410430-0.705117-0.110452-0.2862540.074355-0.328783-0.210077-0.4997680.1187650.5703280.052736-0.073425-0.268092-0.2042331.0115920.373205-0.3841570.0117470.14240493.200
99.0-0.3382621.1195931.044367-0.2221870.499361-0.2467610.6515830.069539-0.736727-0.3668461.0176140.8363901.006844-0.4435230.1502190.739453-0.5409800.4766770.4517730.203711-0.246914-0.633753-0.120794-0.385050-0.0697330.0941990.2462190.0830763.680
\n", + "" + ], + "text/plain": [ + " Time V1 V2 V3 V4 V5 V6 V7 \\\n", + "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n", + "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n", + "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n", + "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n", + "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n", + "5 2.0 -0.425966 0.960523 1.141109 -0.168252 0.420987 -0.029728 0.476201 \n", + "6 4.0 1.229658 0.141004 0.045371 1.202613 0.191881 0.272708 -0.005159 \n", + "7 7.0 -0.644269 1.417964 1.074380 -0.492199 0.948934 0.428118 1.120631 \n", + "8 7.0 -0.894286 0.286157 -0.113192 -0.271526 2.669599 3.721818 0.370145 \n", + "9 9.0 -0.338262 1.119593 1.044367 -0.222187 0.499361 -0.246761 0.651583 \n", + "\n", + " V8 V9 V10 V11 V12 V13 V14 \\\n", + "0 0.098698 0.363787 0.090794 -0.551600 -0.617801 -0.991390 -0.311169 \n", + "1 0.085102 -0.255425 -0.166974 1.612727 1.065235 0.489095 -0.143772 \n", + "2 0.247676 -1.514654 0.207643 0.624501 0.066084 0.717293 -0.165946 \n", + "3 0.377436 -1.387024 -0.054952 -0.226487 0.178228 0.507757 -0.287924 \n", + "4 -0.270533 0.817739 0.753074 -0.822843 0.538196 1.345852 -1.119670 \n", + "5 0.260314 -0.568671 -0.371407 1.341262 0.359894 -0.358091 -0.137134 \n", + "6 0.081213 0.464960 -0.099254 -1.416907 -0.153826 -0.751063 0.167372 \n", + "7 -3.807864 0.615375 1.249376 -0.619468 0.291474 1.757964 -1.323865 \n", + "8 0.851084 -0.392048 -0.410430 -0.705117 -0.110452 -0.286254 0.074355 \n", + "9 0.069539 -0.736727 -0.366846 1.017614 0.836390 1.006844 -0.443523 \n", + "\n", + " V15 V16 V17 V18 V19 V20 V21 \\\n", + "0 1.468177 -0.470401 0.207971 0.025791 0.403993 0.251412 -0.018307 \n", + "1 0.635558 0.463917 -0.114805 -0.183361 -0.145783 -0.069083 -0.225775 \n", + "2 2.345865 -2.890083 1.109969 -0.121359 -2.261857 0.524980 0.247998 \n", + "3 -0.631418 -1.059647 -0.684093 1.965775 
-1.232622 -0.208038 -0.108300 \n", + "4 0.175121 -0.451449 -0.237033 -0.038195 0.803487 0.408542 -0.009431 \n", + "5 0.517617 0.401726 -0.058133 0.068653 -0.033194 0.084968 -0.208254 \n", + "6 0.050144 -0.443587 0.002821 -0.611987 -0.045575 -0.219633 -0.167716 \n", + "7 0.686133 -0.076127 -1.222127 -0.358222 0.324505 -0.156742 1.943465 \n", + "8 -0.328783 -0.210077 -0.499768 0.118765 0.570328 0.052736 -0.073425 \n", + "9 0.150219 0.739453 -0.540980 0.476677 0.451773 0.203711 -0.246914 \n", + "\n", + " V22 V23 V24 V25 V26 V27 V28 \\\n", + "0 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 \n", + "1 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 \n", + "2 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 \n", + "3 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 \n", + "4 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 \n", + "5 -0.559825 -0.026398 -0.371427 -0.232794 0.105915 0.253844 0.081080 \n", + "6 -0.270710 -0.154104 -0.780055 0.750137 -0.257237 0.034507 0.005168 \n", + "7 -1.015455 0.057504 -0.649709 -0.415267 -0.051634 -1.206921 -1.085339 \n", + "8 -0.268092 -0.204233 1.011592 0.373205 -0.384157 0.011747 0.142404 \n", + "9 -0.633753 -0.120794 -0.385050 -0.069733 0.094199 0.246219 0.083076 \n", + "\n", + " Amount Class \n", + "0 149.62 0 \n", + "1 2.69 0 \n", + "2 378.66 0 \n", + "3 123.50 0 \n", + "4 69.99 0 \n", + "5 3.67 0 \n", + "6 4.99 0 \n", + "7 40.80 0 \n", + "8 93.20 0 \n", + "9 3.68 0 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from zipfile import ZipFile\n", + "\n", + "ZipFile(\"creditcardfraud.zip\").extractall(\".\")\n", + "\n", + "df = pd.read_csv(\"creditcard.csv\")\n", + "\n", + "df.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "9302f8fe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + 
"RangeIndex: 284807 entries, 0 to 284806\n", + "Data columns (total 31 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Time 284807 non-null float64\n", + " 1 V1 284807 non-null float64\n", + " 2 V2 284807 non-null float64\n", + " 3 V3 284807 non-null float64\n", + " 4 V4 284807 non-null float64\n", + " 5 V5 284807 non-null float64\n", + " 6 V6 284807 non-null float64\n", + " 7 V7 284807 non-null float64\n", + " 8 V8 284807 non-null float64\n", + " 9 V9 284807 non-null float64\n", + " 10 V10 284807 non-null float64\n", + " 11 V11 284807 non-null float64\n", + " 12 V12 284807 non-null float64\n", + " 13 V13 284807 non-null float64\n", + " 14 V14 284807 non-null float64\n", + " 15 V15 284807 non-null float64\n", + " 16 V16 284807 non-null float64\n", + " 17 V17 284807 non-null float64\n", + " 18 V18 284807 non-null float64\n", + " 19 V19 284807 non-null float64\n", + " 20 V20 284807 non-null float64\n", + " 21 V21 284807 non-null float64\n", + " 22 V22 284807 non-null float64\n", + " 23 V23 284807 non-null float64\n", + " 24 V24 284807 non-null float64\n", + " 25 V25 284807 non-null float64\n", + " 26 V26 284807 non-null float64\n", + " 27 V27 284807 non-null float64\n", + " 28 V28 284807 non-null float64\n", + " 29 Amount 284807 non-null float64\n", + " 30 Class 284807 non-null int64 \n", + "dtypes: float64(30), int64(1)\n", + "memory usage: 67.4 MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "4ba00248", + "metadata": {}, + "source": [ + "Создайте датафрейм X из датафрейма df, исключив столбец Class.\n", + "Создайте объект Series под названием y из столбца Class.\n", + "Разбейте X и y на тренировочный и тестовый наборы данных при помощи функции train_test_split,\n", + "используя аргументы: test_size=0.3, random_state=100, stratify=y.\n", + "У вас должны получиться объекты X_train, X_test, y_train и y_test.\n", + "Просмотрите информацию о их форме." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "59e1e34e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "выборка не сбалансированна, данных первого класса значительно меньше\n", + "0 0.998273\n", + "1 0.001727\n", + "Name: Class, dtype: float64\n" + ] + }, + { + "data": { + "text/plain": [ + "((199364, 30), (85443, 30), (199364,), (85443,))" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target = \"Class\"\n", + "\n", + "y = df[target]\n", + "X = df.drop(target, axis=1)\n", + "\n", + "print(f\"выборка не сбалансированна, данных первого класса значительно меньше\\n{y.value_counts(normalize=True)}\")\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=100, stratify=y)\n", + "X_train.shape, X_test.shape, y_train.shape, y_test.shape" + ] + }, + { + "cell_type": "markdown", + "id": "fe7fae18", + "metadata": {}, + "source": [ + "Для поиска по сетке параметров задайте такие параметры:\n", + "parameters = [{'n_estimators': [10, 15],\n", + "'max_features': np.arange(3, 5),\n", + "'max_depth': np.arange(4, 7)}]\n", + "Создайте модель GridSearchCV со следующими аргументами:\n", + "estimator=RandomForestClassifier(random_state=100),\n", + "param_grid=parameters,\n", + "scoring='roc_auc',\n", + "cv=3." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "c034c2a4", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "parameters = {\n", + " 'n_estimators': [10, 15],\n", + " 'max_features': np.arange(3, 5),\n", + " 'max_depth': np.arange(4, 7),\n", + "}\n", + "\n", + "clf = GridSearchCV(\n", + " estimator=RandomForestClassifier(random_state=100),\n", + " param_grid=parameters,\n", + " scoring='roc_auc',\n", + " cv=3,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a4308ac3", + "metadata": {}, + "source": [ + "Обучите модель на тренировочном наборе данных (может занять несколько минут).\n", + "Просмотрите параметры лучшей модели с помощью атрибута best_params_.\n", + "Предскажите вероятности классов с помощью полученной модели и метода predict_proba.\n", + "Из полученного результата (массив Numpy) выберите столбец с индексом 1 (вероятность класса 1) и\n", + "запишите в массив y_pred_proba. Из модуля sklearn.metrics импортируйте метрику roc_auc_score.\n", + "Вычислите AUC на тестовых данных и сравните с результатом, полученным на тренировочных данных,\n", + "используя в качестве аргументов массивы y_test и y_pred_proba." + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "9b43d0b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "GridSearchCV(cv=3, estimator=RandomForestClassifier(random_state=100),\n", + " param_grid={'max_depth': array([4, 5, 6]),\n", + " 'max_features': array([3, 4]),\n", + " 'n_estimators': [10, 15]},\n", + " scoring='roc_auc')" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clf.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "966c3d4d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'max_depth': 6, 'max_features': 3, 'n_estimators': 15}" + ] + }, + "execution_count": 105, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clf.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "405d63d0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "метрика AUC=0.9436 на тестовых данных меньше метрики AUC=0.9660 на обучающем наборе\n" + ] + } + ], + "source": [ + "from sklearn.metrics import roc_auc_score\n", + "\n", + "model = RandomForestClassifier(max_depth=6, max_features=3, n_estimators=15)\n", + "\n", + "model.fit(X_train, y_train)\n", + "\n", + "y_pred_proba = model.predict_proba(X_test)[:, 1]\n", + "\n", + "auc = roc_auc_score(y_test, y_pred_proba)\n", + "\n", + "print(f\"метрика AUC={auc:.4f} на тестовых данных {'больше' if auc > clf.best_score_ else 'меньше'} \"\n", + " f\"метрики AUC={clf.best_score_:.4f} на обучающем наборе\")" + ] + }, + { + "cell_type": "markdown", + "id": "5cb517be", + "metadata": {}, + "source": [ + "## *Дополнительные задания:\n", + "1). Загрузите датасет Wine из встроенных датасетов sklearn.datasets с помощью функции load_wine в\n", + "переменную data." + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "1dbcea26", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_wine\n", + "\n", + "wine = load_wine()\n", + "\n", + "data = wine[\"data\"]" + ] + }, + { + "cell_type": "markdown", + "id": "18dea80c", + "metadata": {}, + "source": [ + "2). Полученный датасет не является датафреймом. Это структура данных, имеющая ключи\n", + "аналогично словарю. Просмотрите тип данных этой структуры данных и создайте список data_keys,\n", + "содержащий ее ключи." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "d5f5e52a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_keys = wine.keys()\n", + "data_keys" + ] + }, + { + "cell_type": "markdown", + "id": "73bff5cf", + "metadata": {}, + "source": [ + "3). Просмотрите данные, описание и названия признаков в датасете. Описание нужно вывести в виде\n", + "привычного, аккуратно оформленного текста, без обозначений переноса строки, но с самими\n", + "переносами и т.д." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "39433029", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. _wine_dataset:\n", + "\n", + "Wine recognition dataset\n", + "------------------------\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 178 (50 in each of three classes)\n", + " :Number of Attributes: 13 numeric, predictive attributes and the class\n", + " :Attribute Information:\n", + " \t\t- Alcohol\n", + " \t\t- Malic acid\n", + " \t\t- Ash\n", + "\t\t- Alcalinity of ash \n", + " \t\t- Magnesium\n", + "\t\t- Total phenols\n", + " \t\t- Flavanoids\n", + " \t\t- Nonflavanoid phenols\n", + " \t\t- Proanthocyanins\n", + "\t\t- Color intensity\n", + " \t\t- Hue\n", + " \t\t- OD280/OD315 of diluted wines\n", + " \t\t- Proline\n", + "\n", + " - class:\n", + " - class_0\n", + " - class_1\n", + " - class_2\n", + "\t\t\n", + " :Summary Statistics:\n", + " \n", + " ============================= ==== ===== ======= =====\n", + " Min Max Mean SD\n", + " ============================= ==== ===== ======= =====\n", + " Alcohol: 11.0 14.8 13.0 0.8\n", + " Malic Acid: 0.74 5.80 2.34 1.12\n", + " Ash: 1.36 3.23 2.36 0.27\n", + " Alcalinity of Ash: 10.6 30.0 19.5 3.3\n", + " Magnesium: 
70.0 162.0 99.7 14.3\n", + " Total Phenols: 0.98 3.88 2.29 0.63\n", + " Flavanoids: 0.34 5.08 2.03 1.00\n", + " Nonflavanoid Phenols: 0.13 0.66 0.36 0.12\n", + " Proanthocyanins: 0.41 3.58 1.59 0.57\n", + " Colour Intensity: 1.3 13.0 5.1 2.3\n", + " Hue: 0.48 1.71 0.96 0.23\n", + " OD280/OD315 of diluted wines: 1.27 4.00 2.61 0.71\n", + " Proline: 278 1680 746 315\n", + " ============================= ==== ===== ======= =====\n", + "\n", + " :Missing Attribute Values: None\n", + " :Class Distribution: class_0 (59), class_1 (71), class_2 (48)\n", + " :Creator: R.A. Fisher\n", + " :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)\n", + " :Date: July, 1988\n", + "\n", + "This is a copy of UCI ML Wine recognition datasets.\n", + "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\n", + "\n", + "The data is the results of a chemical analysis of wines grown in the same\n", + "region in Italy by three different cultivators. There are thirteen different\n", + "measurements taken for different constituents found in the three types of\n", + "wine.\n", + "\n", + "Original Owners: \n", + "\n", + "Forina, M. et al, PARVUS - \n", + "An Extendible Package for Data Exploration, Classification and Correlation. \n", + "Institute of Pharmaceutical and Food Analysis and Technologies,\n", + "Via Brigata Salerno, 16147 Genoa, Italy.\n", + "\n", + "Citation:\n", + "\n", + "Lichman, M. (2013). UCI Machine Learning Repository\n", + "[https://archive.ics.uci.edu/ml]. Irvine, CA: University of California,\n", + "School of Information and Computer Science. \n", + "\n", + ".. topic:: References\n", + "\n", + " (1) S. Aeberhard, D. Coomans and O. de Vel, \n", + " Comparison of Classifiers in High Dimensional Settings, \n", + " Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of \n", + " Mathematics and Statistics, James Cook University of North Queensland. \n", + " (Also submitted to Technometrics). 
\n", + "\n", + " The data was used with many others for comparing various \n", + " classifiers. The classes are separable, though only RDA \n", + " has achieved 100% correct classification. \n", + " (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) \n", + " (All results using the leave-one-out technique) \n", + "\n", + " (2) S. Aeberhard, D. Coomans and O. de Vel, \n", + " \"THE CLASSIFICATION PERFORMANCE OF RDA\" \n", + " Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of \n", + " Mathematics and Statistics, James Cook University of North Queensland. \n", + " (Also submitted to Journal of Chemometrics).\n", + "\n" + ] + } + ], + "source": [ + "print(wine[\"DESCR\"])" + ] + }, + { + "cell_type": "markdown", + "id": "772359a0", + "metadata": {}, + "source": [ + "4). Сколько классов содержит целевая переменная датасета? Выведите названия классов." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "183b0c76", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['class_0' 'class_1' 'class_2']\n", + "кол-во:3\n" + ] + } + ], + "source": [ + "target_class = wine[\"target_names\"]\n", + "\n", + "print(f\"{target_class}\\nкол-во:{len(target_class)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "41d6690b", + "metadata": {}, + "source": [ + "5). На основе данных датасета (они содержатся в двумерном массиве Numpy) и названий признаков\n", + "создайте датафрейм под названием X." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "15557fbe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesproline
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.0
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.0
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.0
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.0
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.0
\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + "\n", + " od280/od315_of_diluted_wines proline \n", + "0 3.92 1065.0 \n", + "1 3.40 1050.0 \n", + "2 3.17 1185.0 \n", + "3 3.45 1480.0 \n", + "4 2.93 735.0 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_names = wine[\"feature_names\"]\n", + "\n", + "X = pd.DataFrame(data, columns=feature_names)\n", + "\n", + "X.head()" + ] + }, + { + "cell_type": "markdown", + "id": "e9ef2d4b", + "metadata": {}, + "source": [ + "6). Выясните размер датафрейма X и установите, имеются ли в нем пропущенные значения." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "9f2ee54a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 178 entries, 0 to 177\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 alcohol 178 non-null float64\n", + " 1 malic_acid 178 non-null float64\n", + " 2 ash 178 non-null float64\n", + " 3 alcalinity_of_ash 178 non-null float64\n", + " 4 magnesium 178 non-null float64\n", + " 5 total_phenols 178 non-null float64\n", + " 6 flavanoids 178 non-null float64\n", + " 7 nonflavanoid_phenols 178 non-null float64\n", + " 8 proanthocyanins 178 non-null float64\n", + " 9 color_intensity 178 non-null float64\n", + " 10 hue 178 non-null float64\n", + " 11 od280/od315_of_diluted_wines 178 non-null float64\n", + " 12 proline 178 non-null float64\n", + "dtypes: float64(13)\n", + "memory usage: 18.2 KB\n" + ] + } + ], + "source": [ + "X.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "90a743fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "размер = (178, 13)\n", + "кол-во пустых значений\n", + "alcohol 0\n", + "malic_acid 0\n", + "ash 0\n", + "alcalinity_of_ash 0\n", + "magnesium 0\n", + "total_phenols 0\n", + "flavanoids 0\n", + "nonflavanoid_phenols 0\n", + "proanthocyanins 0\n", + "color_intensity 0\n", + "hue 0\n", + "od280/od315_of_diluted_wines 0\n", + "proline 0\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "print(f\"размер = {X.shape}\\nкол-во пустых значений\\n{X.isnull().sum(axis=0)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cf169b47", + "metadata": {}, + "source": [ + "7). Добавьте в датафрейм поле с классами вин в виде чисел, имеющих тип данных numpy.int64.\n", + "Название поля - 'target'." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "3883dbe8", + "metadata": {}, + "outputs": [], + "source": [ + "X[\"target\"] = wine[\"target\"].astype(\"int64\")" + ] + }, + { + "cell_type": "markdown", + "id": "e5918c4e", + "metadata": {}, + "source": [ + "8). Постройте матрицу корреляций для всех полей X. Дайте полученному датафрейму название\n", + "X_corr." + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "d4c9b5e1", + "metadata": {}, + "outputs": [], + "source": [ + "X_corr = X.corr()" + ] + }, + { + "cell_type": "markdown", + "id": "43fd36d4", + "metadata": {}, + "source": [ + "9). Создайте список high_corr из признаков, корреляция которых с полем target по абсолютному\n", + "значению превышает 0.5 (причем само поле target не должно входить в этот список)." + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "a788a848", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['alcalinity_of_ash',\n", + " 'total_phenols',\n", + " 'flavanoids',\n", + " 'hue',\n", + " 'od280/od315_of_diluted_wines',\n", + " 'proline']" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "high_corr = [item for item in X_corr[abs(X_corr.target) > 0.5].index if item != \"target\"]\n", + "high_corr" + ] + }, + { + "cell_type": "markdown", + "id": "beab1b7d", + "metadata": {}, + "source": [ + "10). Удалите из датафрейма X поле с целевой переменной. Для всех признаков, названия которых\n", + "содержатся в списке high_corr, вычислите квадрат их значений и добавьте в датафрейм X\n", + "соответствующие поля с суффиксом '_2', добавленным к первоначальному названию признака.\n", + "Итоговый датафрейм должен содержать все поля, которые были в нем изначально, а также поля с\n", + "признаками из списка high_corr, возведенными в квадрат. Выведите описание полей датафрейма X с\n", + "помощью метода describe." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "426afeb5", + "metadata": {}, + "outputs": [], + "source": [ + "X.drop(\"target\", axis = 1, inplace = True) \n", + "\n", + "for item in high_corr:\n", + " X[item+\"_2\"] = X[item] ** 2" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "382274e0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolinealcalinity_of_ash_2total_phenols_2flavanoids_2hue_2od280/od315_of_diluted_wines_2proline_2
count178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.000000178.0000001.780000e+02
mean13.0006182.3363482.36651719.49494499.7415732.2951122.0292700.3618541.5908995.0580900.9574492.611685746.893258391.1428655.6570305.1100490.9686617.3221556.564591e+05
std0.8118271.1171460.2743443.33956414.2824840.6258510.9988590.1244530.5723592.3182860.2285720.709990314.907474133.6717752.9362944.2114410.4437983.5843165.558591e+05
min11.0300000.7400001.36000010.60000070.0000000.9800000.3400000.1300000.4100001.2800000.4800001.270000278.000000112.3600000.9604000.1156000.2304001.6129007.728400e+04
25%12.3625001.6025002.21000017.20000088.0000001.7425001.2050000.2700001.2500003.2200000.7825001.937500500.500000295.8400003.0363251.4521000.6123253.7540752.505010e+05
50%13.0500001.8650002.36000019.50000098.0000002.3550002.1350000.3400001.5550004.6900000.9650002.780000673.500000380.2500005.5460504.5582500.9312507.7284004.536045e+05
75%13.6775003.0825002.55750021.500000107.0000002.8000002.8750000.4375001.9500006.2000001.1200003.170000985.000000462.2500007.8400008.2657001.25440010.0489009.702250e+05
max14.8300005.8000003.23000030.000000162.0000003.8800005.0800000.6600003.58000013.0000001.7100004.0000001680.000000900.00000015.05440025.8064002.92410016.0000002.822400e+06
\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "count 178.000000 178.000000 178.000000 178.000000 178.000000 \n", + "mean 13.000618 2.336348 2.366517 19.494944 99.741573 \n", + "std 0.811827 1.117146 0.274344 3.339564 14.282484 \n", + "min 11.030000 0.740000 1.360000 10.600000 70.000000 \n", + "25% 12.362500 1.602500 2.210000 17.200000 88.000000 \n", + "50% 13.050000 1.865000 2.360000 19.500000 98.000000 \n", + "75% 13.677500 3.082500 2.557500 21.500000 107.000000 \n", + "max 14.830000 5.800000 3.230000 30.000000 162.000000 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "count 178.000000 178.000000 178.000000 178.000000 \n", + "mean 2.295112 2.029270 0.361854 1.590899 \n", + "std 0.625851 0.998859 0.124453 0.572359 \n", + "min 0.980000 0.340000 0.130000 0.410000 \n", + "25% 1.742500 1.205000 0.270000 1.250000 \n", + "50% 2.355000 2.135000 0.340000 1.555000 \n", + "75% 2.800000 2.875000 0.437500 1.950000 \n", + "max 3.880000 5.080000 0.660000 3.580000 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline \\\n", + "count 178.000000 178.000000 178.000000 178.000000 \n", + "mean 5.058090 0.957449 2.611685 746.893258 \n", + "std 2.318286 0.228572 0.709990 314.907474 \n", + "min 1.280000 0.480000 1.270000 278.000000 \n", + "25% 3.220000 0.782500 1.937500 500.500000 \n", + "50% 4.690000 0.965000 2.780000 673.500000 \n", + "75% 6.200000 1.120000 3.170000 985.000000 \n", + "max 13.000000 1.710000 4.000000 1680.000000 \n", + "\n", + " alcalinity_of_ash_2 total_phenols_2 flavanoids_2 hue_2 \\\n", + "count 178.000000 178.000000 178.000000 178.000000 \n", + "mean 391.142865 5.657030 5.110049 0.968661 \n", + "std 133.671775 2.936294 4.211441 0.443798 \n", + "min 112.360000 0.960400 0.115600 0.230400 \n", + "25% 295.840000 3.036325 1.452100 0.612325 \n", + "50% 380.250000 5.546050 4.558250 0.931250 \n", + "75% 462.250000 7.840000 8.265700 1.254400 \n", + "max 900.000000 
15.054400 25.806400 2.924100 \n", + "\n", + " od280/od315_of_diluted_wines_2 proline_2 \n", + "count 178.000000 1.780000e+02 \n", + "mean 7.322155 6.564591e+05 \n", + "std 3.584316 5.558591e+05 \n", + "min 1.612900 7.728400e+04 \n", + "25% 3.754075 2.505010e+05 \n", + "50% 7.728400 4.536045e+05 \n", + "75% 10.048900 9.702250e+05 \n", + "max 16.000000 2.822400e+06 " + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1245b68", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 623c3e9b2431c8bc7443b5c2ca0912f43f1eda4b Mon Sep 17 00:00:00 2001 From: Foton Date: Mon, 10 Jun 2024 18:42:28 +0300 Subject: [PATCH 3/3] Release Home Work Machine Learning DLibraries Lesson6 --- Lesson4/Task2.ipynb | 6815 ------------------------------------------- 1 file changed, 6815 deletions(-) delete mode 100644 Lesson4/Task2.ipynb diff --git a/Lesson4/Task2.ipynb b/Lesson4/Task2.ipynb deleted file mode 100644 index ac6e6bf..0000000 --- a/Lesson4/Task2.ipynb +++ /dev/null @@ -1,6815 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "b27e41e4", - "metadata": {}, - "source": [ - "## Тема “Визуализация данных в Matplotlib”" - ] - }, - { - "cell_type": "markdown", - "id": "f324024b", - "metadata": {}, - "source": [ - "### Задание 1\n", - "Загрузите модуль pyplot библиотеки matplotlib с псевдонимом plt, а также библиотеку numpy с\n", - "псевдонимом np.\n", - "Примените магическую функцию %matplotlib inline для 
отображения графиков в Jupyter Notebook и\n", - "настройки конфигурации ноутбука со значением 'svg' для более четкого отображения графиков.\n", - "Создайте список под названием x с числами 1, 2, 3, 4, 5, 6, 7 и список y с числами 3.5, 3.8, 4.2, 4.5, 5,\n", - "5.5, 7.\n", - "С помощью функции plot постройте график, соединяющий линиями точки с горизонтальными\n", - "координатами из списка x и вертикальными - из списка y.\n", - "Затем в следующей ячейке постройте диаграмму рассеяния (другие названия - диаграмма разброса,\n", - "scatter plot)." - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "99e11a55", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from random import seed\n", - "from matplotlib import pyplot as plt\n", - "\n", - "plt.style.use('fivethirtyeight')\n", - "\n", - "%matplotlib inline\n", - "%config InlineBackend.figure_format = 'svg'" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "bd0d7835", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2024-06-04T11:17:30.715908\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.9.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "x = [1, 2, 3, 4, 5, 6, 7]\n", - "y = [3.5, 3.8, 4.2, 4.5, 5, 5.5, 7]\n", - "\n", - "plt.plot(x, y)\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "91fc998c", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2024-06-03T11:42:36.642755\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.9.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "plt.scatter(x, y)\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "fbf3b995", - "metadata": {}, - "source": [ - "### Задание 2\n", - "С помощью функции linspace из библиотеки Numpy создайте массив t из 51 числа от 0 до 10\n", - "включительно.\n", - "© geekbrains.ru\n", - "Создайте массив Numpy под названием f, содержащий косинусы элементов массива t.\n", - "Постройте линейную диаграмму, используя массив t для координат по горизонтали,а массив f - для\n", - "координат по вертикали. Линия графика должна быть зеленого цвета.\n", - "Выведите название диаграммы - 'График f(t)'. Также добавьте названия для горизонтальной оси -\n", - "'Значения t' и для вертикальной - 'Значения f'.\n", - "Ограничьте график по оси x значениями 0.5 и 9.5, а по оси y - значениями -2.5 и 2.5." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "f19e1fe4", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2024-06-03T11:42:36.735077\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.9.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "t = np.linspace(0, 10, 51)\n", - "\n", - "f = np.cos(t)\n", - "\n", - "plt.plot(t, f, color=\"green\")\n", - "\n", - "plt.title(\"График f(t)\")\n", - "plt.xlabel(\"Значения t\")\n", - "plt.ylabel(\"Значения f\")\n", - "\n", - "plt.axis([0.5, 9.5, -2.5, 2.5])\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "id": "73611d04", - "metadata": {}, - "source": [ - "### Задание 3\n", - "С помощью функции linspace библиотеки Numpy создайте массив x из 51 числа от -3 до 3\n", - "включительно.\n", - "Создайте массивы y1, y2, y3, y4 по следующим формулам:\n", - "y1 = x**2\n", - "y2 = 2 * x + 0.5\n", - "y3 = -3 * x - 1.5\n", - "y4 = sin(x)\n", - "Используя функцию subplots модуля matplotlib.pyplot, создайте объект matplotlib.figure.Figure с\n", - "названием fig и массив объектов Axes под названием ax,причем так, чтобы у вас было 4 отдельных\n", - "графика в сетке, состоящей из двух строк и двух столбцов. 
В каждом графике массив x используется\n", - "для координат по горизонтали.В левом верхнем графике для координат по вертикали используйте\n", - "y1,в правом верхнем - y2, в левом нижнем - y3, в правом нижнем - y4.Дайте название графикам:\n", - "'График y1', 'График y2' и т.д.\n", - "Для графика в левом верхнем углу установите границы по оси x от -5 до 5.\n", - "Установите размеры фигуры 8 дюймов по горизонтали и 6 дюймов по вертикали.\n", - "Вертикальные и горизонтальные зазоры между графиками должны составлять 0.3" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "ea585c38", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0.5, 1.0, 'График y4')" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2024-06-03T11:42:37.042700\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.9.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "x = np.linspace(-3, 3, 51)\n", - "\n", - "y1 = x**2\n", - "y2 = 2*x + 0.5\n", - "y3 = -3*x - 1.5\n", - "y4 = np.sin(x)\n", - "\n", - "fig, ax = plt.subplots(nrows=2, ncols=2)\n", - "\n", - "fig.set_size_inches(8, 6)\n", - "fig.subplots_adjust(wspace=0.3, hspace=0.3)\n", - "\n", - "ax1, ax2, ax3, ax4 = ax.flatten()\n", - "\n", - "ax1.plot(x, y1)\n", - "ax1.set_title(\"График y1\")\n", - "ax1.set_xlim([-5, 5])\n", - "ax2.plot(x, y2)\n", - "ax2.set_title(\"График y2\")\n", - "ax3.plot(x, y3)\n", - "ax3.set_title(\"График y3\")\n", - "ax4.plot(x, y4)\n", - "ax4.set_title(\"График y4\")" - ] - }, - { - "cell_type": "markdown", - "id": "c77b9177", - "metadata": {}, - "source": [ - "### Задание 4\n", - "В этом задании мы будем работать с датасетом, в котором приведены данные по мошенничеству с\n", - "кредитными данными: Credit Card Fraud Detection (информация об авторах: Andrea Dal Pozzolo, Olivier\n", - "Caelen, Reid A. Johnson and Gianluca Bontempi. Calibrating Probability with Undersampling for Unbalanced\n", - "Classification. In Symposium on Computational Intelligence and Data Mining (CIDM), IEEE, 2015).\n", - "Ознакомьтесь с описанием и скачайте датасет creditcard.csv с сайта Kaggle.com по ссылке:\n", - "Credit Card Fraud Detection\n", - "Данный датасет является примером несбалансированных данных, так как мошеннические операции с\n", - "картами встречаются реже обычных.\n", - "Импортируйте библиотеку Pandas, а также используйте для графиков стиль “fivethirtyeight”.\n", - "© geekbrains.ru 1\n", - "Посчитайте с помощью метода value_counts количество наблюдений для каждого значения целевой\n", - "переменной Class и примените к полученным данным метод plot, чтобы построить столбчатую\n", - "диаграмму. 
Затем постройте такую же диаграмму, используя логарифмический масштаб.\n", - "На следующем графике постройте две гистограммы по значениям признака V1 - одну для\n", - "мошеннических транзакций (Class равен 1) и другую - для обычных (Class равен 0). Подберите\n", - "значение аргумента density так, чтобы по вертикали графика было расположено не число\n", - "наблюдений, а плотность распределения. Число бинов должно равняться 20 для обеих гистограмм, а\n", - "коэффициент alpha сделайте равным 0.5, чтобы гистограммы были полупрозрачными и не\n", - "загораживали друг друга. Создайте легенду с двумя значениями: “Class 0” и “Class 1”. Гистограмма\n", - "обычных транзакций должна быть серого цвета, а мошеннических - красного. Горизонтальной оси\n", - "дайте название “V1”." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "65f2dc4a-f788-4f45-b8d7-7bd6de270815", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataset URL: https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud\n", - "License(s): DbCL-1.0\n", - "Downloading creditcardfraud.zip to W:\\Projects\\GB\\Python\\MLearning\\DSLibraries\\Lesson4\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - " 0%| | 0.00/66.0M [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.363787...-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425...-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.514654...0.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024...-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.817739...-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
\n", - "

5 rows × 31 columns

\n", - "" - ], - "text/plain": [ - " Time V1 V2 V3 V4 V5 V6 V7 \\\n", - "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n", - "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n", - "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n", - "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n", - "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n", - "\n", - " V8 V9 ... V21 V22 V23 V24 V25 \\\n", - "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n", - "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n", - "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n", - "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n", - "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n", - "\n", - " V26 V27 V28 Amount Class \n", - "0 -0.189115 0.133558 -0.021053 149.62 0 \n", - "1 0.125895 -0.008983 0.014724 2.69 0 \n", - "2 -0.139097 -0.055353 -0.059752 378.66 0 \n", - "3 -0.221929 0.062723 0.061458 123.50 0 \n", - "4 0.502292 0.219422 0.215153 69.99 0 \n", - "\n", - "[5 rows x 31 columns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "d4fae789", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
count284807.0000002.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05...2.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05284807.000000284807.000000
mean94813.8595751.168375e-153.416908e-16-1.379537e-152.074095e-159.604066e-161.487313e-15-5.556467e-161.213481e-16-2.406331e-15...1.654067e-16-3.568593e-162.578648e-164.473266e-155.340915e-161.683437e-15-3.660091e-16-1.227390e-1688.3496190.001727
std47488.1459551.958696e+001.651309e+001.516255e+001.415869e+001.380247e+001.332271e+001.237094e+001.194353e+001.098632e+00...7.345240e-017.257016e-016.244603e-016.056471e-015.212781e-014.822270e-014.036325e-013.300833e-01250.1201090.041527
min0.000000-5.640751e+01-7.271573e+01-4.832559e+01-5.683171e+00-1.137433e+02-2.616051e+01-4.355724e+01-7.321672e+01-1.343407e+01...-3.483038e+01-1.093314e+01-4.480774e+01-2.836627e+00-1.029540e+01-2.604551e+00-2.256568e+01-1.543008e+010.0000000.000000
25%54201.500000-9.203734e-01-5.985499e-01-8.903648e-01-8.486401e-01-6.915971e-01-7.682956e-01-5.540759e-01-2.086297e-01-6.430976e-01...-2.283949e-01-5.423504e-01-1.618463e-01-3.545861e-01-3.171451e-01-3.269839e-01-7.083953e-02-5.295979e-025.6000000.000000
50%84692.0000001.810880e-026.548556e-021.798463e-01-1.984653e-02-5.433583e-02-2.741871e-014.010308e-022.235804e-02-5.142873e-02...-2.945017e-026.781943e-03-1.119293e-024.097606e-021.659350e-02-5.213911e-021.342146e-031.124383e-0222.0000000.000000
75%139320.5000001.315642e+008.037239e-011.027196e+007.433413e-016.119264e-013.985649e-015.704361e-013.273459e-015.971390e-01...1.863772e-015.285536e-011.476421e-014.395266e-013.507156e-012.409522e-019.104512e-027.827995e-0277.1650000.000000
max172792.0000002.454930e+002.205773e+019.382558e+001.687534e+013.480167e+017.330163e+011.205895e+022.000721e+011.559499e+01...2.720284e+011.050309e+012.252841e+014.584549e+007.519589e+003.517346e+003.161220e+013.384781e+0125691.1600001.000000
\n", - "

8 rows × 31 columns

\n", - "
" - ], - "text/plain": [ - " Time V1 V2 V3 V4 \\\n", - "count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", - "mean 94813.859575 1.168375e-15 3.416908e-16 -1.379537e-15 2.074095e-15 \n", - "std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 \n", - "min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 \n", - "25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 \n", - "50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 \n", - "75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 \n", - "max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 \n", - "\n", - " V5 V6 V7 V8 V9 \\\n", - "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", - "mean 9.604066e-16 1.487313e-15 -5.556467e-16 1.213481e-16 -2.406331e-15 \n", - "std 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00 1.098632e+00 \n", - "min -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01 -1.343407e+01 \n", - "25% -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01 -6.430976e-01 \n", - "50% -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02 -5.142873e-02 \n", - "75% 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01 5.971390e-01 \n", - "max 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01 1.559499e+01 \n", - "\n", - " ... V21 V22 V23 V24 \\\n", - "count ... 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", - "mean ... 1.654067e-16 -3.568593e-16 2.578648e-16 4.473266e-15 \n", - "std ... 7.345240e-01 7.257016e-01 6.244603e-01 6.056471e-01 \n", - "min ... -3.483038e+01 -1.093314e+01 -4.480774e+01 -2.836627e+00 \n", - "25% ... -2.283949e-01 -5.423504e-01 -1.618463e-01 -3.545861e-01 \n", - "50% ... -2.945017e-02 6.781943e-03 -1.119293e-02 4.097606e-02 \n", - "75% ... 1.863772e-01 5.285536e-01 1.476421e-01 4.395266e-01 \n", - "max ... 
2.720284e+01 1.050309e+01 2.252841e+01 4.584549e+00 \n", - "\n", - " V25 V26 V27 V28 Amount \\\n", - "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 284807.000000 \n", - "mean 5.340915e-16 1.683437e-15 -3.660091e-16 -1.227390e-16 88.349619 \n", - "std 5.212781e-01 4.822270e-01 4.036325e-01 3.300833e-01 250.120109 \n", - "min -1.029540e+01 -2.604551e+00 -2.256568e+01 -1.543008e+01 0.000000 \n", - "25% -3.171451e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 5.600000 \n", - "50% 1.659350e-02 -5.213911e-02 1.342146e-03 1.124383e-02 22.000000 \n", - "75% 3.507156e-01 2.409522e-01 9.104512e-02 7.827995e-02 77.165000 \n", - "max 7.519589e+00 3.517346e+00 3.161220e+01 3.384781e+01 25691.160000 \n", - "\n", - " Class \n", - "count 284807.000000 \n", - "mean 0.001727 \n", - "std 0.041527 \n", - "min 0.000000 \n", - "25% 0.000000 \n", - "50% 0.000000 \n", - "75% 0.000000 \n", - "max 1.000000 \n", - "\n", - "[8 rows x 31 columns]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "ab04cf33-e36b-4991-917b-e91a2dfa7fcd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 284807 entries, 0 to 284806\n", - "Data columns (total 31 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 Time 284807 non-null float64\n", - " 1 V1 284807 non-null float64\n", - " 2 V2 284807 non-null float64\n", - " 3 V3 284807 non-null float64\n", - " 4 V4 284807 non-null float64\n", - " 5 V5 284807 non-null float64\n", - " 6 V6 284807 non-null float64\n", - " 7 V7 284807 non-null float64\n", - " 8 V8 284807 non-null float64\n", - " 9 V9 284807 non-null float64\n", - " 10 V10 284807 non-null float64\n", - " 11 V11 284807 non-null float64\n", - " 12 V12 284807 non-null float64\n", - " 13 V13 284807 non-null float64\n", - " 14 V14 284807 
non-null float64\n", - " 15 V15 284807 non-null float64\n", - " 16 V16 284807 non-null float64\n", - " 17 V17 284807 non-null float64\n", - " 18 V18 284807 non-null float64\n", - " 19 V19 284807 non-null float64\n", - " 20 V20 284807 non-null float64\n", - " 21 V21 284807 non-null float64\n", - " 22 V22 284807 non-null float64\n", - " 23 V23 284807 non-null float64\n", - " 24 V24 284807 non-null float64\n", - " 25 V25 284807 non-null float64\n", - " 26 V26 284807 non-null float64\n", - " 27 V27 284807 non-null float64\n", - " 28 V28 284807 non-null float64\n", - " 29 Amount 284807 non-null float64\n", - " 30 Class 284807 non-null int64 \n", - "dtypes: float64(30), int64(1)\n", - "memory usage: 67.4 MB\n" - ] - } - ], - "source": [ - "df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "6c31fc17", - "metadata": {}, - "outputs": [], - "source": [ - "class_counts = df.Class.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "29b823c1", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2024-06-03T12:53:10.650385\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.9.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "class_counts.plot(kind=\"bar\")\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "918cf628", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2024-06-03T12:53:11.836204\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.9.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "class_counts.plot(kind=\"bar\", logy=True)\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "b6af24e9", - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2024-06-03T12:53:14.097839\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.9.0, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig, ax = plt.subplots()\n", - "\n", - "V1_0 = df[df[\"Class\"]==0].V1\n", - "\n", - "ax.set_xlabel(\"V1\")\n", - "ax.axis([-30, 5, 0, 0.2])\n", - "\n", - "df[df[\"Class\"]==0].V1.hist(ax=ax, label = \"Class 0\", bins=20, alpha=0.5, color='gray', density=True)\n", - "df[df[\"Class\"]==1].V1.hist(ax=ax, label = \"Class 1\", bins=20, alpha=0.5, color='red', density=True)\n", - "\n", - "legend = fig.legend(loc=\"upper right\", frameon=False)" - ] - }, - { - "cell_type": "markdown", - "id": "af2c4346", - "metadata": {}, - "source": [ - "## Задание на повторение материала\n", - "### 1. Создать одномерный массив Numpy под названием a из 12 последовательных целых чисел чисел от 12 до 24 невключительно" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "557a8256-9c01-4f25-a014-0d228c69024c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[12 13 14 15 16 17 18 19 20 21 22 23]\n" - ] - } - ], - "source": [ - "a = np.arange(12, 24, dtype=int)\n", - " \n", - "print(a)" - ] - }, - { - "cell_type": "markdown", - "id": "1c77f2b4-a4a1-4758-ab20-9c95b50a7228", - "metadata": {}, - "source": [ - "### 2. Создать 5 двумерных массивов разной формы из массива a. Не использовать в аргументах метода reshape число -1." 
- ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "c532a55c-a2e4-41b8-88fc-57202937c2df", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12],\n", - " [13],\n", - " [14],\n", - " [15],\n", - " [16],\n", - " [17],\n", - " [18],\n", - " [19],\n", - " [20],\n", - " [21],\n", - " [22],\n", - " [23]])" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (12, 1))" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "fdd1014d-c6f5-48bc-b047-81038d8fd827", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12, 13, 14, 15, 16, 17],\n", - " [18, 19, 20, 21, 22, 23]])" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (2, 6))" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "0ad59875-d3b2-4e5e-acfe-574669e8a068", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12, 13],\n", - " [14, 15],\n", - " [16, 17],\n", - " [18, 19],\n", - " [20, 21],\n", - " [22, 23]])" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (6, 2))" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "fc9a8aaa-d246-4876-a818-3bbb268a4a06", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12, 13, 14, 15],\n", - " [16, 17, 18, 19],\n", - " [20, 21, 22, 23]])" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (3, 4))" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "f58c5c92-4e45-4b11-8ee9-03930d40213e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12, 13, 14],\n", - " [15, 16, 17],\n", - " [18, 19, 20],\n", - " [21, 22, 23]])" - ] - }, - "execution_count": 47, - "metadata": 
{}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (4, 3))" - ] - }, - { - "cell_type": "markdown", - "id": "dda06888-4535-4b11-a8ac-8366cb665662", - "metadata": {}, - "source": [ - "### 3. Создать 5 двумерных массивов разной формы из массива a. Использовать в аргументах метода reshape число -1 (в трех примерах - для обозначения числа столбцов, в двух - для строк)." - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "5d2eceed-d468-43e2-a896-03db80347de7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12, 13, 14, 15, 16, 17],\n", - " [18, 19, 20, 21, 22, 23]])" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (2, -1))" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "5d4e33c6-9991-48f1-92dc-fe039f1f1403", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12, 13, 14, 15],\n", - " [16, 17, 18, 19],\n", - " [20, 21, 22, 23]])" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (3, -1))" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "7c5ee813-ab4a-44eb-af9b-0c6f086ff275", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12, 13, 14],\n", - " [15, 16, 17],\n", - " [18, 19, 20],\n", - " [21, 22, 23]])" - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (4, -1))" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "19305afe-f66e-439d-86fb-4f1a8e9289fd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12, 13],\n", - " [14, 15],\n", - " [16, 17],\n", - " [18, 19],\n", - " [20, 21],\n", - " [22, 23]])" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (-1, 
2))" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "021fef61-9517-4ed0-9a49-f0dd68416dff", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[12, 13, 14, 15, 16, 17],\n", - " [18, 19, 20, 21, 22, 23]])" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.reshape(a, (-1, 6))" - ] - }, - { - "cell_type": "markdown", - "id": "aae22acf-9657-4492-b4d8-d6707dd79d6e", - "metadata": {}, - "source": [ - "### 4. Можно ли массив Numpy, состоящий из одного столбца и 12 строк, назвать одномерным?" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "a039beed-b1fb-428a-995b-f4788b8ba5c2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "shape=(12, 1)=>размерность=2, следоватьельно одномерным может быть только вектор\n" - ] - } - ], - "source": [ - "b = np.resize(a, (12, 1))\n", - "\n", - "print(f\"shape={b.shape}=>размерность={b.ndim}, следоватьельно одномерным может быть только вектор\")" - ] - }, - { - "cell_type": "markdown", - "id": "65303edd-4937-4b7e-85f0-82412f9e320b", - "metadata": {}, - "source": [ - "### 5. Создать массив из 3 строк и 4 столбцов, состоящий из случайных чисел с плавающей запятой из нормального распределения со средним, равным 0 и среднеквадратичным отклонением, равным 1.0. Получить из этого массива одномерный массив с таким же атрибутом size, как и исходный массив." 
- ] - }, - { - "cell_type": "code", - "execution_count": 78, - "id": "6f9c7bba-d9b2-4e26-8630-8322e6d1f329", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 5.30087681e-01 -1.00730270e+00 3.49023257e-01 1.17610515e-01]\n", - " [-3.73873348e-05 -4.65003983e-01 1.17452549e+00 3.80377532e-02]\n", - " [-4.71755245e-02 3.99082256e-01 -1.15223721e-01 -2.65055731e+00]]\n", - "[[ 5.30087681e-01 -1.00730270e+00 3.49023257e-01 1.17610515e-01\n", - " -3.73873348e-05 -4.65003983e-01 1.17452549e+00 3.80377532e-02\n", - " -4.71755245e-02 3.99082256e-01 -1.15223721e-01 -2.65055731e+00]]\n", - "a.size = 12 <=> 12 = b.size\n" - ] - } - ], - "source": [ - "a = np.random.randn(3, 4)\n", - "b = a.reshape(1, 12)\n", - "print(a)\n", - "print(b)\n", - "print(f\"a.size = {a.size} <=> {b.size} = b.size\")" - ] - }, - { - "cell_type": "markdown", - "id": "6b177aff-4007-4af0-a927-82cb03183362", - "metadata": {}, - "source": [ - "### 6. Создать массив a, состоящий из целых чисел, убывающих от 20 до 0 невключительно с интервалом 2." - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "id": "a78dbbed-cbf3-4536-8e0a-f19586aa9218", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[20 18 16 14 12 10 8 6 4 2]\n" - ] - } - ], - "source": [ - "a = np.arange(20, 0, -2, dtype=int)\n", - "\n", - "print(a)" - ] - }, - { - "cell_type": "markdown", - "id": "3edb8cc8-ca5a-414f-9928-0c3be3fae6a1", - "metadata": {}, - "source": [ - "### 7. Создать массив b, состоящий из 1 строки и 10 столбцов: целых чисел, убывающих от 20 до 1 невключительно с интервалом 2. В чем разница между массивами a и b?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 93, - "id": "de8d49ea-a843-46ff-ab8f-f2b967cebeb2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[20 18 16 14 12 10 8 6 4 2] = [20 18 16 14 12 10 8 6 4 2] => разницы нет\n" - ] - } - ], - "source": [ - "b = np.arange(20, 1, -2, dtype=int)\n", - "\n", - "print(f\"{a} = {b} => разницы нет\")" - ] - }, - { - "cell_type": "markdown", - "id": "48add411-602f-46b7-9cb5-98388a6d742c", - "metadata": {}, - "source": [ - "### 8. Вертикально соединить массивы a и b. a - двумерный массив из нулей, число строк которого больше 1 и на 1 меньше, чем число строк двумерного массива b, состоящего из единиц. Итоговый массив v должен иметь атрибут size, равный 10." - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "id": "e64df321-366c-4400-a406-b775e19a6870", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0. 0.]\n", - " [0. 0.]\n", - " [1. 1.]\n", - " [1. 1.]\n", - " [1. 1.]] \n", - " size=10\n" - ] - } - ], - "source": [ - "a = np.zeros((2, 2))\n", - "b = np.ones((3,2))\n", - "c = np.vstack((a, b))\n", - "print(f\"{c} \\n size={c.size}\")" - ] - }, - { - "cell_type": "markdown", - "id": "85da760f-5076-4e9a-acb4-4e756ac72407", - "metadata": {}, - "source": [ - "### 9. Создать одномерный массив а, состоящий из последовательности целых чисел от 0 до 12. Поменять форму этого массива, чтобы получилась матрица A (двумерный массив Numpy), состоящая из 4 строк и 3 столбцов. Получить матрицу At путем транспонирования атрицы A. Получить матрицу B, умножив матрицу A на матрицу At с помощью матричного умножения. Какой размер имеет матрица B? Получится ли вычислить обратную матрицу для матрицы B и почему?" 
- ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "bc97b82a-eaec-4f6c-b9c8-4159dc45c44f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "матрица B =\n", - "[[ 5 14 23 32]\n", - " [ 14 50 86 122]\n", - " [ 23 86 149 212]\n", - " [ 32 122 212 302]]\n", - "размер = (4, 4)\n", - "определитель = 0 => обратная матрица не существует\n" - ] - } - ], - "source": [ - "a = np.arange(0, 12, dtype=int)\n", - "A = np.reshape(a, (4, -1))\n", - "At = A.T\n", - "B = np.dot(A, At)\n", - "print(f\"матрица B =\\n{B}\\nразмер = {B.shape}\\nопределитель = {np.linalg.det(B):1.0f} => обратная матрица не существует\")" - ] - }, - { - "cell_type": "markdown", - "id": "957e860b-9d20-46cf-91b0-2662c2faeebb", - "metadata": {}, - "source": [ - "### 10. Инициализируйте генератор случайных числе с помощью объекта seed, равного 42." - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "97c0fa11-6530-461b-bb89-65cb6d38b83f", - "metadata": {}, - "outputs": [], - "source": [ - "seed(42)" - ] - }, - { - "cell_type": "markdown", - "id": "7d7b04ea-a205-4a64-bf93-f68c62b69b5f", - "metadata": {}, - "source": [ - "### 11. Создайте одномерный массив c, составленный из последовательности 16-ти случайных равномерно распределенных целых чисел от 0 до 16 невключительно." - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "b307b5d5-e3aa-44b8-94f3-2a9b8877fd5b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 16]\n" - ] - } - ], - "source": [ - "c = np.linspace(start=0, stop=16, num=16, dtype=int)\n", - "print(c)" - ] - }, - { - "cell_type": "markdown", - "id": "3107f89b-c513-4eeb-b21f-d091920b51b3", - "metadata": {}, - "source": [ - "### 12. Поменяйте его форму так, чтобы получилась квадратная матрица C. Получите матрицу D, поэлементно прибавив матрицу B из предыдущего вопроса к матрице C, умноженной на 10. 
Вычислите определитель, ранг и обратную матрицу D_inv для D." - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "721efedc-bcdd-4aa0-b7bf-58469b31fbf2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "C =\n", - "[[ 0 1 2 3]\n", - " [ 4 5 6 7]\n", - " [ 8 9 10 11]\n", - " [12 13 14 16]]\n", - "D =\n", - "[[ 5 24 43 62]\n", - " [ 54 100 146 192]\n", - " [103 176 249 322]\n", - " [152 252 352 462]]\n", - "определитель = 2.2623680706601445e-10\n", - "ранг = 3\n", - "D_inv =\n", - "[[-3.51843721e+13 7.03687442e+13 -3.51843721e+13 -3.12500000e-03]\n", - " [ 7.03687442e+13 -1.40737488e+14 7.03687442e+13 1.06250000e-01]\n", - " [-3.51843721e+13 7.03687442e+13 -3.51843721e+13 -2.03125000e-01]\n", - " [ 2.00000000e-01 -3.00000000e-01 -0.00000000e+00 1.00000000e-01]]\n" - ] - } - ], - "source": [ - "C = np.reshape(c, (4, -1))\n", - "print(f\"C =\\n{C}\")\n", - "D = B + C * 10\n", - "D_inv = np.linalg.inv(D)\n", - "print(f\"D =\\n{D}\\nопределитель = {np.linalg.det(D)}\\nранг = {np.linalg.matrix_rank(D)}\\nD_inv =\\n{D_inv}\")" - ] - }, - { - "cell_type": "markdown", - "id": "05de35ef-439f-4273-8902-9fbf5d1dd878", - "metadata": {}, - "source": [ - "### 13. Приравняйте к нулю отрицательные числа в матрице D_inv, а положительные - к единице. Убедитесь, что в матрице D_inv остались только нули и единицы. С помощью функции numpy.where, используя матрицу D_inv в качестве маски, а матрицы B и C - в качестве источников данных, получите матрицу E размером 4x4. Элементы матрицы E, для которых соответствующий элемент матрицы D_inv равен 1, должны быть равны соответствующему элементу матрицы B, а элементы матрицы E, для которых соответствующий элемент матрицы D_inv равен 0, должны быть равны соответствующему элементу матрицы C." 
- ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "ae14bda8-3197-4ce6-8ed7-9227f40ee440", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "D_inv =\n", - "[[ 0. 1. 0. 0.]\n", - " [ 1. 0. 1. 1.]\n", - " [ 0. 1. 0. 0.]\n", - " [ 1. 0. -0. 1.]]\n", - "E =\n", - "[[ 0. 14. 2. 3.]\n", - " [ 14. 5. 86. 122.]\n", - " [ 8. 86. 10. 11.]\n", - " [ 32. 13. 14. 302.]]\n" - ] - } - ], - "source": [ - "D_inv[np.where(D_inv < 0)] = 0\n", - "D_inv[np.where(D_inv > 0)] = 1\n", - "print(f\"D_inv =\\n{D_inv}\")\n", - "\n", - "E = np.zeros((4, 4))\n", - "E[np.where(D_inv == 0)] = C[np.where(D_inv == 0)]\n", - "E[np.where(D_inv == 1)] = B[np.where(D_inv == 1)]\n", - "print(f\"E =\\n{E}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8ecf6a0-078c-4d26-9aa0-8d84514df3e7", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}