{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Indukcja drzew decyzyjnych" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SepalLengthSepalWidthPetalLengthPetalWidthName
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
55.43.91.70.4Iris-setosa
64.63.41.40.3Iris-setosa
75.03.41.50.2Iris-setosa
84.42.91.40.2Iris-setosa
94.93.11.50.1Iris-setosa
105.43.71.50.2Iris-setosa
114.83.41.60.2Iris-setosa
124.83.01.40.1Iris-setosa
134.33.01.10.1Iris-setosa
145.84.01.20.2Iris-setosa
155.74.41.50.4Iris-setosa
165.43.91.30.4Iris-setosa
175.13.51.40.3Iris-setosa
185.73.81.70.3Iris-setosa
195.13.81.50.3Iris-setosa
205.43.41.70.2Iris-setosa
215.13.71.50.4Iris-setosa
224.63.61.00.2Iris-setosa
235.13.31.70.5Iris-setosa
244.83.41.90.2Iris-setosa
255.03.01.60.2Iris-setosa
265.03.41.60.4Iris-setosa
275.23.51.50.2Iris-setosa
285.23.41.40.2Iris-setosa
294.73.21.60.2Iris-setosa
..................
1206.93.25.72.3Iris-virginica
1215.62.84.92.0Iris-virginica
1227.72.86.72.0Iris-virginica
1236.32.74.91.8Iris-virginica
1246.73.35.72.1Iris-virginica
1257.23.26.01.8Iris-virginica
1266.22.84.81.8Iris-virginica
1276.13.04.91.8Iris-virginica
1286.42.85.62.1Iris-virginica
1297.23.05.81.6Iris-virginica
1307.42.86.11.9Iris-virginica
1317.93.86.42.0Iris-virginica
1326.42.85.62.2Iris-virginica
1336.32.85.11.5Iris-virginica
1346.12.65.61.4Iris-virginica
1357.73.06.12.3Iris-virginica
1366.33.45.62.4Iris-virginica
1376.43.15.51.8Iris-virginica
1386.03.04.81.8Iris-virginica
1396.93.15.42.1Iris-virginica
1406.73.15.62.4Iris-virginica
1416.93.15.12.3Iris-virginica
1425.82.75.11.9Iris-virginica
1436.83.25.92.3Iris-virginica
1446.73.35.72.5Iris-virginica
1456.73.05.22.3Iris-virginica
1466.32.55.01.9Iris-virginica
1476.53.05.22.0Iris-virginica
1486.23.45.42.3Iris-virginica
1495.93.05.11.8Iris-virginica
\n", "

150 rows × 5 columns

\n", "
" ], "text/plain": [ " SepalLength SepalWidth PetalLength PetalWidth Name\n", "0 5.1 3.5 1.4 0.2 Iris-setosa\n", "1 4.9 3.0 1.4 0.2 Iris-setosa\n", "2 4.7 3.2 1.3 0.2 Iris-setosa\n", "3 4.6 3.1 1.5 0.2 Iris-setosa\n", "4 5.0 3.6 1.4 0.2 Iris-setosa\n", "5 5.4 3.9 1.7 0.4 Iris-setosa\n", "6 4.6 3.4 1.4 0.3 Iris-setosa\n", "7 5.0 3.4 1.5 0.2 Iris-setosa\n", "8 4.4 2.9 1.4 0.2 Iris-setosa\n", "9 4.9 3.1 1.5 0.1 Iris-setosa\n", "10 5.4 3.7 1.5 0.2 Iris-setosa\n", "11 4.8 3.4 1.6 0.2 Iris-setosa\n", "12 4.8 3.0 1.4 0.1 Iris-setosa\n", "13 4.3 3.0 1.1 0.1 Iris-setosa\n", "14 5.8 4.0 1.2 0.2 Iris-setosa\n", "15 5.7 4.4 1.5 0.4 Iris-setosa\n", "16 5.4 3.9 1.3 0.4 Iris-setosa\n", "17 5.1 3.5 1.4 0.3 Iris-setosa\n", "18 5.7 3.8 1.7 0.3 Iris-setosa\n", "19 5.1 3.8 1.5 0.3 Iris-setosa\n", "20 5.4 3.4 1.7 0.2 Iris-setosa\n", "21 5.1 3.7 1.5 0.4 Iris-setosa\n", "22 4.6 3.6 1.0 0.2 Iris-setosa\n", "23 5.1 3.3 1.7 0.5 Iris-setosa\n", "24 4.8 3.4 1.9 0.2 Iris-setosa\n", "25 5.0 3.0 1.6 0.2 Iris-setosa\n", "26 5.0 3.4 1.6 0.4 Iris-setosa\n", "27 5.2 3.5 1.5 0.2 Iris-setosa\n", "28 5.2 3.4 1.4 0.2 Iris-setosa\n", "29 4.7 3.2 1.6 0.2 Iris-setosa\n", ".. ... ... ... ... 
...\n", "120 6.9 3.2 5.7 2.3 Iris-virginica\n", "121 5.6 2.8 4.9 2.0 Iris-virginica\n", "122 7.7 2.8 6.7 2.0 Iris-virginica\n", "123 6.3 2.7 4.9 1.8 Iris-virginica\n", "124 6.7 3.3 5.7 2.1 Iris-virginica\n", "125 7.2 3.2 6.0 1.8 Iris-virginica\n", "126 6.2 2.8 4.8 1.8 Iris-virginica\n", "127 6.1 3.0 4.9 1.8 Iris-virginica\n", "128 6.4 2.8 5.6 2.1 Iris-virginica\n", "129 7.2 3.0 5.8 1.6 Iris-virginica\n", "130 7.4 2.8 6.1 1.9 Iris-virginica\n", "131 7.9 3.8 6.4 2.0 Iris-virginica\n", "132 6.4 2.8 5.6 2.2 Iris-virginica\n", "133 6.3 2.8 5.1 1.5 Iris-virginica\n", "134 6.1 2.6 5.6 1.4 Iris-virginica\n", "135 7.7 3.0 6.1 2.3 Iris-virginica\n", "136 6.3 3.4 5.6 2.4 Iris-virginica\n", "137 6.4 3.1 5.5 1.8 Iris-virginica\n", "138 6.0 3.0 4.8 1.8 Iris-virginica\n", "139 6.9 3.1 5.4 2.1 Iris-virginica\n", "140 6.7 3.1 5.6 2.4 Iris-virginica\n", "141 6.9 3.1 5.1 2.3 Iris-virginica\n", "142 5.8 2.7 5.1 1.9 Iris-virginica\n", "143 6.8 3.2 5.9 2.3 Iris-virginica\n", "144 6.7 3.3 5.7 2.5 Iris-virginica\n", "145 6.7 3.0 5.2 2.3 Iris-virginica\n", "146 6.3 2.5 5.0 1.9 Iris-virginica\n", "147 6.5 3.0 5.2 2.0 Iris-virginica\n", "148 6.2 3.4 5.4 2.3 Iris-virginica\n", "149 5.9 3.0 5.1 1.8 Iris-virginica\n", "\n", "[150 rows x 5 columns]" ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from __future__ import print_function\n",
 "\n",
 "import os\n",
 "import subprocess\n",
 "\n",
 "import pandas as pd\n",
 "import numpy as np\n",
 "from sklearn.tree import DecisionTreeClassifier, export_graphviz\n",
 "\n",
 "# Pinned to a release tag so the download does not depend on a moving branch.\n",
 "fn = \"https://raw.githubusercontent.com/pandas-dev/pandas/v2.2.3/pandas/tests/io/data/csv/iris.csv\"\n",
 "\n",
 "try:\n",
 "    df = pd.read_csv(fn)\n",
 "except Exception as exc:\n",
 "    # Surface the underlying error (with its cause) rather than hiding it behind a bare except.\n",
 "    raise RuntimeError(\"Błąd pobierania pliku iris.csv\") from exc\n",
 "\n",
 "df"
 ] }, { "cell_type": "code", "execution_count": 104, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "# Work on a copy so the raw frame loaded above stays untouched.\n",
 "df2 = df.copy()\n",
 "\n",
 "# Encode each class name as an integer, numbered in order of first appearance.\n",
 "targets = df2[\"Name\"].unique()\n",
 "map_to_int = {name: n for n, name in enumerate(targets)}\n",
 "# The first four columns are used as the feature set.\n",
 "features = list(df2.columns[:4])\n",
 "\n",
 "# Series.map performs the dictionary lookup and yields an integer column directly.\n",
 "df2[\"Target\"] = df2[\"Name\"].map(map_to_int)"
 ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n", " max_features=None, max_leaf_nodes=None,\n", " min_impurity_split=1e-07, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " presort=False, random_state=999, splitter='best')" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "y = df2[\"Target\"]\n",
 "X = df2[features]\n",
 "\n",
 "# Fixed seed so the fitted tree is the same on every run.\n",
 "dt = DecisionTreeClassifier(random_state=999)\n",
 "dt.fit(X, y)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Zadanie 1: Normalizacja atrybutów" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SepalLengthSepalWidthPetalLengthPetalWidthNameTarget
05.13.50.0677970.041667Iris-setosa0
14.93.00.0677970.041667Iris-setosa0
24.73.20.0508470.041667Iris-setosa0
34.63.10.0847460.041667Iris-setosa0
45.03.60.0677970.041667Iris-setosa0
55.43.90.1186440.125000Iris-setosa0
64.63.40.0677970.083333Iris-setosa0
75.03.40.0847460.041667Iris-setosa0
84.42.90.0677970.041667Iris-setosa0
94.93.10.0847460.000000Iris-setosa0
105.43.70.0847460.041667Iris-setosa0
114.83.40.1016950.041667Iris-setosa0
124.83.00.0677970.000000Iris-setosa0
134.33.00.0169490.000000Iris-setosa0
145.84.00.0338980.041667Iris-setosa0
155.74.40.0847460.125000Iris-setosa0
165.43.90.0508470.125000Iris-setosa0
175.13.50.0677970.083333Iris-setosa0
185.73.80.1186440.083333Iris-setosa0
195.13.80.0847460.083333Iris-setosa0
205.43.40.1186440.041667Iris-setosa0
215.13.70.0847460.125000Iris-setosa0
224.63.60.0000000.041667Iris-setosa0
235.13.30.1186440.166667Iris-setosa0
244.83.40.1525420.041667Iris-setosa0
255.03.00.1016950.041667Iris-setosa0
265.03.40.1016950.125000Iris-setosa0
275.23.50.0847460.041667Iris-setosa0
285.23.40.0677970.041667Iris-setosa0
294.73.20.1016950.041667Iris-setosa0
.....................
1206.93.20.7966100.916667Iris-virginica2
1215.62.80.6610170.791667Iris-virginica2
1227.72.80.9661020.791667Iris-virginica2
1236.32.70.6610170.708333Iris-virginica2
1246.73.30.7966100.833333Iris-virginica2
1257.23.20.8474580.708333Iris-virginica2
1266.22.80.6440680.708333Iris-virginica2
1276.13.00.6610170.708333Iris-virginica2
1286.42.80.7796610.833333Iris-virginica2
1297.23.00.8135590.625000Iris-virginica2
1307.42.80.8644070.750000Iris-virginica2
1317.93.80.9152540.791667Iris-virginica2
1326.42.80.7796610.875000Iris-virginica2
1336.32.80.6949150.583333Iris-virginica2
1346.12.60.7796610.541667Iris-virginica2
1357.73.00.8644070.916667Iris-virginica2
1366.33.40.7796610.958333Iris-virginica2
1376.43.10.7627120.708333Iris-virginica2
1386.03.00.6440680.708333Iris-virginica2
1396.93.10.7457630.833333Iris-virginica2
1406.73.10.7796610.958333Iris-virginica2
1416.93.10.6949150.916667Iris-virginica2
1425.82.70.6949150.750000Iris-virginica2
1436.83.20.8305080.916667Iris-virginica2
1446.73.30.7966101.000000Iris-virginica2
1456.73.00.7118640.916667Iris-virginica2
1466.32.50.6779660.750000Iris-virginica2
1476.53.00.7118640.791667Iris-virginica2
1486.23.40.7457630.916667Iris-virginica2
1495.93.00.6949150.708333Iris-virginica2
\n", "

150 rows × 6 columns

\n", "
" ], "text/plain": [ " SepalLength SepalWidth PetalLength PetalWidth Name Target\n", "0 5.1 3.5 0.067797 0.041667 Iris-setosa 0\n", "1 4.9 3.0 0.067797 0.041667 Iris-setosa 0\n", "2 4.7 3.2 0.050847 0.041667 Iris-setosa 0\n", "3 4.6 3.1 0.084746 0.041667 Iris-setosa 0\n", "4 5.0 3.6 0.067797 0.041667 Iris-setosa 0\n", "5 5.4 3.9 0.118644 0.125000 Iris-setosa 0\n", "6 4.6 3.4 0.067797 0.083333 Iris-setosa 0\n", "7 5.0 3.4 0.084746 0.041667 Iris-setosa 0\n", "8 4.4 2.9 0.067797 0.041667 Iris-setosa 0\n", "9 4.9 3.1 0.084746 0.000000 Iris-setosa 0\n", "10 5.4 3.7 0.084746 0.041667 Iris-setosa 0\n", "11 4.8 3.4 0.101695 0.041667 Iris-setosa 0\n", "12 4.8 3.0 0.067797 0.000000 Iris-setosa 0\n", "13 4.3 3.0 0.016949 0.000000 Iris-setosa 0\n", "14 5.8 4.0 0.033898 0.041667 Iris-setosa 0\n", "15 5.7 4.4 0.084746 0.125000 Iris-setosa 0\n", "16 5.4 3.9 0.050847 0.125000 Iris-setosa 0\n", "17 5.1 3.5 0.067797 0.083333 Iris-setosa 0\n", "18 5.7 3.8 0.118644 0.083333 Iris-setosa 0\n", "19 5.1 3.8 0.084746 0.083333 Iris-setosa 0\n", "20 5.4 3.4 0.118644 0.041667 Iris-setosa 0\n", "21 5.1 3.7 0.084746 0.125000 Iris-setosa 0\n", "22 4.6 3.6 0.000000 0.041667 Iris-setosa 0\n", "23 5.1 3.3 0.118644 0.166667 Iris-setosa 0\n", "24 4.8 3.4 0.152542 0.041667 Iris-setosa 0\n", "25 5.0 3.0 0.101695 0.041667 Iris-setosa 0\n", "26 5.0 3.4 0.101695 0.125000 Iris-setosa 0\n", "27 5.2 3.5 0.084746 0.041667 Iris-setosa 0\n", "28 5.2 3.4 0.067797 0.041667 Iris-setosa 0\n", "29 4.7 3.2 0.101695 0.041667 Iris-setosa 0\n", ".. ... ... ... ... ... 
...\n", "120 6.9 3.2 0.796610 0.916667 Iris-virginica 2\n", "121 5.6 2.8 0.661017 0.791667 Iris-virginica 2\n", "122 7.7 2.8 0.966102 0.791667 Iris-virginica 2\n", "123 6.3 2.7 0.661017 0.708333 Iris-virginica 2\n", "124 6.7 3.3 0.796610 0.833333 Iris-virginica 2\n", "125 7.2 3.2 0.847458 0.708333 Iris-virginica 2\n", "126 6.2 2.8 0.644068 0.708333 Iris-virginica 2\n", "127 6.1 3.0 0.661017 0.708333 Iris-virginica 2\n", "128 6.4 2.8 0.779661 0.833333 Iris-virginica 2\n", "129 7.2 3.0 0.813559 0.625000 Iris-virginica 2\n", "130 7.4 2.8 0.864407 0.750000 Iris-virginica 2\n", "131 7.9 3.8 0.915254 0.791667 Iris-virginica 2\n", "132 6.4 2.8 0.779661 0.875000 Iris-virginica 2\n", "133 6.3 2.8 0.694915 0.583333 Iris-virginica 2\n", "134 6.1 2.6 0.779661 0.541667 Iris-virginica 2\n", "135 7.7 3.0 0.864407 0.916667 Iris-virginica 2\n", "136 6.3 3.4 0.779661 0.958333 Iris-virginica 2\n", "137 6.4 3.1 0.762712 0.708333 Iris-virginica 2\n", "138 6.0 3.0 0.644068 0.708333 Iris-virginica 2\n", "139 6.9 3.1 0.745763 0.833333 Iris-virginica 2\n", "140 6.7 3.1 0.779661 0.958333 Iris-virginica 2\n", "141 6.9 3.1 0.694915 0.916667 Iris-virginica 2\n", "142 5.8 2.7 0.694915 0.750000 Iris-virginica 2\n", "143 6.8 3.2 0.830508 0.916667 Iris-virginica 2\n", "144 6.7 3.3 0.796610 1.000000 Iris-virginica 2\n", "145 6.7 3.0 0.711864 0.916667 Iris-virginica 2\n", "146 6.3 2.5 0.677966 0.750000 Iris-virginica 2\n", "147 6.5 3.0 0.711864 0.791667 Iris-virginica 2\n", "148 6.2 3.4 0.745763 0.916667 Iris-virginica 2\n", "149 5.9 3.0 0.694915 0.708333 Iris-virginica 2\n", "\n", "[150 rows x 6 columns]" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from sklearn.preprocessing import MinMaxScaler\n",
 "\n",
 "# Only the two petal measurements are rescaled; the sepal columns keep their original units.\n",
 "petal_columns = ['PetalLength', 'PetalWidth']\n",
 "\n",
 "min_max_scaler = MinMaxScaler()\n",
 "X_norm = min_max_scaler.fit_transform(X[petal_columns])\n",
 "\n",
 "# Write the scaled values back into the working frame and display it.\n",
 "df2[petal_columns] = X_norm\n",
 "df2"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Zadanie 
2: Podział na zbiór uczący i testujący" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SepalLengthSepalWidthPetalLengthPetalWidth
105.43.70.0847460.041667
916.13.00.6101690.541667
365.53.50.0508470.041667
507.03.20.6271190.541667
836.02.70.6949150.625000
945.62.70.5423730.500000
776.73.00.6779660.666667
815.52.40.4576270.375000
1307.42.80.8644070.750000
885.63.00.5254240.500000
766.82.80.6440680.541667
94.93.10.0847460.000000
1215.62.80.6610170.791667
55.43.90.1186440.125000
1376.43.10.7627120.708333
374.93.10.0847460.000000
1126.83.00.7627120.833333
1097.23.60.8644071.000000
1436.83.20.8305080.916667
84.42.90.0677970.041667
294.73.20.1016950.041667
224.63.60.0000000.041667
665.63.00.5932200.583333
716.12.80.5084750.500000
124.83.00.0677970.000000
114.83.40.1016950.041667
876.32.30.5762710.500000
304.83.10.1016950.041667
1266.22.80.6440680.708333
1425.82.70.6949150.750000
...............
795.72.60.4237290.375000
1106.53.20.6949150.791667
414.52.30.0508470.083333
574.92.40.3898310.375000
05.13.50.0677970.041667
1156.43.20.7288140.916667
586.62.90.6101690.500000
1086.72.50.8135590.708333
786.02.90.5932200.583333
546.52.80.6101690.583333
465.13.80.1016950.041667
235.13.30.1186440.166667
335.54.20.0677970.041667
155.74.40.0847460.125000
1236.32.70.6610170.708333
424.43.20.0508470.041667
756.63.00.5762710.541667
1196.02.20.6779660.583333
454.83.00.0677970.083333
726.32.50.6610170.583333
866.73.10.6271190.583333
615.93.00.5423730.583333
435.03.50.1016950.208333
175.13.50.0677970.083333
75.03.40.0847460.041667
1466.32.50.6779660.750000
965.72.90.5423730.500000
1206.93.20.7966100.916667
165.43.90.0508470.125000
14.93.00.0677970.041667
\n", "

90 rows × 4 columns

\n", "
" ], "text/plain": [ " SepalLength SepalWidth PetalLength PetalWidth\n", "10 5.4 3.7 0.084746 0.041667\n", "91 6.1 3.0 0.610169 0.541667\n", "36 5.5 3.5 0.050847 0.041667\n", "50 7.0 3.2 0.627119 0.541667\n", "83 6.0 2.7 0.694915 0.625000\n", "94 5.6 2.7 0.542373 0.500000\n", "77 6.7 3.0 0.677966 0.666667\n", "81 5.5 2.4 0.457627 0.375000\n", "130 7.4 2.8 0.864407 0.750000\n", "88 5.6 3.0 0.525424 0.500000\n", "76 6.8 2.8 0.644068 0.541667\n", "9 4.9 3.1 0.084746 0.000000\n", "121 5.6 2.8 0.661017 0.791667\n", "5 5.4 3.9 0.118644 0.125000\n", "137 6.4 3.1 0.762712 0.708333\n", "37 4.9 3.1 0.084746 0.000000\n", "112 6.8 3.0 0.762712 0.833333\n", "109 7.2 3.6 0.864407 1.000000\n", "143 6.8 3.2 0.830508 0.916667\n", "8 4.4 2.9 0.067797 0.041667\n", "29 4.7 3.2 0.101695 0.041667\n", "22 4.6 3.6 0.000000 0.041667\n", "66 5.6 3.0 0.593220 0.583333\n", "71 6.1 2.8 0.508475 0.500000\n", "12 4.8 3.0 0.067797 0.000000\n", "11 4.8 3.4 0.101695 0.041667\n", "87 6.3 2.3 0.576271 0.500000\n", "30 4.8 3.1 0.101695 0.041667\n", "126 6.2 2.8 0.644068 0.708333\n", "142 5.8 2.7 0.694915 0.750000\n", ".. ... ... ... 
...\n", "79 5.7 2.6 0.423729 0.375000\n", "110 6.5 3.2 0.694915 0.791667\n", "41 4.5 2.3 0.050847 0.083333\n", "57 4.9 2.4 0.389831 0.375000\n", "0 5.1 3.5 0.067797 0.041667\n", "115 6.4 3.2 0.728814 0.916667\n", "58 6.6 2.9 0.610169 0.500000\n", "108 6.7 2.5 0.813559 0.708333\n", "78 6.0 2.9 0.593220 0.583333\n", "54 6.5 2.8 0.610169 0.583333\n", "46 5.1 3.8 0.101695 0.041667\n", "23 5.1 3.3 0.118644 0.166667\n", "33 5.5 4.2 0.067797 0.041667\n", "15 5.7 4.4 0.084746 0.125000\n", "123 6.3 2.7 0.661017 0.708333\n", "42 4.4 3.2 0.050847 0.041667\n", "75 6.6 3.0 0.576271 0.541667\n", "119 6.0 2.2 0.677966 0.583333\n", "45 4.8 3.0 0.067797 0.083333\n", "72 6.3 2.5 0.661017 0.583333\n", "86 6.7 3.1 0.627119 0.583333\n", "61 5.9 3.0 0.542373 0.583333\n", "43 5.0 3.5 0.101695 0.208333\n", "17 5.1 3.5 0.067797 0.083333\n", "7 5.0 3.4 0.084746 0.041667\n", "146 6.3 2.5 0.677966 0.750000\n", "96 5.7 2.9 0.542373 0.500000\n", "120 6.9 3.2 0.796610 0.916667\n", "16 5.4 3.9 0.050847 0.125000\n", "1 4.9 3.0 0.067797 0.041667\n", "\n", "[90 rows x 4 columns]" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from sklearn.model_selection import train_test_split\n",
 "\n",
 "y = df2[\"Target\"]\n",
 "X = df2[features]\n",
 "\n",
 "# Seed the shuffle (same seed as the models) so the split and every metric below are reproducible.\n",
 "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=999)\n",
 "X_train"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Zadanie 3: Wyświetlenie macierzy pomyłek i dokładności" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.883333333333\n" ] }, { "data": { "text/plain": [ "array([[16, 0, 0],\n", " [ 0, 17, 4],\n", " [ 0, 3, 20]])" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from sklearn.metrics import confusion_matrix\n",
 "from sklearn.metrics import accuracy_score\n",
 "\n",
 "# Refit the tree on the training part only, then score it on the held-out part.\n",
 "dt.fit(X_train, y_train)\n",
 "y_pred = dt.predict(X_test)\n",
 "\n",
 "y_true = y_test\n",
 "\n",
 "dt_acc_score = accuracy_score(y_true, y_pred)\n",
 "\n",
 "print('Accuracy: ', dt_acc_score)\n",
 "confusion_matrix(y_true, y_pred)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Zadanie 4: Wyświetlenie ważności atrybutów" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PetalLength \t: 0.531673666862\n", "PetalWidth \t: 0.405659010186\n", "SepalWidth \t: 0.0323396173145\n", "SepalLength \t: 0.0303277056383\n" ] } ], "source": [
 "# Pair each feature name with its importance in the fitted tree, most important first.\n",
 "feature_importance = zip(features, dt.feature_importances_)\n",
 "feature_importance = sorted(feature_importance, key=lambda x:x[1], reverse=True)\n",
 "\n",
 "for (feature, importance) in feature_importance:\n",
 "    print(feature, '\\t:', importance)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Zadanie 5: Zamiana klasyfikatora na RandomForest" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.933333333333\n" ] }, { "data": { "text/plain": [ "array([[16, 0, 0],\n", " [ 0, 20, 1],\n", " [ 0, 3, 20]])" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from sklearn.ensemble import RandomForestClassifier\n",
 "\n",
 "# Same train/test split and seed as the single tree, so the two accuracies are comparable.\n",
 "rf = RandomForestClassifier(n_estimators=100, random_state=999)\n",
 "rf.fit(X_train, y_train)\n",
 "rf_pred = rf.predict(X_test)\n",
 "\n",
 "rf_acc_score = accuracy_score(y_true, rf_pred)\n",
 "\n",
 "print('Accuracy: ', rf_acc_score)\n",
 "confusion_matrix(y_true, rf_pred)"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.3" } }, "nbformat": 4, 
"nbformat_minor": 2 }