{ "cells": [ { "cell_type": "markdown", "id": "2c90de90-1e3e-4bf1-8705-b2b85ed8a387", "metadata": {}, "source": [ "# Regression : Understanding effect and cause" ] }, { "cell_type": "markdown", "id": "49d86280-47eb-471e-becf-33affb9f56c5", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "id": "5cb2af73-246e-4a95-ad43-1816b79d3985", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import scipy as sp\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "from sklearn.linear_model import LinearRegression, LogisticRegression\n", "from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder\n", "from sklearn.pipeline import Pipeline, make_pipeline\n", "import statsmodels.api as sm\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": null, "id": "f1e147e1-14c7-4dbf-beab-02a3ed42c760", "metadata": {}, "outputs": [], "source": [ "sns.set()" ] }, { "cell_type": "markdown", "id": "17a98d1e-3959-405c-bc52-ffcc9f53fb9a", "metadata": {}, "source": [ "## Credit Score Rating Example" ] }, { "cell_type": "code", "execution_count": null, "id": "3f142627-eaa6-40f9-830b-90594c450469", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Income</th>\n", " <th>Rating</th>\n", " <th>Cards</th>\n", " <th>Age</th>\n", " <th>Education</th>\n", " <th>Gender</th>\n", " <th>Student</th>\n", " <th>Married</th>\n", " <th>Ethnicity</th>\n", " <th>Balance</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>14.891</td>\n", " <td>283</td>\n", " <td>2</td>\n", " 
<td>34</td>\n", " <td>11</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Yes</td>\n", " <td>Caucasian</td>\n", " <td>333</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>106.025</td>\n", " <td>483</td>\n", " <td>3</td>\n", " <td>82</td>\n", " <td>15</td>\n", " <td>Female</td>\n", " <td>Yes</td>\n", " <td>Yes</td>\n", " <td>Asian</td>\n", " <td>903</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>104.593</td>\n", " <td>514</td>\n", " <td>4</td>\n", " <td>71</td>\n", " <td>11</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>No</td>\n", " <td>Asian</td>\n", " <td>580</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>148.924</td>\n", " <td>681</td>\n", " <td>3</td>\n", " <td>36</td>\n", " <td>11</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>No</td>\n", " <td>Asian</td>\n", " <td>964</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>55.882</td>\n", " <td>357</td>\n", " <td>2</td>\n", " <td>68</td>\n", " <td>16</td>\n", " <td>Male</td>\n", " <td>No</td>\n", " <td>Yes</td>\n", " <td>Caucasian</td>\n", " <td>331</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Income Rating Cards Age Education Gender Student Married Ethnicity \\\n", "0 14.891 283 2 34 11 Male No Yes Caucasian \n", "1 106.025 483 3 82 15 Female Yes Yes Asian \n", "2 104.593 514 4 71 11 Male No No Asian \n", "3 148.924 681 3 36 11 Female No No Asian \n", "4 55.882 357 2 68 16 Male No Yes Caucasian \n", "\n", " Balance \n", "0 333 \n", "1 903 \n", "2 580 \n", "3 964 \n", "4 331 " ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_credscore = pd.read_csv(\"DATA_3.01_CREDIT.csv\", dtype={'Gender':'category', \n", " 'Student':'category',\n", " 'Married':'category',\n", " 'Ethnicity':'category'\n", " });df_credscore.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "58c58a15-5422-4cd9-9ea0-11e2eb912f98", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", 
"text": [ "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 300 entries, 0 to 299\n", "Data columns (total 10 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Income 300 non-null float64 \n", " 1 Rating 300 non-null int64 \n", " 2 Cards 300 non-null int64 \n", " 3 Age 300 non-null int64 \n", " 4 Education 300 non-null int64 \n", " 5 Gender 300 non-null category\n", " 6 Student 300 non-null category\n", " 7 Married 300 non-null category\n", " 8 Ethnicity 300 non-null category\n", " 9 Balance 300 non-null int64 \n", "dtypes: category(4), float64(1), int64(5)\n", "memory usage: 15.6 KB\n" ] } ], "source": [ "df_credscore.info()" ] }, { "cell_type": "code", "execution_count": null, "id": "cdb64a98-e23c-41ef-b0c0-5f672ac69e7b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Income</th>\n", " <th>Rating</th>\n", " <th>Cards</th>\n", " <th>Age</th>\n", " <th>Education</th>\n", " <th>Gender</th>\n", " <th>Student</th>\n", " <th>Married</th>\n", " <th>Ethnicity</th>\n", " <th>Balance</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>300.000000</td>\n", " <td>300.000000</td>\n", " <td>300.000000</td>\n", " <td>300.000000</td>\n", " <td>300.000000</td>\n", " <td>300</td>\n", " <td>300</td>\n", " <td>300</td>\n", " <td>300</td>\n", " <td>300.000000</td>\n", " </tr>\n", " <tr>\n", " <th>unique</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>3</td>\n", " <td>NaN</td>\n", " </tr>\n", " 
<tr>\n", " <th>top</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>No</td>\n", " <td>Yes</td>\n", " <td>Caucasian</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>freq</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>168</td>\n", " <td>268</td>\n", " <td>183</td>\n", " <td>141</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>mean</th>\n", " <td>44.054393</td>\n", " <td>348.116667</td>\n", " <td>3.026667</td>\n", " <td>54.983333</td>\n", " <td>13.393333</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>502.686667</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>33.863066</td>\n", " <td>150.871547</td>\n", " <td>1.351064</td>\n", " <td>17.216982</td>\n", " <td>3.075193</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>466.991447</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>10.354000</td>\n", " <td>93.000000</td>\n", " <td>1.000000</td>\n", " <td>24.000000</td>\n", " <td>5.000000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>21.027500</td>\n", " <td>235.000000</td>\n", " <td>2.000000</td>\n", " <td>41.000000</td>\n", " <td>11.000000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>15.750000</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>33.115500</td>\n", " <td>339.000000</td>\n", " <td>3.000000</td>\n", " <td>55.000000</td>\n", " <td>14.000000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>433.500000</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>55.975500</td>\n", " <td>433.000000</td>\n", " <td>4.000000</td>\n", " <td>69.000000</td>\n", " <td>16.000000</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>857.750000</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>186.634000</td>\n", " <td>949.000000</td>\n", " <td>8.000000</td>\n", " <td>91.000000</td>\n", " <td>20.000000</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>1809.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Income Rating Cards Age Education Gender \\\n", "count 300.000000 300.000000 300.000000 300.000000 300.000000 300 \n", "unique NaN NaN NaN NaN NaN 2 \n", "top NaN NaN NaN NaN NaN Female \n", "freq NaN NaN NaN NaN NaN 168 \n", "mean 44.054393 348.116667 3.026667 54.983333 13.393333 NaN \n", "std 33.863066 150.871547 1.351064 17.216982 3.075193 NaN \n", "min 10.354000 93.000000 1.000000 24.000000 5.000000 NaN \n", "25% 21.027500 235.000000 2.000000 41.000000 11.000000 NaN \n", "50% 33.115500 339.000000 3.000000 55.000000 14.000000 NaN \n", "75% 55.975500 433.000000 4.000000 69.000000 16.000000 NaN \n", "max 186.634000 949.000000 8.000000 91.000000 20.000000 NaN \n", "\n", " Student Married Ethnicity Balance \n", "count 300 300 300 300.000000 \n", "unique 2 2 3 NaN \n", "top No Yes Caucasian NaN \n", "freq 268 183 141 NaN \n", "mean NaN NaN NaN 502.686667 \n", "std NaN NaN NaN 466.991447 \n", "min NaN NaN NaN 0.000000 \n", "25% NaN NaN NaN 15.750000 \n", "50% NaN NaN NaN 433.500000 \n", "75% NaN NaN NaN 857.750000 \n", "max NaN NaN NaN 1809.000000 " ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_credscore.describe(include='all')" ] }, { "cell_type": "code", "execution_count": null, "id": "cf537297-931e-4db9-bba1-c24983cdc2b2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " 
text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Income</th>\n", " <th>Rating</th>\n", " <th>Cards</th>\n", " <th>Age</th>\n", " <th>Education</th>\n", " <th>Balance</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>Income</th>\n", " <td>1.000000</td>\n", " <td>0.771167</td>\n", " <td>0.028875</td>\n", " <td>0.123201</td>\n", " <td>-0.070959</td>\n", " <td>0.432327</td>\n", " </tr>\n", " <tr>\n", " <th>Rating</th>\n", " <td>0.771167</td>\n", " <td>1.000000</td>\n", " <td>0.095854</td>\n", " <td>0.042377</td>\n", " <td>-0.095433</td>\n", " <td>0.859829</td>\n", " </tr>\n", " <tr>\n", " <th>Cards</th>\n", " <td>0.028875</td>\n", " <td>0.095854</td>\n", " <td>1.000000</td>\n", " <td>0.054655</td>\n", " <td>0.015176</td>\n", " <td>0.123846</td>\n", " </tr>\n", " <tr>\n", " <th>Age</th>\n", " <td>0.123201</td>\n", " <td>0.042377</td>\n", " <td>0.054655</td>\n", " <td>1.000000</td>\n", " <td>-0.046178</td>\n", " <td>-0.052426</td>\n", " </tr>\n", " <tr>\n", " <th>Education</th>\n", " <td>-0.070959</td>\n", " <td>-0.095433</td>\n", " <td>0.015176</td>\n", " <td>-0.046178</td>\n", " <td>1.000000</td>\n", " <td>-0.073167</td>\n", " </tr>\n", " <tr>\n", " <th>Balance</th>\n", " <td>0.432327</td>\n", " <td>0.859829</td>\n", " <td>0.123846</td>\n", " <td>-0.052426</td>\n", " <td>-0.073167</td>\n", " <td>1.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Income Rating Cards Age Education Balance\n", "Income 1.000000 0.771167 0.028875 0.123201 -0.070959 0.432327\n", "Rating 0.771167 1.000000 0.095854 0.042377 -0.095433 0.859829\n", "Cards 0.028875 0.095854 1.000000 0.054655 0.015176 0.123846\n", "Age 0.123201 0.042377 0.054655 1.000000 -0.046178 -0.052426\n", "Education -0.070959 -0.095433 0.015176 -0.046178 1.000000 -0.073167\n", "Balance 0.432327 0.859829 0.123846 -0.052426 -0.073167 1.000000" ] }, 
"execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Individual (pairwise) correlations of the numeric columns.\n", "# The category columns are excluded explicitly: DataFrame.corr() no longer drops\n", "# non-numeric columns by default (pandas >= 2.0 raises on them).\n", "df_credscore.select_dtypes(include='number').corr()" ] }, { "cell_type": "code", "execution_count": null, "id": "a4f86f79-7ab6-4fcb-8f4c-60fa461990bb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Income 0.771167\n", "Rating 1.000000\n", "Cards 0.095854\n", "Age 0.042377\n", "Education -0.095433\n", "Balance 0.859829\n", "Name: Rating, dtype: float64" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Correlation of every numeric column with Rating; we still need to understand interactions.\n", "df_credscore.select_dtypes(include='number').corr()[\"Rating\"]" ] }, { "cell_type": "code", "execution_count": null, "id": "71f8d364-b3c7-442b-9d1f-a0177b224326", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Pipeline(steps=[('ordinalencoder', OrdinalEncoder()),\n", " ('linearregression', LinearRegression())])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NOTE(review): with no ColumnTransformer in front of it, OrdinalEncoder is fitted on every\n", "# column passed to the pipeline, numeric ones included - confirm that this is intended\n", "# before interpreting the coefficients of this model.\n", "pipeline = make_pipeline(OrdinalEncoder(), LinearRegression())\n", "pipeline" ] }, { "cell_type": "code", "execution_count": null, "id": "49bddc88-7e6e-4b6a-aa93-fb20e5755f17", "metadata": {}, "outputs": [], "source": [ "# Non-mutating split: df_credscore keeps its Rating column, so this cell can be re-run.\n", "y = df_credscore['Rating']\n", "X = df_credscore.drop(columns='Rating')" ] }, { "cell_type": "code", "execution_count": null, "id": "738990d1-3603-4508-994f-6f7c9965e98d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 300 entries, 0 to 299\n", "Data columns (total 9 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Income 300 non-null float64 \n", " 1 Cards 300 non-null int64 \n", " 2 Age 300 non-null int64 \n", " 3 Education 300 non-null int64 \n", " 4 Gender 300 non-null category\n", " 5 Student 300 non-null category\n", " 6 Married 300 non-null category\n", " 7 Ethnicity 300 non-null category\n", " 8 Balance 300 non-null int64 \n", "dtypes: category(4), float64(1), int64(4)\n", 
"memory usage: 13.3 KB\n" ] } ], "source": [ "X.info()" ] }, { "cell_type": "code", "execution_count": null, "id": "7483a8ff-7b00-482c-aa7e-a68aad62023b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Pipeline(steps=[('ordinalencoder', OrdinalEncoder()),\n", " ('linearregression', LinearRegression())])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipeline.fit(X,y)" ] }, { "cell_type": "code", "execution_count": null, "id": "074306df-d771-4564-a985-569c6512e2cf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LinearRegression()" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = pipeline['linearregression']\n", "model" ] }, { "cell_type": "code", "execution_count": null, "id": "23a58fef-1ec2-46a1-9e65-9e993981c9de", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0.71532602, 1.41275992, 0.17419851, 0.61789045,\n", " 0.33006896, -91.64416173, 3.56809569, -2.47231507,\n", " 1.6260681 ])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.coef_" ] }, { "cell_type": "markdown", "id": "e75338a2-9086-461e-9f30-3a51fa87a542", "metadata": {}, "source": [ "### Statsmodel api" ] }, { "cell_type": "code", "execution_count": null, "id": "cc42cee7-4ae2-487c-ad8b-0e1d347f50ca", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Income</th>\n", " <th>Cards</th>\n", " <th>Age</th>\n", " <th>Education</th>\n", " <th>Gender</th>\n", " <th>Student</th>\n", " <th>Married</th>\n", " <th>Ethnicity</th>\n", " 
<th>Balance</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>14.891</td>\n", " <td>2</td>\n", " <td>34</td>\n", " <td>11</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>333</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>106.025</td>\n", " <td>3</td>\n", " <td>82</td>\n", " <td>15</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>903</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>104.593</td>\n", " <td>4</td>\n", " <td>71</td>\n", " <td>11</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>580</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>148.924</td>\n", " <td>3</td>\n", " <td>36</td>\n", " <td>11</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>964</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>55.882</td>\n", " <td>2</td>\n", " <td>68</td>\n", " <td>16</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>331</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>295</th>\n", " <td>27.272</td>\n", " <td>5</td>\n", " <td>67</td>\n", " <td>10</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>296</th>\n", " <td>65.896</td>\n", " <td>1</td>\n", " <td>49</td>\n", " <td>17</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>293</td>\n", " </tr>\n", " <tr>\n", " <th>297</th>\n", " <td>55.054</td>\n", " <td>3</td>\n", " <td>74</td>\n", " <td>17</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>188</td>\n", " </tr>\n", " <tr>\n", " <th>298</th>\n", " <td>20.791</td>\n", " <td>1</td>\n", " <td>70</td>\n", " 
<td>18</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>299</th>\n", " <td>24.919</td>\n", " <td>3</td>\n", " <td>76</td>\n", " <td>11</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>711</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>300 rows × 9 columns</p>\n", "</div>" ], "text/plain": [ " Income Cards Age Education Gender Student Married Ethnicity \\\n", "0 14.891 2 34 11 0 0 1 2 \n", "1 106.025 3 82 15 1 1 1 1 \n", "2 104.593 4 71 11 0 0 0 1 \n", "3 148.924 3 36 11 1 0 0 1 \n", "4 55.882 2 68 16 0 0 1 2 \n", ".. ... ... ... ... ... ... ... ... \n", "295 27.272 5 67 10 1 0 1 2 \n", "296 65.896 1 49 17 1 0 1 2 \n", "297 55.054 3 74 17 0 0 1 1 \n", "298 20.791 1 70 18 1 0 0 0 \n", "299 24.919 3 76 11 1 0 1 0 \n", "\n", " Balance \n", "0 333 \n", "1 903 \n", "2 580 \n", "3 964 \n", "4 331 \n", ".. ... \n", "295 0 \n", "296 293 \n", "297 188 \n", "298 0 \n", "299 711 \n", "\n", "[300 rows x 9 columns]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def preprocess_categories(df):\n", "    \"\"\"Return a copy of `df` whose category-dtype columns hold their integer codes.\n", "\n", "    Columns of any other dtype are copied unchanged and `df` itself is not modified.\n", "    The codes are taken from `Series.cat.codes` (pandas encodes a missing value as -1).\n", "    \"\"\"\n", "    encoded = df.copy()\n", "    for name in df.select_dtypes(include='category').columns:\n", "        encoded[name] = df[name].cat.codes\n", "    return encoded\n", "\n", "preprocess_categories(X)" ] }, { "cell_type": "code", "execution_count": null, "id": "ad73cf4b-6928-4a3a-94db-b9751777b364", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>const</th>\n", " <th>Income</th>\n", " <th>Cards</th>\n", " <th>Age</th>\n", " <th>Education</th>\n", " 
<th>Gender</th>\n", " <th>Student</th>\n", " <th>Married</th>\n", " <th>Ethnicity</th>\n", " <th>Balance</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1.0</td>\n", " <td>14.891</td>\n", " <td>2</td>\n", " <td>34</td>\n", " <td>11</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>333</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1.0</td>\n", " <td>106.025</td>\n", " <td>3</td>\n", " <td>82</td>\n", " <td>15</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>903</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1.0</td>\n", " <td>104.593</td>\n", " <td>4</td>\n", " <td>71</td>\n", " <td>11</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>580</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1.0</td>\n", " <td>148.924</td>\n", " <td>3</td>\n", " <td>36</td>\n", " <td>11</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>964</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1.0</td>\n", " <td>55.882</td>\n", " <td>2</td>\n", " <td>68</td>\n", " <td>16</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>331</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>295</th>\n", " <td>1.0</td>\n", " <td>27.272</td>\n", " <td>5</td>\n", " <td>67</td>\n", " <td>10</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>296</th>\n", " <td>1.0</td>\n", " <td>65.896</td>\n", " <td>1</td>\n", " <td>49</td>\n", " <td>17</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>293</td>\n", " </tr>\n", " <tr>\n", " <th>297</th>\n", " <td>1.0</td>\n", " 
<td>55.054</td>\n", " <td>3</td>\n", " <td>74</td>\n", " <td>17</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>188</td>\n", " </tr>\n", " <tr>\n", " <th>298</th>\n", " <td>1.0</td>\n", " <td>20.791</td>\n", " <td>1</td>\n", " <td>70</td>\n", " <td>18</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>299</th>\n", " <td>1.0</td>\n", " <td>24.919</td>\n", " <td>3</td>\n", " <td>76</td>\n", " <td>11</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>711</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>300 rows × 10 columns</p>\n", "</div>" ], "text/plain": [ " const Income Cards Age Education Gender Student Married \\\n", "0 1.0 14.891 2 34 11 0 0 1 \n", "1 1.0 106.025 3 82 15 1 1 1 \n", "2 1.0 104.593 4 71 11 0 0 0 \n", "3 1.0 148.924 3 36 11 1 0 0 \n", "4 1.0 55.882 2 68 16 0 0 1 \n", ".. ... ... ... ... ... ... ... ... \n", "295 1.0 27.272 5 67 10 1 0 1 \n", "296 1.0 65.896 1 49 17 1 0 1 \n", "297 1.0 55.054 3 74 17 0 0 1 \n", "298 1.0 20.791 1 70 18 1 0 0 \n", "299 1.0 24.919 3 76 11 1 0 1 \n", "\n", " Ethnicity Balance \n", "0 2 333 \n", "1 1 903 \n", "2 1 580 \n", "3 1 964 \n", "4 2 331 \n", ".. ... ... \n", "295 2 0 \n", "296 2 293 \n", "297 1 188 \n", "298 0 0 \n", "299 0 711 \n", "\n", "[300 rows x 10 columns]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = sm.add_constant(preprocess_categories(X))\n", "X" ] }, { "cell_type": "code", "execution_count": null, "id": "8b43d7bd-46bd-412a-9645-43364bc89b3e", "metadata": {}, "outputs": [], "source": [ "model = sm.OLS(y, X).fit()" ] }, { "cell_type": "code", "execution_count": null, "id": "af0e37a0-e8ab-41f0-b1aa-8f9b4a73a630", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<table class=\"simpletable\">\n", "<caption>OLS Regression Results</caption>\n", "<tr>\n", " <th>Dep. 
Variable:</th> <td>Rating</td> <th> R-squared: </th> <td> 0.974</td> \n", "</tr>\n", "<tr>\n", " <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.973</td> \n", "</tr>\n", "<tr>\n", " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 1185.</td> \n", "</tr>\n", "<tr>\n", " <th>Date:</th> <td>Fri, 27 May 2022</td> <th> Prob (F-statistic):</th> <td>6.33e-223</td>\n", "</tr>\n", "<tr>\n", " <th>Time:</th> <td>08:02:31</td> <th> Log-Likelihood: </th> <td> -1385.4</td> \n", "</tr>\n", "<tr>\n", " <th>No. Observations:</th> <td> 300</td> <th> AIC: </th> <td> 2791.</td> \n", "</tr>\n", "<tr>\n", " <th>Df Residuals:</th> <td> 290</td> <th> BIC: </th> <td> 2828.</td> \n", "</tr>\n", "<tr>\n", " <th>Df Model:</th> <td> 9</td> <th> </th> <td> </td> \n", "</tr>\n", "<tr>\n", " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", "</tr>\n", "<tr>\n", " <th>const</th> <td> 139.4908</td> <td> 9.595</td> <td> 14.538</td> <td> 0.000</td> <td> 120.607</td> <td> 158.375</td>\n", "</tr>\n", "<tr>\n", " <th>Income</th> <td> 2.0946</td> <td> 0.048</td> <td> 43.507</td> <td> 0.000</td> <td> 2.000</td> <td> 2.189</td>\n", "</tr>\n", "<tr>\n", " <th>Cards</th> <td> -0.7769</td> <td> 1.080</td> <td> -0.719</td> <td> 0.473</td> <td> -2.903</td> <td> 1.349</td>\n", "</tr>\n", "<tr>\n", " <th>Age</th> <td> 0.1493</td> <td> 0.086</td> <td> 1.740</td> <td> 0.083</td> <td> -0.020</td> <td> 0.318</td>\n", "</tr>\n", "<tr>\n", " <th>Education</th> <td> 0.1721</td> <td> 0.474</td> <td> 0.363</td> <td> 0.717</td> <td> -0.761</td> <td> 1.105</td>\n", "</tr>\n", "<tr>\n", " <th>Gender</th> <td> 1.8529</td> <td> 2.919</td> <td> 0.635</td> <td> 0.526</td> <td> -3.891</td> <td> 7.597</td>\n", "</tr>\n", "<tr>\n", " <th>Student</th> <td> -99.2582</td> <td> 4.947</td> <td> -20.066</td> 
<td> 0.000</td> <td> -108.994</td> <td> -89.522</td>\n", "</tr>\n", "<tr>\n", " <th>Married</th> <td> 2.7424</td> <td> 2.983</td> <td> 0.919</td> <td> 0.359</td> <td> -3.129</td> <td> 8.614</td>\n", "</tr>\n", "<tr>\n", " <th>Ethnicity</th> <td> -0.3005</td> <td> 1.745</td> <td> -0.172</td> <td> 0.863</td> <td> -3.735</td> <td> 3.134</td>\n", "</tr>\n", "<tr>\n", " <th>Balance</th> <td> 0.2316</td> <td> 0.004</td> <td> 63.330</td> <td> 0.000</td> <td> 0.224</td> <td> 0.239</td>\n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <th>Omnibus:</th> <td>43.876</td> <th> Durbin-Watson: </th> <td> 1.851</td>\n", "</tr>\n", "<tr>\n", " <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 59.049</td>\n", "</tr>\n", "<tr>\n", " <th>Skew:</th> <td>-0.999</td> <th> Prob(JB): </th> <td>1.51e-13</td>\n", "</tr>\n", "<tr>\n", " <th>Kurtosis:</th> <td> 3.857</td> <th> Cond. No. </th> <td>4.61e+03</td>\n", "</tr>\n", "</table><br/><br/>Notes:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 4.61e+03. This might indicate that there are<br/>strong multicollinearity or other numerical problems." ], "text/plain": [ "<class 'statsmodels.iolib.summary.Summary'>\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Rating R-squared: 0.974\n", "Model: OLS Adj. R-squared: 0.973\n", "Method: Least Squares F-statistic: 1185.\n", "Date: Fri, 27 May 2022 Prob (F-statistic): 6.33e-223\n", "Time: 08:02:31 Log-Likelihood: -1385.4\n", "No. 
Observations: 300 AIC: 2791.\n", "Df Residuals: 290 BIC: 2828.\n", "Df Model: 9 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const 139.4908 9.595 14.538 0.000 120.607 158.375\n", "Income 2.0946 0.048 43.507 0.000 2.000 2.189\n", "Cards -0.7769 1.080 -0.719 0.473 -2.903 1.349\n", "Age 0.1493 0.086 1.740 0.083 -0.020 0.318\n", "Education 0.1721 0.474 0.363 0.717 -0.761 1.105\n", "Gender 1.8529 2.919 0.635 0.526 -3.891 7.597\n", "Student -99.2582 4.947 -20.066 0.000 -108.994 -89.522\n", "Married 2.7424 2.983 0.919 0.359 -3.129 8.614\n", "Ethnicity -0.3005 1.745 -0.172 0.863 -3.735 3.134\n", "Balance 0.2316 0.004 63.330 0.000 0.224 0.239\n", "==============================================================================\n", "Omnibus: 43.876 Durbin-Watson: 1.851\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 59.049\n", "Skew: -0.999 Prob(JB): 1.51e-13\n", "Kurtosis: 3.857 Cond. No. 4.61e+03\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 4.61e+03. 
This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n", "\"\"\"" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.summary()" ] }, { "cell_type": "code", "execution_count": null, "id": "a08f7238-5ef6-409e-866b-5bd4d30c22d7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Balance 63.329758\n", "Income 43.507422\n", "Student 20.066064\n", "const 14.538499\n", "Age 1.740111\n", "Married 0.919321\n", "Cards 0.719127\n", "Gender 0.634877\n", "Education 0.363174\n", "Ethnicity 0.172217\n", "dtype: float64" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.tvalues.abs().sort_values(ascending=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "8b899f23-2fb1-423c-9212-33c5722bbde6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Balance 63.329758\n", "Income 43.507422\n", "Student -20.066064\n", "const 14.538499\n", "dtype: float64" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# t-statistics of the terms with p <= 0.05, largest magnitude first, sign kept\n", "model.tvalues[model.pvalues <= 0.05].sort_values(key=abs, ascending=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "7be36d13-32e1-4274-bd06-862895be0302", "metadata": {}, "outputs": [], "source": [ "# NumPy has no np.corr; the correlation coefficient between the fitted values and y\n", "# is computed with np.corrcoef further down." ] }, { "cell_type": "code", "execution_count": null, "id": "a09f1fed-3174-4e7e-8c6d-686a4cbc8de0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([42981258.44356128])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# np.correlate is a cross-correlation (sliding dot product), not a correlation coefficient:\n", "# for two equal-length inputs in its default 'valid' mode it returns one value, sum(fitted * y).\n", "np.correlate(model.fittedvalues, y.values)" ] }, { "cell_type": "code", "execution_count": null, "id": "20d2c6d5-1b33-43b4-95fa-d6b6e15fc023", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 255.364880\n", "1 488.244174\n", "2 501.990296\n", "3 681.185956\n", "4 346.698891\n", " ... 
\n", "295 208.450617\n", "296 358.837627\n", "297 312.436387\n", "298 197.666967\n", "299 371.866045\n", "Length: 300, dtype: float64" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fittedvalues" ] }, { "cell_type": "code", "execution_count": null, "id": "84e39447-13cf-464a-9c8a-ec2d1f9fc151", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1. , 0.9866719],\n", " [0.9866719, 1. ]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.corrcoef(model.fittedvalues.values, y.values)" ] }, { "cell_type": "markdown", "id": "d4ddb1b1-bf69-4d3f-ad5b-630cc70071a2", "metadata": {}, "source": [ "### Limited Variables Income, Cards, Married" ] }, { "cell_type": "code", "execution_count": null, "id": "45f58e37-fc62-440f-8a45-2568e0f2d7a5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>const</th>\n", " <th>Income</th>\n", " <th>Cards</th>\n", " <th>Married</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1.0</td>\n", " <td>14.891</td>\n", " <td>2</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1.0</td>\n", " <td>106.025</td>\n", " <td>3</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1.0</td>\n", " <td>104.593</td>\n", " <td>4</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1.0</td>\n", " <td>148.924</td>\n", " <td>3</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1.0</td>\n", " <td>55.882</td>\n", " <td>2</td>\n", " <td>1</td>\n", " 
</tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>295</th>\n", " <td>1.0</td>\n", " <td>27.272</td>\n", " <td>5</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>296</th>\n", " <td>1.0</td>\n", " <td>65.896</td>\n", " <td>1</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>297</th>\n", " <td>1.0</td>\n", " <td>55.054</td>\n", " <td>3</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>298</th>\n", " <td>1.0</td>\n", " <td>20.791</td>\n", " <td>1</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>299</th>\n", " <td>1.0</td>\n", " <td>24.919</td>\n", " <td>3</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>300 rows × 4 columns</p>\n", "</div>" ], "text/plain": [ " const Income Cards Married\n", "0 1.0 14.891 2 1\n", "1 1.0 106.025 3 1\n", "2 1.0 104.593 4 0\n", "3 1.0 148.924 3 0\n", "4 1.0 55.882 2 1\n", ".. ... ... ... ...\n", "295 1.0 27.272 5 1\n", "296 1.0 65.896 1 1\n", "297 1.0 55.054 3 1\n", "298 1.0 20.791 1 0\n", "299 1.0 24.919 3 1\n", "\n", "[300 rows x 4 columns]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_red = X[['const', 'Income', 'Cards', 'Married']]\n", "X_red" ] }, { "cell_type": "code", "execution_count": null, "id": "c07628d2-e64e-4b80-8adc-acbaf46aecea", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "d18b567f-4f32-40bf-98ed-1088b5237f99", "metadata": {}, "outputs": [], "source": [ "model2 = sm.OLS(y, X_red).fit()" ] }, { "cell_type": "code", "execution_count": null, "id": "8869c58c-7201-4689-a268-8cb3a0d158b2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<table class=\"simpletable\">\n", "<caption>OLS Regression Results</caption>\n", "<tr>\n", " <th>Dep. Variable:</th> <td>Rating</td> <th> R-squared: </th> <td> 0.602</td>\n", "</tr>\n", "<tr>\n", " <th>Model:</th> <td>OLS</td> <th> Adj. 
R-squared: </th> <td> 0.598</td>\n", "</tr>\n", "<tr>\n", " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 149.0</td>\n", "</tr>\n", "<tr>\n", " <th>Date:</th> <td>Fri, 27 May 2022</td> <th> Prob (F-statistic):</th> <td>7.56e-59</td>\n", "</tr>\n", "<tr>\n", " <th>Time:</th> <td>08:02:39</td> <th> Log-Likelihood: </th> <td> -1792.1</td>\n", "</tr>\n", "<tr>\n", " <th>No. Observations:</th> <td> 300</td> <th> AIC: </th> <td> 3592.</td>\n", "</tr>\n", "<tr>\n", " <th>Df Residuals:</th> <td> 296</td> <th> BIC: </th> <td> 3607.</td>\n", "</tr>\n", "<tr>\n", " <th>Df Model:</th> <td> 3</td> <th> </th> <td> </td> \n", "</tr>\n", "<tr>\n", " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", "</tr>\n", "<tr>\n", " <th>const</th> <td> 165.2144</td> <td> 16.641</td> <td> 9.928</td> <td> 0.000</td> <td> 132.464</td> <td> 197.964</td>\n", "</tr>\n", "<tr>\n", " <th>Income</th> <td> 3.4196</td> <td> 0.164</td> <td> 20.896</td> <td> 0.000</td> <td> 3.098</td> <td> 3.742</td>\n", "</tr>\n", "<tr>\n", " <th>Cards</th> <td> 8.2699</td> <td> 4.099</td> <td> 2.018</td> <td> 0.045</td> <td> 0.203</td> <td> 16.336</td>\n", "</tr>\n", "<tr>\n", " <th>Married</th> <td> 11.8404</td> <td> 11.339</td> <td> 1.044</td> <td> 0.297</td> <td> -10.474</td> <td> 34.155</td>\n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <th>Omnibus:</th> <td>133.940</td> <th> Durbin-Watson: </th> <td> 1.873</td>\n", "</tr>\n", "<tr>\n", " <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 17.170</td>\n", "</tr>\n", "<tr>\n", " <th>Skew:</th> <td> 0.044</td> <th> Prob(JB): </th> <td>0.000187</td>\n", "</tr>\n", "<tr>\n", " <th>Kurtosis:</th> <td> 1.831</td> <th> Cond. No. 
</th> <td> 179.</td>\n", "</tr>\n", "</table><br/><br/>Notes:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." ], "text/plain": [ "<class 'statsmodels.iolib.summary.Summary'>\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Rating R-squared: 0.602\n", "Model: OLS Adj. R-squared: 0.598\n", "Method: Least Squares F-statistic: 149.0\n", "Date: Fri, 27 May 2022 Prob (F-statistic): 7.56e-59\n", "Time: 08:02:39 Log-Likelihood: -1792.1\n", "No. Observations: 300 AIC: 3592.\n", "Df Residuals: 296 BIC: 3607.\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const 165.2144 16.641 9.928 0.000 132.464 197.964\n", "Income 3.4196 0.164 20.896 0.000 3.098 3.742\n", "Cards 8.2699 4.099 2.018 0.045 0.203 16.336\n", "Married 11.8404 11.339 1.044 0.297 -10.474 34.155\n", "==============================================================================\n", "Omnibus: 133.940 Durbin-Watson: 1.873\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 17.170\n", "Skew: 0.044 Prob(JB): 0.000187\n", "Kurtosis: 1.831 Cond. No. 
179.\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model2.summary()" ] }, { "cell_type": "code", "execution_count": null, "id": "8c15f308-a5bd-420a-9060-aae8a3a558e4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Income 20.895784\n", "const 9.928059\n", "Cards 2.017633\n", "dtype: float64" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model2.tvalues[model2.tvalues[model2.pvalues <= 0.05].abs().sort_values(ascending=False).index]" ] }, { "cell_type": "markdown", "id": "73e5e5eb-5d84-4cf4-917d-2224ee66fb5a", "metadata": {}, "source": [ "## HR Example" ] }, { "cell_type": "code", "execution_count": null, "id": "b8a4a2cb-5ae8-434d-a6e6-44d8f7f8e51e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>S</th>\n", " <th>LPE</th>\n", " <th>NP</th>\n", " <th>ANH</th>\n", " <th>TIC</th>\n", " <th>Newborn</th>\n", " <th>left</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.38</td>\n", " <td>0.53</td>\n", " <td>2</td>\n", " <td>157</td>\n", " <td>3</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>0.80</td>\n", " <td>0.86</td>\n", " <td>5</td>\n", " <td>262</td>\n", " <td>6</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>0.11</td>\n", " <td>0.88</td>\n", " 
<td>7</td>\n", " <td>272</td>\n", " <td>4</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>0.72</td>\n", " <td>0.87</td>\n", " <td>5</td>\n", " <td>223</td>\n", " <td>5</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>0.37</td>\n", " <td>0.52</td>\n", " <td>2</td>\n", " <td>159</td>\n", " <td>3</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " S LPE NP ANH TIC Newborn left\n", "0 0.38 0.53 2 157 3 0 1\n", "1 0.80 0.86 5 262 6 0 1\n", "2 0.11 0.88 7 272 4 0 1\n", "3 0.72 0.87 5 223 5 0 1\n", "4 0.37 0.52 2 159 3 0 1" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_hr = pd.read_csv(\"DATA_3.02_HR2.csv\"); df_hr.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "b3c97757-53c6-4c41-b6d3-8f8937d66370", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 12000 entries, 0 to 11999\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 S 12000 non-null float64\n", " 1 LPE 12000 non-null float64\n", " 2 NP 12000 non-null int64 \n", " 3 ANH 12000 non-null int64 \n", " 4 TIC 12000 non-null int64 \n", " 5 Newborn 12000 non-null int64 \n", " 6 left 12000 non-null int64 \n", "dtypes: float64(2), int64(5)\n", "memory usage: 656.4 KB\n" ] } ], "source": [ "df_hr.info()" ] }, { "cell_type": "code", "execution_count": null, "id": "b631c30c-cc89-40e0-8fb8-652392f52d36", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" 
class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>S</th>\n", " <th>LPE</th>\n", " <th>NP</th>\n", " <th>ANH</th>\n", " <th>TIC</th>\n", " <th>Newborn</th>\n", " <th>left</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>12000.000000</td>\n", " <td>12000.000000</td>\n", " <td>12000.000000</td>\n", " <td>12000.000000</td>\n", " <td>12000.000000</td>\n", " <td>12000.000000</td>\n", " <td>12000.000000</td>\n", " </tr>\n", " <tr>\n", " <th>mean</th>\n", " <td>0.629463</td>\n", " <td>0.716558</td>\n", " <td>3.801833</td>\n", " <td>200.437917</td>\n", " <td>3.228750</td>\n", " <td>0.154167</td>\n", " <td>0.166667</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>0.241100</td>\n", " <td>0.168368</td>\n", " <td>1.163906</td>\n", " <td>48.740178</td>\n", " <td>1.056811</td>\n", " <td>0.361123</td>\n", " <td>0.372694</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>0.090000</td>\n", " <td>0.360000</td>\n", " <td>2.000000</td>\n", " <td>96.000000</td>\n", " <td>2.000000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>0.480000</td>\n", " <td>0.570000</td>\n", " <td>3.000000</td>\n", " <td>157.000000</td>\n", " <td>2.000000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>0.660000</td>\n", " <td>0.720000</td>\n", " <td>4.000000</td>\n", " <td>199.500000</td>\n", " <td>3.000000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>0.820000</td>\n", " <td>0.860000</td>\n", " <td>5.000000</td>\n", " <td>243.000000</td>\n", " <td>4.000000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>7.000000</td>\n", " <td>310.000000</td>\n", " <td>6.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " </tr>\n", " 
</tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " S LPE NP ANH TIC \\\n", "count 12000.000000 12000.000000 12000.000000 12000.000000 12000.000000 \n", "mean 0.629463 0.716558 3.801833 200.437917 3.228750 \n", "std 0.241100 0.168368 1.163906 48.740178 1.056811 \n", "min 0.090000 0.360000 2.000000 96.000000 2.000000 \n", "25% 0.480000 0.570000 3.000000 157.000000 2.000000 \n", "50% 0.660000 0.720000 4.000000 199.500000 3.000000 \n", "75% 0.820000 0.860000 5.000000 243.000000 4.000000 \n", "max 1.000000 1.000000 7.000000 310.000000 6.000000 \n", "\n", " Newborn left \n", "count 12000.000000 12000.000000 \n", "mean 0.154167 0.166667 \n", "std 0.361123 0.372694 \n", "min 0.000000 0.000000 \n", "25% 0.000000 0.000000 \n", "50% 0.000000 0.000000 \n", "75% 0.000000 0.000000 \n", "max 1.000000 1.000000 " ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_hr.describe()" ] }, { "cell_type": "code", "execution_count": null, "id": "bd2a4d7b-96db-42bf-86a3-ea8a1ff9153f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<AxesSubplot:ylabel='Frequency'>" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZoAAAD7CAYAAABT2VIoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAX8ElEQVR4nO3de2xT9/3G8cc2kMIAhWRJMJcWoSFIhwZqwtAqUNcEMNAkrAgWlJato5RubcelHQOxHwkFxBouXbluMKYyrQi0SiuXtEoyytaMDTroChQCYYRLsyWQ4IBCKZBgn98fVTPYejmx/fUh9vslIeHztXOejyB+fI6dE5dlWZYAADDE7XQAAEBso2gAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCqg9MB7laXL19TMNj2HzFKTu4qv/8jA4nuXswcH+Jt5nibVwpvZrfbpR49vvKZaxTN5wgGrZCK5tPHxhtmjg/xNnO8zSuZmZlTZwAAoygaAIBRFA0AwCiKBgBgVFSKpri4WFlZWRo4cKBOnTrVuv3s2bPKz8+Xz+dTfn6+zp07Z3QNABB9USma7Oxsbd26Vb17975je1FRkQoKClRWVqaCggIVFhYaXQMARF9UiiYzM1Ner/eObX6/X5WVlcrJyZEk5eTkqLKyUo2NjUbWAADOcOznaOrq6pSWliaPxyNJ8ng8Sk1NVV1dnSzLivhaUlJSm/IlJ3cNaa7mloBSUrqF9NhwNLcE1KmjJ+r7/ZQTMzuNmWNfvM0rmZmZH9j8HH7/RyH94FJKSjflvrDTQKIvtnvVBDU0XI36fqVPZnZq305h5tgXb/NK4c3sdrs+9wW6Y0Xj9Xp18eJFBQIBeTweBQIB1dfXy+v1yrKsiK8BAJzh2Mebk5OTlZ6erpKSEklSSUmJ0tPTlZSUZGQNAOAMl2VZxi/ms3TpUpWXl+vSpUvq0aOHEhMT9eabb6q6ulrz589XU1OTunfvruLiYvXv31+SjKy1BafO7OMUQ3yIt5njbV7J3KmzqBRNe0TR2Mc3ZHyIt5njbV7JXNFwZQAAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACj7oqi+dOf/qTvfOc7mjBhgvLy8lReXi5JOnv2rPLz8+Xz+ZSfn69z5861PibUNQBAdDleNJZl6ac//amWL1+unTt3avny5Zo3b56CwaCKiopUUFCgsrIyFRQUqLCwsPVxoa4BAKLL8aKRJLfbratXr0qSrl69qtTUVF2+fFmVlZXKycmRJOXk5KiyslKNjY3y+/0hrQEAoq+D0wFcLpdeeeUVPfPMM+rSpYuuXbumTZs2qa6uTmlpafJ4PJIkj8ej1NRU1dXVybKskNaSkpIcmxMA4pXjRXPr1i1t3LhRGzZsUEZGht577z3Nnj1by5cvdzRXcnJXR/cfipSUbnG5b6cwc+yLt3klMzM7XjQnTpxQfX29MjIyJEkZGRnq3LmzEhISdPHiRQUCAXk8HgUCAdXX18vr9cqyrJDW2sLv/0jBoNXmeZz8j9nQcNWR/aakdHNs305h5tgXb/NK4c3sdrs+9wW64+/R9OzZUxcuXNCZM2ckSdXV1fL7/brvvvuUnp6ukpISSVJJSYnS09OVlJSk5OTkkNYAANHnsiyr7S/bI2zXrl369a9/LZfLJUmaOXOmRo0aperqas2fP19NTU3q3r27iouL1b9/f0kKec2ucI5ocl/Y2ebHhWv3qgkc0UQRM8e+eJtXMndEc1cUzd2IorGPb8j4EG8zx9u8UgyfOgMAxDaKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRFA0
AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRtotmz549unXrlsksAIAYZLto1qxZoxEjRmjx4sU6cuSIyUwAgBhiu2h27dqlLVu2KCEhQT/+8Y/l8/m0YcMG/etf/zKZDwDQzrXpPZpBgwZp3rx5euedd1RUVKTS0lKNHj1ajz32mHbt2qVgMGgqJwCgnerQ1gd8+OGH2rVrl3bt2iWXy6WZM2fK6/Vq69atKi8v17p160zkBAC0U7aLZuvWrdq5c6fOnz+vcePGafny5Ro6dGjrus/n04MPPmgiIwCgHbNdNBUVFfrBD36g7OxsderU6X/WO3furLVr10Y0HACg/bNdNGvWrJHb7VbHjh1bt7W0tMiyrNbiGTFiROQTAgDaNdsfBpg2bZqOHz9+x7bjx4/rySefDDvEzZs3VVRUpDFjxig3N1cLFy6UJJ09e1b5+fny+XzKz8/XuXPnWh8T6hoAILpsF01VVZWGDBlyx7ZvfOMbOnnyZNghVqxYoYSEBJWVlWn37t2aNWuWJKmoqEgFBQUqKytTQUGBCgsLWx8T6hoAILpsF0337t116dKlO7ZdunRJnTt3DivAtWvXtGPHDs2aNUsul0uS9NWvflV+v1+VlZXKycmRJOXk5KiyslKNjY0hrwEAos920YwZM0YvvPCCTp06pevXr6uqqkrz5s3TuHHjwgpQU1OjxMRErVu3ThMnTtTUqVN16NAh1dXVKS0tTR6PR5Lk8XiUmpqqurq6kNcAANFn+8MAc+bM0UsvvaTJkyerublZCQkJmjhxop5//vmwAgQCAdXU1Oj+++/XvHnzdOTIEf3whz/U6tWrw/q64UpO7uro/kORktItLvftFGaOffE2r2RmZttFk5CQoKKiIhUWFury5cvq0aNH66mucHi9XnXo0KH1VNeQIUPUo0cP3XPPPbp48aICgYA8Ho8CgYDq6+vl9XplWVZIa23h93+kYNBq8zxO/sdsaLjqyH5TUro5tm+nMHPsi7d5pfBmdrtdn/sCvU2XoLl69ao++OADVVVV6cCBA9q/f7/2798fUqhPJSUlafjw4frrX/8q6ZNPjPn9fvXr10/p6ekqKSmRJJWUlCg9PV1JSUlKTk4OaQ0AEH0uy7JsvWz/wx/+oMWLF6tLly665557/vMFXC69/fbbYYWoqanRggULdOXKFXXo0EGzZ8/WQw89pOrqas2fP19NTU3q3r27iouL1b9/f0kKec2ucI5ocl/Y2ebHhWv3qgkc0UQRM8e+eJtXMndEY7toRo4cqaVLl+qhhx4KKUR7Q9HYxzdkfIi3meNtXukuOHUWCAT4yX8AQJvZLpqnnnpKv/zlL/lVAACANrH9qbMtW7bo0qVL2rx5sxITE+9Y+/Of/xzhWACAWGG7aFasWGEyBwAgRtkumm9+85smcwAAYpTt92iam5v1i1/8QtnZ2crIyJAk7du3T6+99pqxcACA9s920SxbtkynTp3SypUrW68IMGDAAG3bts1YOABA+2f71NmePXtUXl6uLl26yO3+pJ/S0tJ08eJFY+EAAO2f7SOajh07KhAI3LGtsbHxfz6BBgDA7WwXzdixYzVv3jzV1NRIkurr67V48WI98sgjxsIBANo/20UzZ84c9enTR3l5eWpqapLP51NqaqqeffZZk/kAAO2c7fdoOnXqpAULFmjBggVqbGyM2K8JAADENttF8+kps09du3at9e99+/aNXCIAQEyxXTSjR4+Wy+XS7Rd7/vSI5sSJE5FPBgCICbaL5uTJk3fcbmho0Lp165SZmRnxUACA2NGm37B5u5SUFP3sZz/Tyy+/HMk8AIAYE3LRSNKZM2d0/fr1SGUBAMQg26fOCgoK7viU2fXr13X69Gk+3gwA+EK2i2by5Ml33O7cubMGDRqkfv36RToTACCG2C6aRx991GQOAECMsl0
0q1evtnW/WbNmhRwGABB7bBfN+fPnVV5ersGDB6t3796qra3VBx98oDFjxighIcFkRgBAO2a7aCzL0qpVq+Tz+Vq3lZeXq7S0VD//+c+NhAMAtH+2P95cUVGhUaNG3bEtKytL77zzTsRDAQBih+2iue+++7R169Y7tm3btk333ntvxEMBAGKH7VNnS5cu1XPPPafNmze3/mbNDh06aO3atSbzAQDaOdtFc//996usrExHjhxRfX29UlJSNHToUHXs2NFkPgBAOxfyJWiGDRumlpYWffzxx5HMAwCIMbaPaKqqqvSjH/1InTp10sWLFzV+/HgdPHhQb7zxhl555RWDEQEA7ZntI5pFixZp5syZKi0tVYcOn/TTsGHD9N577xkLBwBo/2wXzenTpzVhwgRJ//mFZ126dNHNmzfNJAMAxATbRdO7d28dO3bsjm1Hjx7l480AgC9k+z2aWbNm6emnn9aUKVPU0tKijRs3avv27VqyZInJfACAds72Ec3DDz+szZs3q7GxUcOGDdO///1vrV27ViNGjIhYmHXr1mngwIE6deqUJOnw4cPKy8uTz+fTtGnT5Pf7W+8b6hoAILpsFU0gENCoUaP0ta99TYsWLdKmTZu0ePFiDR48OGJBjh8/rsOHD6t3796SpGAwqLlz56qwsFBlZWXKzMzUypUrw1oDAESfraLxeDzyeDzG3vhvbm7W4sWLtWjRotZtx44dU0JCgjIzMyVJU6ZMUWlpaVhrAIDos/0ezfe+9z3Nnj1bTz/9tHr27HnHr3Xu27dvWCFWr16tvLw89enTp3VbXV2devXq1Xo7KSlJwWBQV65cCXktMTHRdqbk5K5hzeSElJRucblvpzBz7Iu3eSUzM39p0TQ0NCglJaX1Tf+//e1vsiyrdd3lcunEiRMhB3j//fd17Ngx/eQnPwn5a5jg93+kYND68jv+Fyf/YzY0XHVkvykp3Rzbt1OYOfbF27xSeDO73a7PfYH+pUXj8/n0j3/8QydPnpQkPfvss1q/fn1IQT7LwYMHVV1drezsbEnShQsX9OSTT2rq1Kmqra1tvV9jY6PcbrcSExPl9XpDWgMARN+Xvkdz+9GL9EkxRNKMGTO0b98+7d27V3v37lXPnj31m9/8RtOnT9eNGzd06NAhSdL27ds1duxYSdLgwYNDWgMARN+XHtHc/l6M9L/FY4rb7dby5ctVVFSkmzdvqnfv3lqxYkVYawCA6PvSogkEAjpw4EBrwfz3bUn61re+FbFAe/fubf37Aw88oN27d3/m/UJdAwBE15cWTXJyshYsWNB6OzEx8Y7bLpdLb7/9tpl0AIB270uL5vYjDAAA2irkX3wGAIAdFA0AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGEXRAACMcrxoLl++rKeeeko+n0+5ubl67rnn1NjYKEk6fPiw8vLy5PP5NG3aNPn9/tbHhboGAIgux4vG5XJp+vTpKisr0+7du9W3b1+tXLlSwWBQc+fOVWFhocrKypSZmamVK1dKUshrAIDoc7xoEhMTNXz48NbbQ4cOVW1trY4dO6aEhARlZmZKkqZMmaLS0lJJCnkNABB9jhfN7YLBoLZt26asrCzV1dWpV69erWtJSUkKBoO6cuVKyGsAgOjr4HSA2y1ZskRdunTR448/rj/+8Y+OZklO7uro/kORktItLvftFGaOfU7N29wSUKeOHkf2a2Lmu6ZoiouLdf78ef3qV7+S2+2W1+tVbW1t63pjY6PcbrcSExNDXmsLv/8jBYNWm+dw8huxoeGqI/tNSenm2L6dwsyxz8l5U1K6KfeFnVHf7+5VE0Ke2e12fe4L9Lvi1NnLL7+sY8eOaf3
69erUqZMkafDgwbpx44YOHTokSdq+fbvGjh0b1hoAIPocP6L55z//qY0bN6pfv36aMmWKJKlPnz5av369li9frqKiIt28eVO9e/fWihUrJElutzukNQBA9DleNAMGDFBVVdVnrj3wwAPavXt3RNcAANF1V5w6AwDELooGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAqJgtmrNnzyo/P18+n0/5+fk6d+6c05EAIC7FbNEUFRWpoKBAZWVlKigoUGFhodORACAudXA6gAl+v1+VlZV69dVXJUk5OTlasmSJGhsblZSUZOtruN2ukPef2qNzyI8NRziZ2/O+ncLMsc/Jedvb88gXPS4mi6aurk5paWnyeDySJI/Ho9TUVNXV1dkumh49vhLy/n/zf2NCfmw4kpO7OrJfp/ftFGaOfU7OG0vPIzF76gwAcHeIyaLxer26ePGiAoGAJCkQCKi+vl5er9fhZAAQf2KyaJKTk5Wenq6SkhJJUklJidLT022fNgMARI7LsizL6RAmVFdXa/78+WpqalL37t1VXFys/v37Ox0LAOJOzBYNAODuEJOnzgAAdw+KBgBgFEUDADCKogEAGEXRhMDOBTsDgYBefPFFjRo1SqNHj9brr78e/aARZGfm9evX65FHHlFubq4mTpyov/zlL9EPGkFtuTDrmTNnNGTIEBUXF0cvoAF2Z37rrbeUm5urnJwc5ebm6tKlS9ENGiF25vX7/ZoxY4Zyc3M1btw4LVq0SLdu3Yp+2AgpLi5WVlaWBg4cqFOnTn3mfSL+/GWhzaZOnWrt2LHDsizL2rFjhzV16tT/uc8bb7xhTZs2zQoEApbf77dGjhxp1dTURDtqxNiZuaKiwvr4448ty7KsEydOWBkZGdb169ejmjOS7MxsWZZ169Yt6/HHH7eef/5566WXXopmxIizM/PRo0etcePGWfX19ZZlWVZTU5N148aNqOaMFDvzLl26tPXftbm52Zo0aZL15ptvRjVnJB08eNCqra21Hn74Yauqquoz7xPp5y+OaNro0wt25uTkSPrkgp2VlZVqbGy8435vvfWWJk+eLLfbraSkJI0aNUqlpaVORA6b3ZlHjhypzp0/uRDgwIEDZVmWrly5Eu24EWF3ZknatGmTvv3tb6tfv35RThlZdmfesmWLpk2bppSUFElSt27dlJCQEPW84bI7r8vl0rVr1xQMBtXc3KyWlhalpaU5ETkiMjMzv/QqKZF+/qJo2uiLLtj53/fr1atX622v16sLFy5ENWuk2J35djt27NC9996rnj17RitmRNmd+eTJk9q3b5+eeOIJB1JGlt2Zq6urVVNTo8cee0yPPvqoNmzYIKsd/jie3XmfeeYZnT17ViNGjGj9k5GR4UTkqIn08xdFg4j7+9//rtWrV2vVqlVORzGqpaVFCxcu1Isvvtj6ZBUPAoGAqqqq9Oqrr+p3v/udKioqtHPnTqdjGVNaWqqBAwdq3759qqio0KFDh9rt2QmnUDRtZPeCnV6vV7W1ta236+rq2u2r+7ZcpPT999/X3LlztX79+nZ9yR87Mzc0NOjDDz/UjBkzlJWVpd/+9rf6/e9/r4ULFzoVOyx2/5179eqlsWPHqlOnTuratauys7N19OhRJyKHxe68r732mvLy8uR2u9WtWzdlZWXp3XffdSJy1ET6+YuiaSO7F+wcO3asXn/9dQWDQTU2NmrPnj3y+XxORA6b3ZmPHj2qOXPmaM2aNfr617/uRNSIsTNzr1699O6772rv3r3au3evvv/97+u73/2ulixZ4lTssNj9d87JydG+fftkWZZaWlp04MABDRo0yIn
IYbE7b58+fVRRUSFJam5u1v79+zVgwICo542miD9/hfwxgjh2+vRpa9KkSdaYMWOsSZMmWdXV1ZZlWdb06dOto0ePWpb1ySeRCgsLrezsbCs7O9vavn27k5HDZmfmiRMnWsOHD7fy8vJa/5w8edLJ2GGxM/Pt1qxZ0+4/dWZn5kAgYC1btswaO3asNX78eGvZsmVWIBBwMnbI7Mx7/vx564knnrBycnKscePGWYsWLbJaWlqcjB2WJUuWWCNHjrTS09OtBx980Bo/frxlWWafv7ioJgDAKE6dAQCMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGPX/DOJu0hB5BZYAAAAASUVORK5CYII=\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_hr['left'].plot.hist()" ] }, { "cell_type": "code", "execution_count": null, "id": "84e823bb-82e6-467b-b67b-24dedbda07f4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<AxesSubplot:ylabel='Frequency'>" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAD7CAYAAACvzHniAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAY5klEQVR4nO3de1BU5x3G8Wd3EdQqg1DAFU2s1kFSWx0vddIx0wY1aILYtLU4JPZio2nTVE1ai2MaQHFsQE3itdWxo23imKkzjRfMADW2UtPEaiZqDaKNGrUBBRYdjTdw9/QPxzU2gsd92V0Wvp8ZZ9zz7uH89mXZZ8/tfR2WZVkCAMCAM9wFAAAiH2ECADBGmAAAjBEmAABjhAkAwBhhAgAwRpgAAIxFhbuAcDp37pJ8vvZxm01CQjd5PJ+Gu4w2gb64gX64hb64xaQvnE6HevT4wh3bOnSY+HxWuwkTSe3qtZiiL26gH26hL24JRl+E5DBXUVGR0tPTlZqaqqNHj/qXnzhxQtnZ2crIyFB2drY+/vhj4zYAQOiFJExGjx6tDRs2KCUl5bbl+fn5ysnJUVlZmXJycpSXl2fcBgAIvZCEyfDhw+V2u29b5vF4VFlZqczMTElSZmamKisr1dDQEHAbACA8wnbOpKamRsnJyXK5XJIkl8ulpKQk1dTUyLKsgNri4+PD9XIAoEPr0CfgExK6hbuEVpWY2D3cJbQZ9MUN9MMt9MUtweiLsIWJ2+3W2bNn5fV65XK55PV6VVtbK7fbLcuyAmq7Vx7Pp+3mCo/ExO6qq7sY7jLaBPriBvrhFvriFpO+cDodzX4JD9tNiwkJCUpLS1NJSYkkqaSkRGlpaYqPjw+4DQAQHo5QTI61YMEClZeXq76+Xj169FBcXJy2b9+uY8eOac6cObpw4YJiY2NVVFSkfv36SVLAbfeCPZP2ib64IZL6oXtsF3WOCexAydVr13XxwpUWnxNJfRFswdozCUmYtFWESftEX9wQSf2QmNhdE365JaB1ty2ZeNfXGUl9EWzt7jAXAKD9IEwAAMYIEwCAMcIEAGCMMAEAGCNMAADGCBMAgDHCBABgjDABABgjTAAAxggTAICxDj2fCYCOzWSAScneIJMdBWECoMPqHBMV8ACT0o1BJhk+8gYOcwEAjBEmAABjhAkAwBhhAgAwRpgA
AIwRJgAAY4QJAMAYYQIAMEaYAACMESYAAGOECQDAGGECADBGmAAAjDFqMNDGmAyLzpDoCBfCBAgC03kyAh0WnSHRES6ECRAEJvNkbFsysZWrAYKPcyYAAGOECQDAGIe5APhx8h+BIkwA+Jme6+Hkf8fVJg5z/e1vf9O3v/1tTZw4UVlZWSovL5cknThxQtnZ2crIyFB2drY+/vhj/zottQEAQivsYWJZln7961+ruLhYW7ZsUXFxsXJzc+Xz+ZSfn6+cnByVlZUpJydHeXl5/vVaagMAhFbYw0SSnE6nLl68sYN88eJFJSUl6dy5c6qsrFRmZqYkKTMzU5WVlWpoaJDH42m2DQAQemE/Z+JwOPTqq6/qmWeeUdeuXXXp0iWtWbNGNTU1Sk5OlsvlkiS5XC4lJSWppqZGlmU12xYfHx/OlwMAHVLYw+T69etavXq1Vq1apWHDhun999/XrFmzVFxcHPRtJyR0C/o2QikxsXu4S2gzOnJffPa1h7ofwtXvdrYbrNoi8b0WjJrDHiaHDx9WbW2thg0bJkkaNmyYunTpopiYGJ09e1Zer1cul0ter1e1tbVyu92yLKvZtnvh8Xwqn88KxssKucTE7qqr41oaqW30RTg/YG6+9kD6wbTuQPs92Nttri9a4/cU6GsO12XYJn8fTqej2S/hYQ+Tnj176syZMzp+/Lj69eunY8eOyePx6P7771daWppKSko0ceJElZSUKC0tzX8Yq6U2AGjr2ttl2GEPk8TERBUUFGjmzJlyOBySpIULFyouLk4FBQWaM2eOVq1apdjYWBUVFfnXa6kNABBaYQ8TScrKylJWVtbnlvfv31+bNm264zottQEAQqtNXBoMAIhshAkAwBhhAgAwRpgAAIwRJgAAY23iai4AgH2NTd6Ab7hsbPK2cjU3ECYAEGGiO7mMbngMBg5zAQCMESYAAGMc5gIQ0eyeP4jE0X0jCWECIKK1xfMHHRGHuQAAxggTAIAxwgQAYIwwAQAY4wQ8gFZhclc2Ih9hAqBVdMSrqgjQWwgTAAhQRwzQ5hAmQDvy/9+U+daMUCFMgHbE5Juy1P6+LSN0uJoLAGCMMAEAGCNMAADGCBMAgDHCBABgjDABABgjTAAAxmyHyY4dO3T9+vVg1gIAiFC2w2TZsmUaNWqU5s+frwMHDgSzJgBAhLEdJlu3btX69esVExOjX/ziF8rIyNCqVav03//+N5j1AQAiwD2dMxk4cKByc3O1a9cu5efnq7S0VGPHjtUTTzyhrVu3yufzBatOAEAbds9jc506dUpbt27V1q1b5XA4NGPGDLndbm3YsEHl5eVasWJFMOoEALRhtsNkw4YN2rJli06ePKnx48eruLhYQ4YM8bdnZGToG9/4RjBqBAC0cbbDpKKiQj/+8Y81evRoRUdHf669S5cuWr58eUBFXLt2TQsXLtS7776rmJgYDRkyRIWFhTpx4oTmzJmj8+fPKy4uTkVFRerbt68ktdgGAAgt22GybNkyOZ1OderUyb+sqalJlmX5w2XUqFEBFbFo0SLFxMSorKxMDodD9fX1kqT8/Hzl5ORo4sSJ2rJli/Ly8vSnP/3prm2AJHWP7aLOMYHPsnD12nVdvHClFSsC2i/bf2lTp07V7Nmzbzu09eGHH2rJkiV67bXXAi7g0qVL2rx5s3bt2iWHwyFJ+uIXvyiPx6PKykqtW7dOkpSZmanCwkI1NDTIsqxm2+Lj4wOuBe1L55go47k9LrZiPUB7ZjtMjhw5osGDB9+27Gtf+5qqqqqMCjh9+rTi4uK0YsUK7dmzR1/4whc0c+ZMde7cWcnJyXK5XJIkl8ulpKQk1dTUyLKsZtsIEwAIPdthEhsbq/r6eiUmJvqX1dfXq0uXLkYFeL1enT59Wg888IByc3N14MAB/fSnP9XSpUuNfq4dCQndgr6NUGKK1ltaqy/oU7RHwXhf2w6TRx55RL/85S/1m9/8Rn369NGpU6f00ksvafz48UYFuN1u
RUVFKTMzU5I0ePBg9ejRQ507d9bZs2fl9Xrlcrnk9XpVW1srt9sty7KabbsXHs+n8vkso/rbisTE7qqr46CMdKsvWuMPJtA+JYTQlgX6vnY6Hc1+Cbd90+Jzzz2n/v37a9KkSRo6dKiys7P1pS99Sc8//3xARd0UHx+vkSNH6p133pF04yotj8ejvn37Ki0tTSUlJZKkkpISpaWlKT4+XgkJCc22AQBCz/aeSUxMjPLz85WXl6dz586pR48e/hPmpubNm6e5c+eqqKhIUVFRKi4uVmxsrAoKCjRnzhytWrVKsbGxKioq8q/TUhsAILTu6brJixcv6sSJE7p06dJtyx988EGjIvr06XPHK8L69++vTZs23XGdltoAAKFlO0z+8pe/aP78+eratas6d+7sX+5wOPT2228HpTgAQGSwHSavvPKKli5dqm9+85vBrCcimNwMx41wANoj25+IXq834Dvc2xuTm+G4EQ5Ae2T7aq5p06bpd7/7HcPMAwA+x/aeyfr161VfX6+1a9cqLi7utra///3vrVwWACCS2A6TRYsWBbMOAEAEsx0mX//614NZBwAggtk+Z9LY2KhXXnlFo0eP1rBhwyRJu3fv1uuvvx604gAAkcF2mCxcuFBHjx7V4sWL/Xe+DxgwQBs3bgxacQCAyGD7MNeOHTtUXl6url27yum8kUHJyck6e/Zs0IoDAEQG23smnTp1ktfrvW1ZQ0PD567sAgB0PLbDZNy4ccrNzdXp06clSbW1tZo/f74ee+yxoBUHAIgM9zQEfe/evZWVlaULFy4oIyNDSUlJ+vnPfx7M+gAAEcD2OZPo6GjNnTtXc+fOVUNDQ6sOQQ8AiGy2w+Tm4a2bPjsMfZ8+fVqvIgBAxLEdJmPHjpXD4ZBl3Zrm9uaeyeHDh1u/MgBAxLAdJlVVVbc9rqur04oVKzR8+PBWLwoAEFlsn4D/f4mJiXrhhRf08ssvt2Y9AIAIFHCYSNLx48d15QoTPQFAR2f7MFdOTs5tV29duXJFH330EZcGI6gCndUyMbF7EKoB0Bzbf6WTJk267XGXLl00cOBA9e3bt7VrAvxMZ7UEEBq2w+Txxx8PZh0AgAhmO0yWLl1q63kzZ84MuBgAQGSyHSYnT55UeXm5Bg0apJSUFFVXV+vf//63HnnkEcXExASzRgBAG2c7TCzL0pIlS5SRkeFfVl5ertLSUv32t78NSnEAgMhg+9LgiooKjRkz5rZl6enp2rVrV6sXBQCILLbD5P7779eGDRtuW7Zx40bdd999rV4UACCy2D7MtWDBAj377LNau3atf4bFqKgoLV++PJj1AQAigO0weeCBB1RWVqYDBw6otrZWiYmJGjJkiDp16hTM+gAAESDg4VRGjBihpqYmXb58uTXrAQBEINt7JkeOHNHPfvYzRUdH6+zZs3r00Ue1d+9evfnmm3r11VeDWCIAoK2zvWdSUFCgGTNmqLS0VFFRNzJoxIgRev/994NWHAAgMtgOk48++kgTJ94Y6+jmgI9du3bVtWvXglMZACBi2A6TlJQUHTp06LZlBw8e5NJgAID9MJk5c6aefvppLVu2TE1NTVq9erVmzpypWbNmtVoxK1asUGpqqo4ePSpJ2r9/v7KyspSRkaGpU6fK4/H4n9tSGwAgtGyHycMPP6y1a9eqoaFBI0aM0CeffKLly5dr1KhRrVLIhx9+qP379yslJUWS5PP5NHv2bOXl5amsrEzDhw/X4sWL79oGAAg9W2Hi9Xo1ZswYffnLX1ZBQYHWrFmj+fPna9CgQa1SRGNjo+bPn6+CggL/skOHDikmJsY/x/zkyZNVWlp61zYAQOjZujTY5XLJ5XLp2rVrio6ObvUili5dqqysLPXu3du/rKamRr169fI/jo+Pl8/n0/nz51tsi4uLs73dhIRurVL/vQrWLIDMLtj66FO0R8F4X9u+z+QHP/iBZs2apaefflo9e/a8bQrfPn36BFzABx98oEOHDulXv/pVwD8j
UB7Pp/L5rHtez/QXUVd30Wj9O0lM7B6Unxtu4fwwb2zyKrqTK2zbB4Il0M8Kp9PR7Jfwu4ZJXV2dEhMTVVhYKEn65z//Kcu69QHscDh0+PDhgAqTpL179+rYsWMaPXq0JOnMmTP6yU9+oilTpqi6utr/vIaGBjmdTsXFxcntdjfbBrSW6E4upgwGbLrrOZOb85dUVVWpqqpK6enp/v9XVVUZBYkkTZ8+Xbt379bOnTu1c+dO9ezZU3/4wx/01FNP6erVq9q3b58k6Y033tC4ceMkSYMGDWq2DQAQenfdM/nsXoh0Y08iFJxOp4qLi5Wfn69r164pJSVFixYtumsbACD07homnz03In0+XFrbzp07/f8fOnSotm3bdsfntdQGAAitu4aJ1+vVe++95w+R/38sSQ8++GDwKgQAtHl3DZOEhATNnTvX/zguLu62xw6HQ2+//XZwqgMARIS7hslnDzsBAHAnAU+OBQDATYQJAMAYYQIAMEaYAACMESYAAGOECQDAGGECADBGmAAAjBEmAABjhAkAwBhhAgAwZnvaXrSOxiZvwFPRXr12XRcvXGnligDAHGESYqZTwba/Wd4BtAcc5gIAGCNMAADGCBMAgDHCBABgjDABABgjTAAAxggTAIAxwgQAYIwwAQAYI0wAAMYYTgVB1z22izrH8FYD2jP+whF0nWOijMYjA9D2cZgLAGCMMAEAGOMwVwS521wod5snhflQAAQLYRJBTOZCkZgPBUDwcJgLAGAs7GFy7tw5TZs2TRkZGZowYYKeffZZNTQ0SJL279+vrKwsZWRkaOrUqfJ4PP71WmoDAIRW2MPE4XDoqaeeUllZmbZt26Y+ffpo8eLF8vl8mj17tvLy8lRWVqbhw4dr8eLFktRiGwAg9MIeJnFxcRo5cqT/8ZAhQ1RdXa1Dhw4pJiZGw4cPlyRNnjxZpaWlktRiGwAg9NrUCXifz6eNGzcqPT1dNTU16tWrl78tPj5ePp9P58+fb7EtLi7O9vYSErq1Zvlt3t2uBrvbutGdXK1cEYBwCPRzoCVtKkwKCwvVtWtXPfnkk/rrX/8a9O15PJ/K57Pueb1g/CJCweRqsG1LJqquLrBrwSK1v4D2KtC/ZafT0eyX8DYTJkVFRTp58qR+//vfy+l0yu12q7q62t/e0NAgp9OpuLi4FtsAAKEX9nMmkvTyyy/r0KFDWrlypaKjoyVJgwYN0tWrV7Vv3z5J0htvvKFx48bdtQ0AEHph3zP5z3/+o9WrV6tv376aPHmyJKl3795auXKliouLlZ+fr2vXriklJUWLFi2SJDmdzmbbAAChF/YwGTBggI4cOXLHtqFDh2rbtm333AYACK02cZgLABDZCBMAgDHCBABgjDABABgjTAAAxggTAIAxwgQAYIwwAQAYC/tNi4gMJiMOA2j/CBPYYjriMID2jcNcAABjhAkAwBhhAgAwRpgAAIwRJgAAY4QJAMAYYQIAMEaYAACMESYAAGOECQDAGGECADBGmAAAjBEmAABjhAkAwBhhAgAwRpgAAIwRJgAAY4QJAMAYYQIAMEaYAACMESYAAGOECQDAGGECADAW0WFy4sQJZWdnKyMjQ9nZ2fr444/DXRIAdEgRHSb5+fnKyclRWVmZcnJylJeXF+6SAKBDigp3AYHyeDyqrKzUunXrJEmZmZkqLCxUQ0OD4uPjbf0Mp9MR8PaTenSJuHXDue1IXDec2+Y1R8a64dy2ybqBfva1tJ7Dsiwr0ILC6dChQ8rNzdX27dv9yx599FEtWrRIX/nKV8JYGQB0PBF9mAsA0DZEbJi43W6dPXtWXq9XkuT1elVbWyu32x3mygCg44nYMElISFBaWppKSkokSSUlJUpLS7N9vgQA0Hoi9pyJJB07dkxz5szRhQsXFBsbq6KiIvXr1y/cZQFAhxPRYQIAaBsi9jAXAKDtIEwAAMYIEwCAMcIEAGCMMIkgdga2XLlypR577DFNmDBB3/nOd/SPf/wj9IWG
wL0M8nn8+HENHjxYRUVFoSswhOz2xVtvvaUJEyYoMzNTEyZMUH19fWgLDTI7/eDxeDR9+nRNmDBB48ePV0FBga5fvx76YoOsqKhI6enpSk1N1dGjR+/4HK/Xq3nz5mnMmDEaO3asNm3aZLZRCxFjypQp1ubNmy3LsqzNmzdbU6ZM+dxzKioqrMuXL1uWZVmHDx+2hg0bZl25ciWkdYaCnb6wLMu6fv269eSTT1rPP/+89dJLL4WyxJCx0xcHDx60xo8fb9XW1lqWZVkXLlywrl69GtI6g81OPyxYsMD/PmhsbLS+973vWdu3bw9pnaGwd+9eq7q62nr44YetI0eO3PE5b775pjV16lTL6/VaHo/Heuihh6zTp08HvE32TCLEzYEtMzMzJd0Y2LKyslINDQ23Pe+hhx5Sly43BoBLTU2VZVk6f/58qMsNKrt9IUlr1qzRt771LfXt2zfEVYaG3b5Yv369pk6dqsTERElS9+7dFRMTE/J6g8VuPzgcDl26dEk+n0+NjY1qampScnJyOEoOquHDh991NJC33npLkyZNktPpVHx8vMaMGaPS0tKAt0mYRIiamholJyfL5XJJklwul5KSklRTU9PsOps3b9Z9992nnj17hqrMkLDbF1VVVdq9e7d+9KMfhaHK0LDbF8eOHdPp06f1xBNP6PHHH9eqVatktaNbzOz2wzPPPKMTJ05o1KhR/n/Dhg0LR8lhV1NTo169evkfu91unTlzJuCfR5i0U//617+0dOlSLVmyJNylhEVTU5NefPFFzZs3z/8B05F5vV4dOXJE69at02uvvaaKigpt2bIl3GWFXGlpqVJTU7V7925VVFRo3759Rt/GcQthEiHuZWDLDz74QLNnz9bKlSvb5fAydvqirq5Op06d0vTp05Wenq4//vGP+vOf/6wXX3wxXGUHhd33Ra9evTRu3DhFR0erW7duGj16tA4ePBiOkoPCbj+8/vrrysrKktPpVPfu3ZWenq49e/aEo+Swc7vdqq6u9j+uqakxOopBmEQIuwNbHjx4UM8995yWLVvWbud1sdMXvXr10p49e7Rz507t3LlTP/zhD/X9739fhYWF4So7KOy+LzIzM7V7925ZlqWmpia99957GjhwYDhKDgq7/dC7d29VVFRIkhobG/Xuu+9qwIABIa+3LRg3bpw2bdokn8+nhoYG7dixQxkZGQH/PMbmiiDNDWw5bdo0zZgxQ1/96lf13e9+V5988sltJxWLi4uVmpoaxspbn52++Kzly5fr8uXLys3NDVPFwWOnL3w+n4qKilRRUSGn06lRo0YpNzdXTmf7+T5ppx9OnTql/Px81dfXy+v1auTIkXrhhRcUFRWxk87e0YIFC1ReXq76+nr16NFDcXFx2r59+2194fV6NX/+fL3zzjuSpGnTpik7OzvgbRImAABj7edrCQAgbAgTAIAxwgQAYIwwAQAYI0wAAMYIEwCAMcIEAGCMMAEAGPsfwKI/5gABcAUAAAAASUVORK5CYII=\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_hr['S'].plot.hist(bins=20)" ] }, { "cell_type": "code", "execution_count": null, "id": "2e7d2e0f-b5dd-4d01-a159-64bd49d06e28", "metadata": {}, "outputs": [], "source": [ "y = df_hr.pop('left')\n", "X = df_hr.copy()" ] }, { "cell_type": "code", "execution_count": null, "id": "66b02a04-fd6c-44e5-8803-e25be209c3d7", "metadata": {}, "outputs": [ { "data": 
{ "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>S</th>\n", " <th>LPE</th>\n", " <th>NP</th>\n", " <th>ANH</th>\n", " <th>TIC</th>\n", " <th>Newborn</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.38</td>\n", " <td>0.53</td>\n", " <td>2</td>\n", " <td>157</td>\n", " <td>3</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>0.80</td>\n", " <td>0.86</td>\n", " <td>5</td>\n", " <td>262</td>\n", " <td>6</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>0.11</td>\n", " <td>0.88</td>\n", " <td>7</td>\n", " <td>272</td>\n", " <td>4</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>0.72</td>\n", " <td>0.87</td>\n", " <td>5</td>\n", " <td>223</td>\n", " <td>5</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>0.37</td>\n", " <td>0.52</td>\n", " <td>2</td>\n", " <td>159</td>\n", " <td>3</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>11995</th>\n", " <td>0.90</td>\n", " <td>0.55</td>\n", " <td>3</td>\n", " <td>259</td>\n", " <td>2</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>11996</th>\n", " <td>0.74</td>\n", " <td>0.95</td>\n", " <td>5</td>\n", " <td>266</td>\n", " <td>4</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>11997</th>\n", " <td>0.85</td>\n", " <td>0.54</td>\n", " <td>3</td>\n", " <td>185</td>\n", " <td>3</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>11998</th>\n", " <td>0.33</td>\n", " <td>0.65</td>\n", " 
<td>3</td>\n", " <td>172</td>\n", " <td>5</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>11999</th>\n", " <td>0.50</td>\n", " <td>0.73</td>\n", " <td>4</td>\n", " <td>180</td>\n", " <td>3</td>\n", " <td>0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>12000 rows × 6 columns</p>\n", "</div>" ], "text/plain": [ " S LPE NP ANH TIC Newborn\n", "0 0.38 0.53 2 157 3 0\n", "1 0.80 0.86 5 262 6 0\n", "2 0.11 0.88 7 272 4 0\n", "3 0.72 0.87 5 223 5 0\n", "4 0.37 0.52 2 159 3 0\n", "... ... ... .. ... ... ...\n", "11995 0.90 0.55 3 259 2 1\n", "11996 0.74 0.95 5 266 4 0\n", "11997 0.85 0.54 3 185 3 0\n", "11998 0.33 0.65 3 172 5 0\n", "11999 0.50 0.73 4 180 3 0\n", "\n", "[12000 rows x 6 columns]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X" ] }, { "cell_type": "code", "execution_count": null, "id": "59f26d5b-6e3e-4fd4-b4a1-a065d06989a2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 1\n", "1 1\n", "2 1\n", "3 1\n", "4 1\n", " ..\n", "11995 0\n", "11996 0\n", "11997 0\n", "11998 0\n", "11999 0\n", "Name: left, Length: 12000, dtype: int64" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y" ] }, { "cell_type": "code", "execution_count": null, "id": "25f796be-6d89-4a65-a279-f23c79542383", "metadata": {}, "outputs": [], "source": [ "X = sm.add_constant(X)" ] }, { "cell_type": "code", "execution_count": null, "id": "01477663-857c-4f90-b814-635cbb1bffb5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Optimization terminated successfully.\n", " Current function value: 0.354538\n", " Iterations 7\n" ] } ], "source": [ "model_hr = sm.Logit(y, X).fit()" ] }, { "cell_type": "code", "execution_count": null, "id": "9f327cbc-aa9e-4966-9b8b-a5f92f387168", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<table class=\"simpletable\">\n", "<caption>Logit Regression Results</caption>\n", "<tr>\n", " <th>Dep. 
Variable:</th> <td>left</td> <th> No. Observations: </th> <td> 12000</td> \n", "</tr>\n", "<tr>\n", " <th>Model:</th> <td>Logit</td> <th> Df Residuals: </th> <td> 11993</td> \n", "</tr>\n", "<tr>\n", " <th>Method:</th> <td>MLE</td> <th> Df Model: </th> <td> 6</td> \n", "</tr>\n", "<tr>\n", " <th>Date:</th> <td>Fri, 27 May 2022</td> <th> Pseudo R-squ.: </th> <td>0.2131</td> \n", "</tr>\n", "<tr>\n", " <th>Time:</th> <td>08:04:49</td> <th> Log-Likelihood: </th> <td> -4254.5</td>\n", "</tr>\n", "<tr>\n", " <th>converged:</th> <td>True</td> <th> LL-Null: </th> <td> -5406.7</td>\n", "</tr>\n", "<tr>\n", " <th>Covariance Type:</th> <td>nonrobust</td> <th> LLR p-value: </th> <td> 0.000</td> \n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n", "</tr>\n", "<tr>\n", " <th>const</th> <td> -1.2412</td> <td> 0.160</td> <td> -7.751</td> <td> 0.000</td> <td> -1.555</td> <td> -0.927</td>\n", "</tr>\n", "<tr>\n", " <th>S</th> <td> -3.8163</td> <td> 0.121</td> <td> -31.607</td> <td> 0.000</td> <td> -4.053</td> <td> -3.580</td>\n", "</tr>\n", "<tr>\n", " <th>LPE</th> <td> 0.5044</td> <td> 0.181</td> <td> 2.788</td> <td> 0.005</td> <td> 0.150</td> <td> 0.859</td>\n", "</tr>\n", "<tr>\n", " <th>NP</th> <td> -0.3592</td> <td> 0.026</td> <td> -13.569</td> <td> 0.000</td> <td> -0.411</td> <td> -0.307</td>\n", "</tr>\n", "<tr>\n", " <th>ANH</th> <td> 0.0038</td> <td> 0.001</td> <td> 6.067</td> <td> 0.000</td> <td> 0.003</td> <td> 0.005</td>\n", "</tr>\n", "<tr>\n", " <th>TIC</th> <td> 0.6188</td> <td> 0.027</td> <td> 22.820</td> <td> 0.000</td> <td> 0.566</td> <td> 0.672</td>\n", "</tr>\n", "<tr>\n", " <th>Newborn</th> <td> -1.4851</td> <td> 0.113</td> <td> -13.157</td> <td> 0.000</td> <td> -1.706</td> <td> -1.264</td>\n", "</tr>\n", "</table>" ], "text/plain": [ "<class 'statsmodels.iolib.summary.Summary'>\n", "\"\"\"\n", " Logit Regression Results \n", 
"==============================================================================\n", "Dep. Variable: left No. Observations: 12000\n", "Model: Logit Df Residuals: 11993\n", "Method: MLE Df Model: 6\n", "Date: Fri, 27 May 2022 Pseudo R-squ.: 0.2131\n", "Time: 08:04:49 Log-Likelihood: -4254.5\n", "converged: True LL-Null: -5406.7\n", "Covariance Type: nonrobust LLR p-value: 0.000\n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -1.2412 0.160 -7.751 0.000 -1.555 -0.927\n", "S -3.8163 0.121 -31.607 0.000 -4.053 -3.580\n", "LPE 0.5044 0.181 2.788 0.005 0.150 0.859\n", "NP -0.3592 0.026 -13.569 0.000 -0.411 -0.307\n", "ANH 0.0038 0.001 6.067 0.000 0.003 0.005\n", "TIC 0.6188 0.027 22.820 0.000 0.566 0.672\n", "Newborn -1.4851 0.113 -13.157 0.000 -1.706 -1.264\n", "==============================================================================\n", "\"\"\"" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_hr.summary()" ] }, { "cell_type": "code", "execution_count": null, "id": "8046b296-fed4-47da-90f4-272f3593f146", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\u001b[0;31mSignature:\u001b[0m \u001b[0mmodel_hr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexog\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mDocstring:\u001b[0m\n", "Call self.model.predict with self.params as the first argument.\n", "\n", "Parameters\n", "----------\n", "exog : array_like, optional\n", " The 
values for which you want to predict. see Notes below.\n", "transform : bool, optional\n", " If the model was fit via a formula, do you want to pass\n", " exog through the formula. Default is True. E.g., if you fit\n", " a model y ~ log(x1) + log(x2), and transform is True, then\n", " you can pass a data structure that contains x1 and x2 in\n", " their original form. Otherwise, you'd need to log the data\n", " first.\n", "*args\n", " Additional arguments to pass to the model, see the\n", " predict method of the model for the details.\n", "**kwargs\n", " Additional keywords arguments to pass to the model, see the\n", " predict method of the model for the details.\n", "\n", "Returns\n", "-------\n", "array_like\n", " See self.model.predict.\n", "\n", "Notes\n", "-----\n", "The types of exog that are supported depends on whether a formula\n", "was used in the specification of the model.\n", "\n", "If a formula was used, then exog is processed in the same way as\n", "the original data. This transformation needs to have key access to the\n", "same variable names, and can be a pandas DataFrame or a dict like\n", "object that contains numpy arrays.\n", "\n", "If no formula was used, then the provided exog needs to have the\n", "same number of columns as the original exog in the model. No\n", "transformation of the data is performed except converting it to\n", "a numpy array.\n", "\n", "Row indices as in pandas data frames are supported, and added to the\n", "returned prediction.\n", "\u001b[0;31mFile:\u001b[0m /opt/anaconda/envs/aiking/lib/python3.9/site-packages/statsmodels/base/model.py\n", "\u001b[0;31mType:\u001b[0m method\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "model_hr.predict?" 
] }, { "cell_type": "code", "execution_count": null, "id": "177f018a-d746-4fb8-b1dc-83f71699ef6d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7.641666666666667" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cutoff = 0.5\n", "(model_hr.predict(X) > cutoff).sum()*100/len(y)" ] }, { "cell_type": "code", "execution_count": null, "id": "5cc0ea1a-eae5-4979-af3d-c5b5e511e7fe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th>left</th>\n", " <th>0</th>\n", " <th>1</th>\n", " </tr>\n", " <tr>\n", " <th>row_0</th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>False</th>\n", " <td>9464</td>\n", " <td>1619</td>\n", " </tr>\n", " <tr>\n", " <th>True</th>\n", " <td>536</td>\n", " <td>381</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "left 0 1\n", "row_0 \n", "False 9464 1619\n", "True 536 381" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(model_hr.predict(X) >cutoff, y)" ] }, { "cell_type": "code", "execution_count": null, "id": "b3563e23-3c27-410d-8a72-81e32b2a60ce", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.9464, 0.1905)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "9464/(9464+536), 381/(1619+381)" ] }, { "cell_type": "code", "execution_count": null, "id": "55ab4840-1389-491a-aac8-7318dfffbaad", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9235833333333333" ] }, "execution_count": null, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "(9464+1619)/12000" ] }, { "cell_type": "code", "execution_count": null, "id": "64ca6dff-f5c6-4d1f-bfa0-64b40d55d091", "metadata": {}, "outputs": [], "source": [ "accuracy = (9464+381)/(9464+381+536+1619); accuracy" ] }, { "cell_type": "code", "execution_count": null, "id": "47df970f-6673-41e3-bf10-a06dbbbcfa55", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "S -31.606505\n", "TIC 22.820109\n", "NP -13.569440\n", "Newborn -13.156788\n", "const -7.751316\n", "ANH 6.067180\n", "LPE 2.788130\n", "dtype: float64" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_hr.tvalues[model_hr.tvalues[model_hr.pvalues <= 0.05].abs().sort_values(ascending=False).index]" ] }, { "cell_type": "code", "execution_count": null, "id": "d474039f-918e-4dcb-869a-782ebdef665d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>S</th>\n", " <th>LPE</th>\n", " <th>NP</th>\n", " <th>ANH</th>\n", " <th>TIC</th>\n", " <th>Newborn</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.38</td>\n", " <td>0.53</td>\n", " <td>2</td>\n", " <td>157</td>\n", " <td>3</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>0.80</td>\n", " <td>0.86</td>\n", " <td>5</td>\n", " <td>262</td>\n", " <td>6</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>0.11</td>\n", " <td>0.88</td>\n", " <td>7</td>\n", " <td>272</td>\n", " <td>4</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>0.72</td>\n", " <td>0.87</td>\n", " <td>5</td>\n", " 
<td>223</td>\n", " <td>5</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>0.37</td>\n", " <td>0.52</td>\n", " <td>2</td>\n", " <td>159</td>\n", " <td>3</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>11995</th>\n", " <td>0.90</td>\n", " <td>0.55</td>\n", " <td>3</td>\n", " <td>259</td>\n", " <td>2</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>11996</th>\n", " <td>0.74</td>\n", " <td>0.95</td>\n", " <td>5</td>\n", " <td>266</td>\n", " <td>4</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>11997</th>\n", " <td>0.85</td>\n", " <td>0.54</td>\n", " <td>3</td>\n", " <td>185</td>\n", " <td>3</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>11998</th>\n", " <td>0.33</td>\n", " <td>0.65</td>\n", " <td>3</td>\n", " <td>172</td>\n", " <td>5</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>11999</th>\n", " <td>0.50</td>\n", " <td>0.73</td>\n", " <td>4</td>\n", " <td>180</td>\n", " <td>3</td>\n", " <td>0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>12000 rows × 6 columns</p>\n", "</div>" ], "text/plain": [ " S LPE NP ANH TIC Newborn\n", "0 0.38 0.53 2 157 3 0\n", "1 0.80 0.86 5 262 6 0\n", "2 0.11 0.88 7 272 4 0\n", "3 0.72 0.87 5 223 5 0\n", "4 0.37 0.52 2 159 3 0\n", "... ... ... .. ... ... 
...\n", "11995 0.90 0.55 3 259 2 1\n", "11996 0.74 0.95 5 266 4 0\n", "11997 0.85 0.54 3 185 3 0\n", "11998 0.33 0.65 3 172 5 0\n", "11999 0.50 0.73 4 180 3 0\n", "\n", "[12000 rows x 6 columns]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "98ada868-95bb-4351-9c35-4f2678e89f76", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>S</th>\n", " <th>LPE</th>\n", " <th>NP</th>\n", " <th>ANH</th>\n", " <th>TIC</th>\n", " <th>Newborn</th>\n", " <th>left</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0.38</td>\n", " <td>0.53</td>\n", " <td>2</td>\n", " <td>157</td>\n", " <td>3</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>0.80</td>\n", " <td>0.86</td>\n", " <td>5</td>\n", " <td>262</td>\n", " <td>6</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>0.11</td>\n", " <td>0.88</td>\n", " <td>7</td>\n", " <td>272</td>\n", " <td>4</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>0.72</td>\n", " <td>0.87</td>\n", " <td>5</td>\n", " <td>223</td>\n", " <td>5</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>0.37</td>\n", " <td>0.52</td>\n", " <td>2</td>\n", " <td>159</td>\n", " <td>3</td>\n", " <td>0</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " S LPE NP ANH TIC Newborn left\n", "0 0.38 0.53 2 157 3 0 1\n", "1 0.80 0.86 5 262 6 0 1\n", "2 0.11 
0.88 7 272 4 0 1\n", "3 0.72 0.87 5 223 5 0 1\n", "4 0.37 0.52 2 159 3 0 1" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_hr = pd.read_csv(\"DATA_3.02_HR2.csv\"); df_hr.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "fac951c6-1207-4d7a-95c1-90506764e53f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*. Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.\n" ] }, { "data": { "text/plain": [ "<AxesSubplot:xlabel='Time in Company', ylabel='Attrition'>" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEQCAYAAABFtIg2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAApxklEQVR4nO3dfVRUdeI/8PfceeBBQGAccFAUJdPx+WnVIrU0RXMQslxccvvtqrg9rHksSzaPgLLuhu2axWq1Wq5GteUpNUnT0rXEFLN8SsRcBUEdHgQNERS4c39/+HXWCS/O4MydUd6vczoHZj7cz9tphjdz753PVUmSJIGIiOgmBE8HICIi78WSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlmKlURhYSESExMRGxuLxMREFBUV3XTc5s2bERcXB7PZjLi4OJw/f16piERE9AsqpT4n8eSTT+Kxxx5DfHw8Nm7ciE8++QRr1661G3PkyBHMmzcPa9asgcFgwKVLl6DT6eDj46NERCIi+gVF3klUVlYiPz8fZrMZAGA2m5Gfn4+qqiq7cf/6178wbdo0GAwGAEBgYCALgojIgxQpCYvFgvDwcKjVagCAWq1GWFgYLBaL3biTJ0+ipKQETzzxBB599FGsWLEC/EA4EZHnaDwd4EaiKOL48eNYvXo16uvrMWPGDERERCAhIcHT0YiIWiVFSsJoNKKsrAyiKEKtVkMURZSXl8NoNNqNi4iIwLhx46DT6aDT6TB69GgcPnzYqZK4cOEyrFbn333o9QGorKxx+ufcjbmcw1zO89ZszOWcluYSBBVCQtrI3q9ISej1ephMJuTk5CA+Ph45OTkwmUwIDQ21G2c2m/H1118jPj4ejY2N2Lt3L2JjY52ay2qVWlQS13/WGzGXc5jLed6ajbmc445cip0Cm56ejuzsbMTGxiI7OxsLFy4EACQnJ+PIkSMAgAkTJkCv1+ORRx5BQkIC7rnnHjz++ONKRSQiol9Q7BRYpVRW1rSoTQ2GQFRUXHJDotvDXM5hLud5azbmck5LcwmCCnp9gPz9txOKiIjubi
wJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZGqUmKiwsREpKCi5evIjg4GBkZmYiKirKbkxWVhY++OADhIWFAQAGDhyItLQ0pSISEdEvKFYSaWlpSEpKQnx8PDZu3IjU1FSsXbu2ybiEhATMmzdPqVhERNQMRXY3VVZWIj8/H2azGQBgNpuRn5+PqqoqJaYnIqIWUqQkLBYLwsPDoVarAQBqtRphYWGwWCxNxn7++eeIi4vDtGnTcODAASXiERGRDMV2NzliypQpeOqpp6DVarF7924888wz2Lx5M0JCQhzehl4f0OL5DYbAFv+sOzGXc5jLed6ajbmc445cipSE0WhEWVkZRFGEWq2GKIooLy+H0Wi0G2cwGGxfx8TEwGg04sSJExgyZIjDc1VW1sBqlZzOaDAEoqLiktM/527M5Rzmcp63ZmMu57Q0lyComv3jWpHdTXq9HiaTCTk5OQCAnJwcmEwmhIaG2o0rKyuzfX3s2DGcPXsWXbp0USIiERHdhGK7m9LT05GSkoIVK1YgKCgImZmZAIDk5GQ899xz6NOnD5YuXYqjR49CEARotVosWbLE7t0FEREpS7GSiI6Oxrp165rcvnLlStvX14uDiIi8Az9xTUREslgSREQkiyVBRESyvOpzEkTUulklCVXVV3C1wYpGlQCV1Qq1wL9lPYklQUQedbVexJ6jpdh58CzOna+FIACCSgUAaGi0IizEDw/0MWJ4vwgE+Gk9nLb1YUkQkUdIkoSdB87i4//8F4AKVxvEa3eI9uMslbXYmFuIDbmFGDekE+JioqBR892FUlgSRKS42iuNWLbuEIrLL6G+wXrL8fWN18Zs/a4Y3xWUY+6U/ggN8nV3TAIPXBORwuquNuIv7+1HUWm1QwVxo/oGK8ov1GHRmv24cOmqmxLSjVgSRKSolZvyUX6xDo2i82usAdcObtfUNeC1jw+2aJ02cg5LgogU8/3xCuSfrmpxQVxntUoov1iHrfuKXZSM5LAkiEgRkiTh39tPOL2LSU59gxWf7S5CfYN468HUYiwJIlLETyUXUVPX4PLt7jtW7vJt0v+wJIhIEd8fr3D5X/1XG0Tk5Ze6dJtkjyVBRIr46cxFuOMw8+myGjdsla5jSRCRIs7/fMUt271c14BG0TXHOagplgQRKUKS3HO6qkoFngrrRiwJIlKETqN205ZV0Gj4q8xd+MgSkSIiwwLcsl1DsK9tQUByPZYEESnC1DkEGrVrf5mrVEC3yGCXbpPssSSISBHDerWHCq4tCa1GwIP9O7h0m2SPJUFEiggJ9IEpKgSC4Lqi0Af5oosx0GXbo6ZYEkSkmN+O7e6yXU5ajYAZ5p5Q8XiEW7EkiEgx+ra+eGLMvdDd5tlIOq2AsYMj0cUY5KJkJIclQUSKGt43Aub7O7e4KHRaAUN6hGPSyK4uTkY3wyvTEZHizPd3gSHYD2u+OI6GRitEBz4Mp1IBWrWAR4d3xdhfRXI3k0JYEkTkEUN7tkePTiFYt/Mkvisoh6C64TrXN9BpBFgloHeXUCSOugfhof4eSNt6sSSIyGPaBvhghrknkh6+F4f+ex4nzvyMwtJq1DeI8PXRIELvj24dg9HvnnZo20bn6bitEkuCiDzO31eD+3q3x32929tuMxgCUVFxyYOpCFDwwHVhYSESExMRGxuLxMREFBUVyY49deoU+vXrh8zMTKXiERHRTShWEmlpaUhKSsLWrVuRlJSE1NTUm44TRRFpaWl4+OGHlYpGREQyFCmJyspK5Ofnw2w2AwDMZjPy8/NRVVXVZOw///lPPPjgg4iKilIiGhERNUORYxIWiwXh4eFQq68tFaxWqxEWFgaLxY
LQ0FDbuIKCAuTm5mLt2rVYsWJFi+bS61u+0qTB4J0f72cu5zCX87w1G3M5xx25vObAdUNDAxYsWIC//vWvtjJpicrKmhZdgMRbD5Ixl3OYy3nemo25nNPSXIKgavaPa0VKwmg0oqysDKIoQq1WQxRFlJeXw2g02sZUVFSguLgYM2fOBABUV1dDkiTU1NQgIyNDiZhERPQLipSEXq+HyWRCTk4O4uPjkZOTA5PJZLerKSIiAnl5ebbvs7KyUFtbi3nz5ikRkYiIbkKxs5vS09ORnZ2N2NhYZGdnY+HChQCA5ORkHDlyRKkYRETkBMWOSURHR2PdunVNbl+5cuVNx8+aNcvdkYiI6Ba4CiwREcliSRARkSyWBBERyWJJEBGRLJYEERHJYkkQEZEslgQREcliSRARkSyWBBERyXLoE9clJSVYtmwZjh07htraWrv7du7c6Y5cRETkBRwqiblz5yIyMhLz5s2Dn5+fuzMREZGXcKgkTpw4gQ8//BCCwL1TREStiUO/9X/1q18hPz/f3VmIiMjLOPROokOHDpgxYwbGjBmDdu3a2d03e/ZstwQjIiLPc6gk6urq8NBDD6GxsRGlpaXuzkRERF7CoZL461//6u4cRETkhRy+6FBRURFycnJQXl6OsLAwmM1mREVFuTEaERF5mkMHrnfs2IFJkyahsLAQbdu2RWFhIR577DFs377d3fmIiMiDHHon8dprr2HFihUYNmyY7ba8vDxkZGRg9OjRbgtHRESe5dA7idLSUgwePNjutkGDBvEgNhHRXc6hkujRowfeffddu9tWr14Nk8nkllBEROQdHNrdlJ6ejqeffhpr166F0WiExWKBn58f3nrrLXfnIyIiD3KoJKKjo7F582YcPHjQdnZTv379oNVq3Z2PiIg8yOFTYDUaTZPjEkREdHeTLYnx48djy5YtAICRI0dCpVLddByXCiciunvJlkRGRobt61dffVWRMERE5F1kS+LGXUuVlZUYP358kzFffPGFe1IREZFXcOgU2Pnz59/09tTUVIcnKiwsRGJiImJjY5GYmIiioqImYz755BPExcUhPj4ecXFxWLt2rcPbJyIi12v2wHVJSQkAQJIk29c33qfT6RyeKC0tDUlJSYiPj8fGjRuRmprapARiY2MxadIkqFQq1NTUIC4uDkOGDEGPHj0cnoeIiFyn2ZIYM2YMVCoVJEnCmDFj7O5r164dZs2a5dAklZWVyM/Px+rVqwEAZrMZGRkZqKqqQmhoqG1cQECA7esrV66goaFB9oA5ERG5X7MlUVBQAACYOnUqsrOzWzyJxWJBeHg41Go1AECtViMsLAwWi8WuJABg+/btWLp0KYqLi/HCCy+ge/fuLZ6XiIhuj0Ofk7idgnDW6NGjMXr0aJw7dw7PPvssRowYga5duzr883p9wK0HyTAYAlv8s+7EXM5hLud5azbmco47csmWxPTp0/HOO+8AAJKSkmR3+7z//vu3nMRoNKKsrAyiKEKtVkMURZSXl8NoNMr+TEREBPr06YOdO3c6VRKVlTWwWiWHx19nMASiouKS0z/nbszlHOZynrdmYy7ntDSXIKia/eNatiQSEhJsX0+ePNnpiW+k1+thMpmQk5OD+Ph45OTkwGQyNdnVdPLkSURHRwMAqqqqkJeXh7Fjx97W3ERE1HKyJREXFwcAEEURxcXFePrpp506m+mX0tPTkZKSghUrViAoKAiZmZkAgOTkZDz33HPo06cPPvroI+zevRsajQaSJGHq1Kl44IEHWjwnERHdHpUkSbfcNzN06FDs2bMHguDQxyo8irublMFczvHWXID3ZmMu57hrd5NDv/UTEhLw4YcfOj05ERHd2Rw6u+nw4cPIzs7GO++8g/bt29sdxHbkwDUREd2ZHCqJX//61/j1r3/t7ixERORlHCqJrl27ol+/fk1uP3z4sMsDERGR93CoJH7/+9/jhx9+aHL7jBkzsG/fPpeHIrrTWSUJJWU1KCqtRkl5DV
SCAEgSOoUFIMoYhI6GNlxyhu4IzZaE1WqFJEl2/11XXFxsW2aDiK6pu9qI/xw4g237SnC1wQoJEuobrLb7fbQCABX8fDQYNzQSI/t3gI+WryPyXs2WRM+ePW1/7fTs2dPuPkEQ8NRTT7kvGdEd5lhRFd767Ciu1ItoaLTedMzV/yuMqw0iPv36FL7IK8bTCb3RrWOwgkmJHNdsSWzfvh2SJOG3v/2t3fpNKpUKoaGh8PX1dXtAojvBtu+K8enXp1AvUw43U99oRX1NPf7+74NIGnMvRvSLcGNCopZptiQ6dOgA4NoqsNe/vtHq1avx+9//3j3JiO4QOw+edbogblTfaMUHX/4EnVbAsJ7tXZyO6PY49GG65cuX3/T2N99806VhiO40pVW1+PdXJ1pcENfVN1qxZksBqqqvuCgZkWs0+05iz549AK6t37R37167A9clJSVo06aNe9MRebm3PzuKBvH2CuK6BlHCqpx8vJQ00CXbI3KFZkvi+rWt6+vr8fLLL9tuV6lUMBgMWLBggXvTEXmxQks1LOcv49arnznGapVw8lw1LJWXYdTzDzDyDs2WxI4dOwAAL730EpYsWWK7vaCgwHad6lGjRrk3IZGX+vK7Epe9i7hOtFqx/fszmDqWV2Qk7+DQh+mWLFmCqqoqbNq0CRs2bEBBQQEGDx5se6dB1BodO33BZe8irrNagR8Lq1y7UaLb0GxJNDQ0YMeOHVi/fj1yc3PRqVMnTJgwAWfPnsWyZcug1+uVyknkVequNqKmrsEt2678+QoaRSs0au9fmp/ufs2WRExMDFQqFSZNmoRZs2ahV69eAMBlw6nVq75cD61GgFgvunzbgqDC5SuNaNum5Rf5InKVZv9U6d69Oy5duoRDhw7hyJEj+Pnnn5XKReTVXLyXqen2Xb0fi6iFmi2J9957D19++SViYmLw7rvvIiYmBk899RRqa2vR2NioVEYirxPor5VdeuN2Wa0S2vg6dLiQyO1uudOzQ4cOePbZZ7Ft2zb861//gsFggCAImDhxot0ZT0StSRtfLfzd9Is8JNAHWg0X/SPv4NSRscGDByMjIwO7d+/GggUL8NNPP7krF5HXuzcy2OXbFFQqmDqHuHy7RC3Voj+FfHx8YDabYTabXZ2H6I4xZnAkfjxVhasNrjt4rVGrMHpQR5dtj+5uFy5dxd6jpTj/8xW0C/VH9w5t0TUiyKVzcMcnUQt169gWoUE+sFTWumR7KhUQ0a4NOoUHumR7dPeqbxDx7uZj+OGnCgBAoyhBBUCrFaAP8sUfJ/Vx2af2eSI2UQupVCr8YWIvaDWueRlp1QJmTuzlkm3R3atRtOLvHx3EgRPn0ShKaBSvnQknAahvsKK0shaL136P8guu+eOFJUF0GzqFB2JiTBR0t1kUOo2AyQ/dg/ah/i5KRnervPwyFJddkj27TgJQV9+ItVuPu2Q+lgTRbXpkWGc8PDgSOm3LXk46jYC4mCgeiyCHbN572naFQzmSBPxU8rNLlp5nSRDdJpVKhccfjMb0CT3hq1NDLagc+jmNWgV/Hw2eiu+NCfdFuTck3RUaGq0orXJsN5JGrcJ/z97+B6B54JrIRX7VIwzdI4ORs6cIuw5ZoFJde1GL1v99elotqKDTCJAAPDigAx4Z1hkBflrPhaY7itUqQQUVJAc+8y9JsHvutZRiJVFYWIiUlBRcvHgRwcHByMzMRFRUlN2Y5cuXY/PmzRAEAVqtFnPmzMHw4cOVikgOaBStqL3aCOHnOtTUNcDfVwNB5dhfzq1BUBsdkh6+F5MfjMbx4osoLK1GkeUSJJUKAiR0MQYhyhiE7pHBXMCPnKbTCvDzUePyFUdWvJBccoxLsZJIS0tDUlIS4uPjbdeiWLt2rd2Yvn37Ytq0afDz80NBQQGmTp2K3Nxc+Pr6KhWTfsFqlXDo5Hn88FMF/nvmZ5z/+QoEQQWVSgWr1QqVSgWj3h/3dgzGfb3bo4vRtedo36m0GjV6d9Wjd9
drKyUbDIGoqLjk4VR0p1OpVBg1sCO25J22ndUkJzjAB1Htb/90akVKorKyEvn5+Vi9ejUAwGw2IyMjA1VVVQgNDbWNu/FdQ/fu3SFJEi5evIj27XlxeKVdbRCxbV8xvtx/Bo2iFVduWO3U/i2shOKyGpSU1eCbw+cQGuiLuPujMLRXON9hELnB6MEd8Z8DZ5tdql6rETBldDeoXPAaVOT9rsViQXh4ONTqa+vRqNVqhIWFwWKxyP7Mhg0b0KlTJxaEB/xUchEpb+9Bzp7TqKlrsCsIObZztKtqsXZrAf7y3vc4f7HO/WGJWpkgfx1SnhiIIH8tfLT2a3xp1CpoNQJ+G9sd/e5p55L5vPLA9b59+/D666/j3Xffdfpn9fqAFs9rMHjnJ12VyiVJEtZuPobPdp1E/S1OsWvO1QYrikovYcG7+/BC0kDc1yfChSlvrbX/f2wJb83GXPLzv5sai9yDZ7Fp1ylUVV+Bj06N4f07YEJMF+jb+rlsLkVKwmg0oqysDKIoQq1WQxRFlJeXw2g0Nhl74MABvPjii1ixYgW6du3q9FyVlTWwtuCIvrfuM1YqlyRJWLv1OPYcLb2tgrjOapVwtV7Eq9nf43fjL+O+Xsq8I2zt/x9bwluzMdet9Y0KQd+oQQD+l8ta3+hUPkFQNfvHtSK7m/R6PUwmE3JycgAAOTk5MJlMdscjAODw4cOYM2cO3njjDdtV8EgZG3MLXVYQN2potGLNlgIcLeJ1m4nuRIqdg5eeno7s7GzExsYiOzsbCxcuBAAkJyfjyJEjAICFCxfiypUrSE1NRXx8POLj43H8uGs+Wk7yCi3V2JJX7PKCuK6+0Yq3Nx5FrUOn7RGRN1HsmER0dDTWrVvX5PaVK1favv7kk0+UikP/p6HRijc3/Oi2q6xdd6VexHvbjuMPXMCO6I7CT/O0cnuOlqK6tt7t8zSKVvzwUwXOnb/s9rmIyHVYEq2YJEnYvOe023Yz/ZJoteKr/SWKzEVErsGSaMVOWapx8fJVxeazWoFvfyx16ZXciMi9WBKtWH5RFcRbfLTf1dSCCkWWakXnJKKWY0m0YseLL7pklUhnNIhWnC71jnPMiejWWBKtWEl5jeJzNooSfjpzUfF5iahlWBKt2JWrnjk2UH1ZfmEyIvIuLIlWzCopu6vJNq/Cu7iIqOVYEq2YRu2Zpbxbei1oIlIeX62tmD5I+Ys5qQBEhrV8pV4iUhZLohXrFhms+Jw+OjWiO7RVfF4iahmWRCvWrWPbJhctcTfRKvESp0R3EJZEKzagm0Hxg9fhIX4wBLvugihE5F4siVbMz0eDIaYwCIIyB7B9dWo8MqyzInMRkWuwJFq58UM7Q61QSWjUAgZ1D1NkLiJyDZZEKxfRrg3GDI50+2mpOo2AP0zsBa2GTzmiOwlfsYSE4V0QHOADlZveUGg1Agb1CEOvLqG3HkxEXoUlQdCoBTyf2B/+Pq6/UKFGrUKEvg2ejO3u8m0TkfuxJAgAEBbsh5d/OwgBfloILnpWaDUCOhoC8FLSAMVPtSUi12BJkI1R3wYLpw1Bt47B8LnNYxRajYDhfY1IeWIg/NzwDoWIlMFXL9kJCfTBS78ZgNwjFnz41QlIEpy6kpyPVo02fhr8YWIvdOsY7L6gRKQIlgQ1oVKpMLxvBIb1DMe+Y+XYsvc0yi7UQacVUN9gtbtQkVYjQCOoUN9oRfdOwRg/tDNMUSEQ3HUUnIgUxZIgWVqNGjF9jIjpY0RNXQNOl15CUWk1qqqvQq1RQ7KKaB/aBp3bByIyLIDHHYjuQiwJckiAnxa9uoTaTmM1GAJRUcHLkBLd7XjgmoiIZLEkiIhIFkuCiIhkKVYShYWFSExMRGxsLBITE1FUVNRkTG5uLiZNmoTevXsjMzNTqWhERCRDsZJIS0tDUlIStm7diqSkJKSmpjYZExkZicWLF2P69OlKxSIiomYoUhKVlZXIz8+H2WwGAJjNZuTn56OqqspuXOfOnWEymaDR8K
QrIiJvoEhJWCwWhIeHQ62+dh69Wq1GWFgYLBaLEtMTEVEL3XV/suv1AS3+WYMh0IVJXIe5nMNczvPWbMzlHHfkUqQkjEYjysrKIIoi1Go1RFFEeXk5jEajy+eqrKyB1er8dZu99cNhzOUc5nKet2ZjLue0NJcgqJr941qR3U16vR4mkwk5OTkAgJycHJhMJoSG8iI0RETeTLGzm9LT05GdnY3Y2FhkZ2dj4cKFAIDk5GQcOXIEALB//36MGDECq1evxr///W+MGDECu3btUioiERH9gmLHJKKjo7Fu3bomt69cudL29eDBg/HNN98oFYmIiG6Bn7gmIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSpfF0AE87U1GDbftK8HNdA3p2CsaD/TvAR6f2dCwiIq/Qqkui4PQFLFt3CI2iFVYJKCiqwu4jFiz4f7+CVsM3WURErfo34ftf/oT6xmsFAQANjVZUXLyC/QXlng1GROQlWnVJnKu83OS2qw0iTpz52QNpiIi8j2IlUVhYiMTERMTGxiIxMRFFRUVNxoiiiIULF+Lhhx/GmDFjsG7dOrdmCgn0aXKbTiOgg8HfrfMSEd0pFCuJtLQ0JCUlYevWrUhKSkJqamqTMZs2bUJxcTG2bduGjz76CFlZWThz5ozbMj02Mhq6G449CCrAV6fGfb2MbpuTiOhOosiB68rKSuTn52P16tUAALPZjIyMDFRVVSE0NNQ2bvPmzZg8eTIEQUBoaCgefvhhfPHFF5gxY4bDcwmCyuGxMX2MCA7wwVf7S1BzpRHRHYIwbkgnBPhrHf/HKcCZf5OSmMs53poL8N5szOWcluS61c8oUhIWiwXh4eFQq6+dWqpWqxEWFgaLxWJXEhaLBREREbbvjUYjSktLnZorJKSNU+NH6AMwYnAnp35GaXp9gKcj3BRzOcdbcwHem425nOOOXK36wDURETVPkZIwGo0oKyuDKIoArh2gLi8vh9FobDLu3Llztu8tFgvat2+vREQiIroJRUpCr9fDZDIhJycHAJCTkwOTyWS3qwkAxo0bh3Xr1sFqtaKqqgpfffUVYmNjlYhIREQ3oZIkSVJiopMnTyIlJQXV1dUICgpCZmYmunbtiuTkZDz33HPo06cPRFHEokWLsHv3bgBAcnIyEhMTlYhHREQ3oVhJEBHRnYcHromISBZLgoiIZLEkiIhIFkuCiIhktZrrSVy4cAEvvfQSiouLodPp0LlzZyxatKjJabh1dXX405/+hKNHj0KtVmPevHl46KGHvCJbSkoKvv32W4SEhAC4dsrw008/7dZszzzzDM6cOQNBEODv748FCxbAZDLZjRFFEX/+85+xa9cuqFQqzJw5E5MnT/Z4rqysLHzwwQcICwsDAAwcOBBpaWluzQUA//jHP5CVlYVNmzbh3nvvtbvPE88vR3J54rkFAKNGjYJOp4OPz7XFNufOnYvhw4fbjfHEY+ZILk88ZlevXsVf/vIX7NmzBz4+Pujfvz8yMjLsxrj89Si1EhcuXJD27t1r+/6VV16R/vSnPzUZl5WVJc2fP1+SJEkqLCyU7r//fqmmpsYrss2bN09677333Jrll6qrq21ff/nll1JCQkKTMevXr5emTZsmiaIoVVZWSsOHD5dKSko8nuuNN96QXnnlFbfm+KUff/xRmj59uvTQQw9Jx48fb3K/J55fjuTyxHNLkiTZPDfyxGPmSC5PPGYZGRnS4sWLJavVKkmSJFVUVDQZ4+rXY6vZ3RQcHIyhQ4favu/fv7/dp7uv27Jli+2zGVFRUejduze++eYbr8jmCYGBgbava2pqoFI1XQxMbmFGT+dSWn19PRYtWoT09HTZMZ54fjmSy5t54j
HzRpcvX8aGDRswe/Zs2/O9Xbt2Tca5+vXYanY33chqteLDDz/EqFGjmtx37tw5dOjQwfZ9SxYZdFc2AFi9ejU++ugjREZG4oUXXkB0dLTbM82fPx+7d++GJElYtWpVk/tdsTCjO3IBwOeff47c3FwYDAbMmjULAwYMcFue119/HRMnTkTHjh1lx3ji+eVILsAzzy3g2q4cSZIwaNAgPP/88wgKCrK731OvyVvlApR9zEpKShAcHIx//OMfyMvLQ5s2bTB79mwMHjzYbpyrX4+t5p3EjTIyMuDv74+pU6d6OkoTzWWbM2cOvvzyS2zatAljx47FjBkzbOthudPixYuxc+dOzJkzB0uWLHH7fI66Va4pU6Zg+/bt2LRpE6ZPn45nnnkGFy5ccEuWAwcO4Mcff0RSUpJbtt9Sjuby1HPr/fffx2effYZPPvkEkiRh0aJFbp/TEY7kUvoxE0URJSUl6NmzJz799FPMnTsXs2bNQk1NjdvmBFphSWRmZuL06dNYtmwZBKHpPz8iIgJnz561fa/kIoO3yhYeHm67PSEhAbW1tYq+y0lISEBeXl6TX7SeXphRLpfBYIBWe+3aIDExMTAajThx4oRbMnz33Xc4efIkRo8ejVGjRqG0tBTTp09Hbm6u3Tiln1+O5vLUc+v6Ip86nQ5JSUn44YcfmozxxGvSkVxKP2ZGoxEajQZmsxkA0K9fP4SEhKCwsLDJOFe+HltVSSxduhQ//vgjli9fDp1Od9Mx48aNw0cffQQAKCoqwpEjR5qc1eCpbGVlZbavd+3aBUEQEB4e7rZMly9fhsVisX2/Y8cOtG3bFsHBwXbjlF6Y0dFcNz5ex44dw9mzZ9GlSxe3ZJo5cyZyc3OxY8cO7NixA+3bt8c777yDBx54wG6c0s8vR3Mp/dwCgNraWly6dAkAIEkSNm/e3OQMNUD5x8zRXEo/ZqGhoRg6dKhtbbvCwkJUVlaic+fOduNc/XpsNcckTpw4gbfffhtRUVGYMmUKAKBjx45Yvnw54uPj8c9//hPh4eGYPn06UlJSMGbMGAiCgEWLFiEgwL0XGHE027x581BZWQmVSoWAgAC8+eab0Gjc97+wrq4Os2fPRl1dHQRBQNu2bfHWW29BpVLZLcwYHx+PQ4cOYezYsQCAZ599FpGRkR7PtXTpUhw9ehSCIECr1WLJkiUwGAxuyyXH088vR3Ip/dwCrl2xctasWRBFEVarFdHR0bZTlD35mDmayxOP2cKFC/Hyyy8jMzMTGo0GS5YsQVBQkFtfj1zgj4iIZLWq3U1EROQclgQREcliSRARkSyWBBERyWJJEBGRLJYE3ZEmTJiAvLw8t8/z1ltvYf78+W6fh8hb8RRY8ko3rrFUV1cHnU4HtVoN4Nq54hMnTvRUNKccPnwYWVlZOHDgAARBQKdOnfCb3/wGjz32mKejETmEJUFeb9SoUfjzn/+M+++/39NRnHLgwAFMmzYNTz/9NB5//HGEhITg6NGjWLlyJV5//XVPxyNyCHc30R1p1KhR+PbbbwFcu7jQc889h7lz52LAgAGIi4tDYWEh3n77bdx3330YOXKk3VpFly5dwssvv4wHHngAw4cPx2uvvSa7MFtWVhbmzp0LADhz5gy6d++O9evX48EHH8TQoUPx5ptvymZcsmQJEhISMHPmTISGhkKlUqF37952BfHxxx9jzJgxGDJkCJ566im7pR66d++O999/H2PHjsWAAQOwbNkyFBcXY8qUKRg4cCBmz56N+vp6AEBeXh5GjBiBt956C0OHDsWoUaPw2Wef2ba1c+dOJCQkYODAgRg5ciSysrJs9zX376qoqEC/fv3s1sU6evQohg0bhoaGhlv/j6I7HkuC7gr/+c9/EB8fj++++w4mkwnTp0+H1WrFN998g2effRapqam2sSkpKdBoNNi2bRs2bNiA3bt3Y926dQ7P9f333+OLL77AmjVrsHz5cpw8ebLJmLq6Ohw8eLDZNXP27NmDv//971
i2bBlyc3PRoUMHPP/883ZjcnNz8emnn+Ljjz/GqlWrsGDBArz66qv4+uuvceLECXz++ee2sefPn8eFCxewa9cuvPLKK0hNTcWpU6cAAH5+fsjMzMT+/fvx9ttv48MPP8RXX311y3+XwWDAkCFDsGXLFtu4jRs3YsKECbbFE+nuxpKgu8LgwYMxfPhwaDQajBs3DhcuXMDMmTOh1WrxyCOP4OzZs6iursb58+fx9ddf4+WXX4a/vz/0ej1+97vf2f2yvZU//vGP8PX1RY8ePdCjRw8UFBQ0GVNdXQ2r1drsWlGbNm3CY489hl69ekGn0+H555/HwYMHcebMGduYGTNmICAgAN26dcO9996LmJgYREZGIjAwECNGjEB+fr7dNmfPng2dTochQ4Zg5MiRtl/uQ4cORffu3SEIAnr06IEJEyZg3759Dv27Hn30Udu7ElEU8fnnnyM+Pt7hx4vubK1mgT+6u+n1etvXvr6+CAkJsR3o9vX1BXBtdc/y8nI0NjbarYJqtVptS0M74sargfn5+aG2trbJmKCgIAiCgIqKCtkL0ZSXl6NXr16279u0aYPg4GCUlZXZLhB041w+Pj5Nvj9//rzdnP7+/rbvIyIiUF5eDgA4dOgQ/va3v+HEiRNoaGhAfX09xo0b59C/a/To0UhLS0NJSQkKCwsREBCAvn37NvMI0d2EJUGtSvv27aHT6bB37163rtjp5+eH/v37Y9u2bRg2bNhNx4SFhdldJ6G2thYXL15s8XLT1dXVqK2ttRWFxWJBt27dAAAvvPACpk6dilWrVsHHxweLFy92+AJMPj4+GD9+PD777DOcOnWK7yJaGe5uolYlLCwMMTExeOWVV1BTUwOr1Yri4uImu15c4cUXX8T69euxatUq2y/kgoICzJkzBwBgNpvx6aef4tixY6ivr8fSpUvRt2/fW15mtDlZWVmor6/H/v37sXPnTtu7hcuXL6Nt27bw8fHB4cOHkZOT49R24+PjsX79euzYsYMl0crwnQS1OkuWLMHf/vY3PPLII7h8+TIiIyORnJzs8nkGDhyINWvW4I033sCbb74JtVqNzp0744knngAA3H///Zg9ezZmzZqF6upqDBgwAK+99lqL52vXrh2CgoIwfPhw+Pn5IT093barKy0tDZmZmVi0aBGGDBmC8ePHo7q62uFtDxo0CIIgoFevXnbXm6a7Hz8nQXQXyMvLw4svvohvvvnGbXM8+eSTiIuLw+TJk902B3kf7m4iols6fPgw8vPzMX78eE9HIYVxdxMRNWvevHn46quvMH/+fI9dapU8h7ubiIhIFnc3ERGRLJYEERHJYkkQEZEslgQREcliSRARkSyWBBERyfr/MTLQu+oZ91EAAAAASUVORK5CYII=\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_hr.groupby(['TIC'])['left'].agg(['mean', 'sum']).reset_index().plot.scatter(y='mean',x='TIC', s='sum', xlabel='Time in Company', ylabel='Attrition',ylim=(0,0.6) )" ] }, { "cell_type": "code", "execution_count": null, "id": "7d62f30a-835b-44d1-a4c8-3c040797b727", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " 
vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>TIC</th>\n", " <th>mean</th>\n", " <th>sum</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2</td>\n", " <td>0.010262</td>\n", " <td>31</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>3</td>\n", " <td>0.165727</td>\n", " <td>882</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>4</td>\n", " <td>0.240777</td>\n", " <td>496</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>5</td>\n", " <td>0.444240</td>\n", " <td>482</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>6</td>\n", " <td>0.212891</td>\n", " <td>109</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " TIC mean sum\n", "0 2 0.010262 31\n", "1 3 0.165727 882\n", "2 4 0.240777 496\n", "3 5 0.444240 482\n", "4 6 0.212891 109" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_hr.groupby(['TIC'])['left'].agg(['mean', 'sum']).reset_index()" ] }, { "cell_type": "code", "execution_count": null, "id": "f7a3908c-8464-4e72-9136-5d868c082973", "metadata": {}, "outputs": [], "source": [ "df_hr['S_ranked'] = -np.ceil(df_hr['S'].rank(method='max')/600)" ] }, { "cell_type": "code", "execution_count": null, "id": "a5798cfc-4a2f-4b6d-bab7-3f16ecfccd20", "metadata": {}, "outputs": [], "source": [ "df_hr['attrition'] = df_hr.groupby('S_ranked')['left'].transform('mean')" ] }, { "cell_type": "code", "execution_count": null, "id": "dd1af100-2d5a-4151-a5a2-f1b1dd1f28e1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*. 
Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.\n" ] }, { "data": { "text/plain": [ "<AxesSubplot:xlabel='S_ranked', ylabel='attrition'>" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEMCAYAAAAxoErWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAdVklEQVR4nO3dfVRUdeLH8Q8zSthPXIMAh54s3Yyt1LUHe9DWTAMLDuCGGD2ZR8pK2+20KGWhrC3mbr/aFnU7m6EZPUkpLqOpmbtlZZrWBifKY4ZZMoCCbj7gYsP9/eFP1hEuDMwj8H6d4znD8L1zPw4XPnO/987cEMMwDAEA0AJLoAMAAIIXJQEAMEVJAABMURIAAFOUBADAFCUBADBFSQAATPUIdABvO3DgiBob2//Wj8jI3qqtPeyDRN5BPs+Qz3PBnpF8HWOxhOiss/7H9PtdriQaG40OlcTJZYMZ+TxDPs8Fe0byeR/TTQAAU5QEAMAUJQEAMEVJAABMdbkD1wDQ3Ux+emPT7YLs0V59bPYkAKATO7UgWvraU5QEAHRSZoXgzaKgJAAApigJAIApSgIAOimzg9TePHhNSQBAJ3Z6IXj77CZOgQWATs7bxXAq9iQAAKYoCQCAKUoCAGCKkgAAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApvz2juuKigplZ2fr4MGD6tu3r+bPn6/+/fu7jKmtrdVjjz0mh8Ohn376ScOHD9cTTzyhHj14YzgABILf9iRmz56tjIwMrVu3ThkZGcrJyWk25oUXXtCAAQNUUlKiv//97/ryyy+1fv16f0UEAJzGLyVRW1ur8vJyJSYmSpISExNVXl6uuro6l3EhISE6cuSIGhsb1dDQoOPHjysmJsYfEQEALfBLSTgcDsXExMhqtUqSrFaroqOj5XA4XMY9+OCDqqio0IgRI5r+XXHFFf6ICABoQVBN9q9du1aDBg3Syy+/rCNHjigzM1Nr165VQkKC248RGdm7w+uPigrv8LL+QD7PkM9zwZ6RfN7nl5Kw2Wyqrq6W0+mU1WqV0+lUTU2NbDaby7jCwkLl5eXJYrEoPDxco0eP1pYtW9pVErW1h9XYaLQ7Y1RUuPbtO9Tu5fyFfJ4hn+eCPSP5OsZiCWn1xbVfppsiIyMVFxcnu90uSbLb7YqLi1NERITLuHPPPVcffPCBJKmhoUGbN2/Wz3/+c39EBAC0wG9nN82ZM0eFhYWKj49XYWGhcnNzJUmZmZkqKyuTJD3++OPavn27kpKSlJKSov79+2vChAn+iggAOE2IYRjtn5sJYkw3BQb5PBPs+aTgz0i+jgmK6SYAQOdESQAATFESAABTlAQAwBQlAQAwRUkAAExREgAAU5QEAMAUJQEAMEVJAABMURIAAFOUBADAFCUBADBFSQAATAXV5UsBIBAmP72x6XZB9ugAJgk+7EkA6NZOLYiWvu7uKAkA3ZZZIVAU/0VJAABMURIAAFOUBIBuy+wgNQev/4uSANCtnV4IFIQrToEF0O1RDObYkwAAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACY4h3XADo9LhrkO+xJAOjUuGiQb1ESADotLhrke5QEAMAUJQEAMOW3kqioqFB6erri4+OVnp6u3bt
3tzhuzZo1SkpKUmJiopKSkrR//35/RQTQyXDRIN/zW0nMnj1bGRkZWrdunTIyMpSTk9NsTFlZmRYsWKCCggLZ7Xa99tprCg8P91dEAJ0QFw3yLb+cAltbW6vy8nItWbJEkpSYmKi5c+eqrq5OERERTeOWLl2qyZMnKyoqSpIoCABuKcgeraiocO3bdyjQUbocv+xJOBwOxcTEyGq1SpKsVquio6PlcDhcxu3atUvff/+97rjjDqWmpmrRokUyDMMfEQEALQiqN9M5nU7t2LFDS5YsUUNDg6ZMmaLY2FilpKS4/RiRkb07vP6oqODecyGfZ8jnuWDPSD7v80tJ2Gw2VVdXy+l0ymq1yul0qqamRjabzWVcbGysEhISFBoaqtDQUN10000qLS1tV0nU1h5WY2P79z6CfVeVfJ4hn+eCPSP5OsZiCWn1xbVfppsiIyMVFxcnu90uSbLb7YqLi3M5HiGdOFbx4YcfyjAMHT9+XJ988okuueQSf0QEALTAb2c3zZkzR4WFhYqPj1dhYaFyc3MlSZmZmSorK5Mk3XrrrYqMjNQtt9yilJQUDRw4ULfddpu/IgIAThNidLEjw0w3BQb5PBPs+aTgz0i+jgmK6SYAQOdESQAATFESAABTlAQAwBQlAQAwRUkAAExREgAAU5QEAMAUJQEAMNWuD/j79ttv9fXXX+vo0aMu9/PRGQDQNbldEi+88IIWLlyoSy65RGFhYU33h4SEUBIA0EW5XRIvv/yyioqK+FRWAOhG3D4mERYWposuusiXWQAAQcbtkvjNb36jp556SjU1NWpsbHT5BwDomtyebsrOzpYkFRUVNd1nGIZCQkL01VdfeT8ZACDg3C6J9957z5c5AABByO2SOOeccyRJjY2N2r9/v84++2xZLLzNAgC6Mrf/yh8+fFgzZszQ4MGDdcMNN2jw4MGaOXOmDh0KvistAQC8w+2SeOqpp1RfX6+SkhKVlpaqpKRE9fX1euqpp3yZDwAQQG5PN23atEkbNmxQr169JEkXXnih5s2bp7Fjx/osHAAgsNzekzjjjDNUV1fnct+BAwcUGhrq9VAAgODg9p7EbbfdpsmTJ2vSpEmKjY1VZWWlli5dqgkTJvgyHwAggNwuiQceeEDR0dGy2+2qqalRdHS0pkyZwuc2AUAX5nZJnPwgP0oBALqPVkuiuLhYKSkpkqS33nrLdBzFAQBdU6slsXr16qaSWLVqVYtj+KhwAOi6Wi2JF198sen2K6+84vMwAIDg4vYpsCf3KE43fvx4b2UBAAQZt0viu+++a3afYRj64YcfvBoIABA82jy7acaMGZKk48ePN90+ae/evRo4cKBvkgEAAq7Nkjj//PNbvC1Jw4YNU0JCgvdTAQCCQpslMW3aNEnSkCFDNHLkSJ8HAgAEj1ZL4tNPP9VVV111YmCPHtq8eXOL46699lrvJwMABFyrJZGbmyu73S5JmjVrVotjQkJCuGodAHRRrZbEyYKQpHfffVdWq9XngQAAwcOtU2CdTqd++ctfqqGhwdd5AABBxK2SsFqt6t+/vw4cONDhFVVUVCg9PV3x8fFKT0/X7t27Tcd+++23GjJkiObPn9/h9QEAPOf2p8AmJSVp6tSpuvvuu9WvXz+X77lz4Hr27NnKyMhQcnKyVq1apZycHC1btqzZOKfTqdmzZ2vMmDHuRgMA+IjbJfH6669LkvLz813ud+fAdW1trcrLy7VkyRJJUmJioubOnau6ujpFRES4jP3b3/6mUaNG6ejRozp69Ki78QAAPuB2SWzcuLHDK3E4HIqJiWk68G21WhUdHS2Hw+FSEl9//bU+/PBDLVu2TIsWLerw+gAA3tGuK9P99a9/bXb/tGnTtGDBAo+DHD9+XE8++aTmzZvn0VlUkZG9O7xsVFR4h5f1B/J5hnyeC/aM5PM+t0tiy5YtLd6/devWNpe12Wyqrq6W0+mU1WqV0+lUTU2NbDZb05h9+/Zpz549uu+++yRJP/74owz
D0OHDhzV37lx3Y6q29rAaGw23x58UFRWuffsOtXs5fyGfZ8jnuWDPSL6OsVhCWn1x3WZJPP/885JOvNI/efuk77//XrGxsW2GiIyMVFxcnOx2u5KTk2W32xUXF+cy1RQbG+tSRPn5+Tp69KhmzpzZ5uMDAHyjzZKoqqqSdOJjwU/ePslms2n69OlurWjOnDnKzs7WokWL1KdPn6bTWzMzM/Xwww/r8ssvb292AICPhRiG4dbczPLlyzVhwgRf5/EY002BQT7PBHs+Kfgzkq9j2ppucvuiQ88880yL9/PhfgDQdbldEsePH2/xvsbGRq8GAgAEjzaPSWRkZCgkJEQNDQ264447XL5XVVWloUOH+iobACDA2iyJtLQ0GYahsrIy3XbbbU33h4SEKDIyUtdcc41PAwIAAqfNkkhNTZV04sp0P/vZz1RaWqoDBw7IMAxVV1dr1apVLuUBAOg63H4zXUVFhbKysnTBBRfom2++0cCBA7Vz504NGzaMkgCALsrtkvjzn/+svLw8jRs3TldddZWKi4v19ttv65tvvvFlPgBAALl9dlNlZaXGjRvncl9qaqqKi4u9nQkAECTcLonIyEjt379fknTOOefo888/1549ezgFFgC6MLdLIi0tTdu3b5ckTZo0SXfffbeSk5N1++23+ywcACCw3D4mcfLTWSUpJSVFV199terr6zVgwACfBAMABJ7bJXE6dz79FQDQubk93QQA6H4oCQCAKUoCAGCKkgAAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACYoiQAAKYoCQCAKUoCAGCKkgAAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACY6uGvFVVUVCg7O1sHDx5U3759NX/+fPXv399lzMKFC7VmzRpZLBb17NlTjzzyiEaOHOmviACA0/itJGbPnq2MjAwlJydr1apVysnJ0bJly1zGDB48WJMnT1avXr309ddf684779SHH36osLAwf8UEAJzCL9NNtbW1Ki8vV2JioiQpMTFR5eXlqqurcxk3cuRI9erVS5I0aNAgGYahgwcP+iMiAKAFfikJh8OhmJgYWa1WSZLValV0dLQcDofpMsXFxTr//PPVr18/f0QEALTAb9NN7bF161Y9//zzKigoaPeykZG9O7zeqKjwDi/rD+TzDPk8F+wZyed9fikJm82m6upqOZ1OWa1WOZ1O1dTUyGazNRv7+eefKysrS4sWLdJFF13U7nXV1h5WY6PR7uWiosK1b9+hdi/nL+TzDPk8F+wZydcxFktIqy+u/TLdFBkZqbi4ONntdkmS3W5XXFycIiIiXMaVlpbqkUce0V/+8hddeuml/ogGAGiF394nMWfOHBUWFio+Pl6FhYXKzc2VJGVmZqqsrEySlJubq2PHjiknJ0fJyclKTk7Wjh07/BURAHAavx2TGDBggIqKiprd/+KLLzbdfvvtt/0VBwDgBt5xDQAwFZRnNwFAZzL56Y1NtwuyRwcwifexJwEAHji1IFr6urOjJKDJT29s+gfAfWa/M13pd4nppiDg6a6qJ8u39Cqoq+0uA+g49iQCzNNdVU+WD5ZXQezJAMGLkgggT/9IB8sfeU909flcdG1me91daW+ckkDAdIWSA04vhK5UEBLHJLq1guzRLf5B9uS4RiB+QQK9fqArb3fsSQSQp7uq3tjV9fRVUKCniwK9fqCroyQCzNM/0t7Y1S3IHt30rz08nS7ytOSYrgJ8j+mmIODprmpn3tU9fcqrM/9fgK6IkkDAUQxA8GK6CR0W6NP/Ar1+oDugJOCRQJ/+F+j1A10d003wWEH26IBempFiAHyHPQkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACYoiQAAKYoCQCAKd5xjW6PT6EFzLEngW6NixYBraMk0G1x0SKgbUw3AUCABfOUJ3sSABBAwT7lSUmg2+KiRQi0zjDlSUmgW+O
iRUDrOCbhBcE8n4i28TMDzFESHmppPpE/OvA3T1+odPblO6uC7NEtTi0F03PAdJMHOsN8Inxv8tMbm/51dPmkR1d5tHxrX3f15Tu7YJ/ypCQADwT6D6SnL1Q6+/JdRUH26KZ/wcZvJVFRUaH09HTFx8crPT1du3fvbjbG6XQqNzdXY8aM0dixY1VUVOSXbJ6+kkP3xB9IdAd+K4nZs2crIyND69atU0ZGhnJycpqNKSkp0Z49e7R+/Xq9+eabys/P1w8//ODTXJ68kuMUSgBdnV9Kora2VuXl5UpMTJQkJSYmqry8XHV1dS7j1qxZo7S0NFksFkVERGjMmDFau3atz3J545VcsM8nomvz9IVKZ18evueXs5scDodiYmJktVolSVarVdHR0XI4HIqIiHAZFxsb2/S1zWZTVVVVu9YVGdnbK5mjosLdHlvyv8leWWdb2pMpELpbvpL/TVbSo6tavN8fy5s9ji+Xb+k59Of629LdtkF/6HKnwNbWHlZjo+Hx4+zbd8gLabwnKio86DKdqrvmO/0UxoLs0e1aj6fLn7rcSb5avrXn0B/rb0t33QY9ZbGEtPri2i8lYbPZVF1dLafTKavVKqfTqZqaGtlstmbjKisrNXjwYEnN9yy8rTOco4zg5+n2UpA9Omj/gAB+OSYRGRmpuLg42e12SZLdbldcXJzLVJMkJSQkqKioSI2Njaqrq9OGDRsUHx/v02wcUwAAc36bbpozZ46ys7O1aNEi9enTR/Pnz5ckZWZm6uGHH9bll1+u5ORkffHFF7r55pslSQ899JDOO+88n2fjlRwAtCzEMAzPJ/CDSEePSQR7SZDPM+TzXLBnJF/HtHVMgndcAwBMURIAAFOUBADAVJd7n4TFEhKQZf2BfJ4hn+eCPSP52q+tTF3uwDUAwHuYbgIAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACY6nIfy9Ga3Nxcbd68WaGhoTrzzDM1a9YsXX755ZKk/fv3a8aMGdq7d6/OOOMMzZ07V0OGDGnxcRYuXKiVK1dKklJTU/XQQw95Jd+qVau0ePFi7dq1S48//rjuvPPOpu9NmjRJBw4ckCQ5nU7t3LlTq1at0iWXXOLyGFu2bNF9992n/v37S5JCQ0NVVFTk83zZ2dn6+OOPddZZZ0k6cQGpBx54oMXHCcTz19rP/lQrVqxQXl6ezjnnHEnSueeeq4ULF/o8X319vR577DF9+eWXslqtmjlzpm688cYWH2f58uV68cUXZRiGbrjhBj3xxBOyWLz7ei8YtrfWBMP21pZg2Oa8wuhGNm7caDQ0NDTdvummm5q+l52dbSxcuNAwDMP49NNPjbFjxxqNjY3NHmPr1q1GYmKiUV9fb9TX1xuJiYnG1q1bvZJvx44dxs6dO42srCzjlVdeMR337rvvGrfeemuL3/vkk0+M1NRUr+RpT76ZM2e2mvmkQD1/rf3sT/X2228b06dP90qe9uTLz883Zs2aZRiGYVRUVBjXXXedcfjw4WaPsWfPHmPkyJFGbW2t4XQ6jcmTJxsrV670Sd6TArW9tSYYtre2BMM25w3darrpxhtvVM+ePSVJQ4cOVVVVlRobGyVJa9eu1cSJEyVJV155pUJDQ1VWVtbsMdasWaOUlBSFhYUpLCxMKSkpWrNmjVfyXXzxxRo4cGCbrwrfeust/frXv/bKOtvD3XytCdTz19rP3l9ay/fOO+8oPT1dktS/f39ddtll+uCDD5qNW7duncaMGaOIiAhZLBalpaV57fkzE6jtzRt8ub21JRi2OW/oViVxqldffVWjRo2SxWLRgQMHZBiGyzW3bTabqqqqmi3ncDgUGxvrMs7hcPglsyTt27dPmzdvVnJysumY3bt3KzU1VWlpaU272f6wZMkSJSUl6cEHH9SuXbtaHBPo509y/dm3ZOvWrUpOTtYdd9yhf/7zn37JVFlZ2TTdILm//cXGxvr0+WN7845g3Obc1aW
OSaSmpqqysrLF73388ceyWq2SpNWrV6ukpESvvvqqP+O5na81xcXFGjlypEuhnerSSy/V+++/r/DwcH3//fe69957FRMTo+uuu86n+R555BFFRUXJYrGouLhYU6ZM0YYNG9z6P7nLG89fWz/7UaNG6ZZbblFYWJjKy8uVmZmpZcuWacCAAX7J5y/uZvXl9uZJPn9sb55mdPfvjSfbnD90qZJw51XMu+++q+eee05Lly7V2WefLUlNB7/q6uqafhkcDof69evXbHmbzeayYTgcDtlsNq/la8uKFSs0Y8YM0+/37v3fa9Wed955GjNmjD777DO3fmk9yRcTE9N0OyUlRfPmzVNVVZXLq2MpsM9fSz/70536x/AXv/iFhg0bptLSUrd+YT3JFxsbq71797psf8OHD2827vTnr7Ky0u3nryNZfbm9eZLPH9ubpxkl329z/tCtppv+8Y9/aN68eXrppZd07rnnunwvISFBb7zxhiRp27ZtOnbsmC677LJmj5GQkKDi4mIdO3ZMx44dU3FxscaNG+eX/J999pkOHTqkG264wXRMTU2NjP+/RMjBgwf10UcfNTsjxReqq6ubbm/atEkWi8XlF/mkQD1/rf3sT3Xq/2Pv3r3617/+pUGDBvk8X0JCgt58801JJ6ZvysrKNHLkyGbj4uPjtWHDBtXV1amxsVFFRUU+e/7Y3jwT7Nucu7rVRYeuueYa9ezZ06W5ly5dqrPOOkv79u1TVlaWKisrdcYZZyg3N1fDhg2TJM2aNUujR4/WTTfdJEnKz89XcXGxpBOvYqZPn+6VfHa7XX/84x/1448/qmfPnurVq5cKCgo0cOBASdITTzyhvn376ne/+53Lcs8//7yio6N1++23q7CwUK+//rp69Oghp9OplJQUTZkyxef5Jk2apNraWoWEhKh3796aMWOGhg4dKik4nr/Wfvan5nv22Wf13nvvNU0V3HvvvUpNTfV5vqNHjyo7O1tfffWVLBaLsrKyNGbMGEmuP19JeuONN7R48WJJ0vXXX6+cnByfTLMEentrTTBsb20Jhm3OG7pVSQAA2qdbTTcBANqHkgAAmKIkAACmKAkAgClKAgBgipIAAJiiJIAA27JlS6tvWGuPQYMG6bvvvvPKYwESJQFo27Ztmjhxoq644gpdffXVmjhxokpLSwMdCwgKXeqzm4D2Onz4sKZOnao5c+Zo3LhxOn78uLZt26bQ0FC3H+Onn35Sjx78KqFrYk8C3VpFRYUkKTExUVarVWFhYRoxYkSrnz+0YsUKTZw4UXl5eRo+fLjy8/O1Z88e3X333Ro+fLiGDx+uRx99VD/++GPTMqNHj9ZLL72kpKQkXXHFFfrtb3+r//znPy0+/rJly3TLLbeoqqpKDQ0Nmj9/vkaNGqXrrrtOOTk5OnbsWNPYxYsXa8SIERoxYoTeeustLz0rwH9REujWLrzwwqbLhb7//vv697//7dZypaWlOu+88/TRRx/pgQcekGEYuv/++7Vp0ya98847qqqqUn5+vssy77zzjhYvXqz33ntPO3bs0IoVK5o97oIFC7Ry5UoVFhaqX79+euaZZ1RRUaHi4mKtX79eNTU1TZe2/OCDD1RQUKCCggKtX79emzdv9vwJAU5DSaBb6927t1577TWFhIToySef1LXXXqupU6dq//79rS4XHR2tu+66Sz169FBYWJguuOACXX/99QoNDVVERITuvfdeffrppy7L3HXXXYqJiVHfvn1144036quvvmr6nmEYmjdvnj766CMtW7ZMERERMgxDy5cv1+OPP66+ffuqd+/euv/++7V69WpJJ0pn/Pjxuvjii3XmmWdq2rRp3n+C0O0xkYpub8CAAXr66aclSbt27VJWVpby8vL07LPPmi5z+rVG9u/frz/84Q/atm2bjhw5IsMw1KdPH5cxUVFRTbd79eqlmpqapq8PHTqk5cuX67nnnlN4eLikE9c3qa+v1/jx45vGGYbRdAnMmpoal4+zP/1
aCoA3sCcBnGLAgAEaP368du7c2eq4kJAQl6+fffZZhYSEqKSkRJ999pn+9Kc/qT0fsNynTx+98MILeuyxx7R9+3ZJJy6GFRYWptWrV2vbtm3atm2btm/frs8//1zSib2ZUy/FaXaVNMATlAS6tV27dqmgoKDpetIOh0N2u11Dhgxp1+McOXJEZ555psLDw1VdXd10vYf2GD58uJ555hlNnz5dpaWlslgsSktLU15enmprayWduEDNpk2bJJ24oM7KlSv1zTffqL6+XgsWLGj3OoG2UBLo1nr37q0vvvhCaWlpGjp0qCZMmKCLL75Y2dnZ7XqcadOmqby8XFdeeaXuu+8+3XzzzR3Kc/311ysvL09Tp07Vl19+qaysLF1wwQWaMGGChg0bpkmTJjWdkfWrX/1K99xzj+655x6NHTtW11xzTYfWCbSGiw4BAEyxJwEAMMXZTUALcnJyVFJS0uz+pKQk/f73vw9AIiAwmG4CAJhiugkAYIqSAACYoiQAAKYoCQCAKUoCAGDq/wDoDk0Uz/GBNgAAAABJRU5ErkJggg==\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_hr.plot.scatter(x='S_ranked', y='attrition')" ] }, { "cell_type": "code", "execution_count": null, "id": "0f9b5e52-7e39-424d-b4f9-e07e98992658", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Newborn</th>\n", " <th>mean</th>\n", " <th>sum</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>0</td>\n", " <td>0.186700</td>\n", " <td>1895</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>0.056757</td>\n", " <td>105</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Newborn mean sum\n", "0 0 0.186700 1895\n", "1 1 0.056757 105" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_hr.groupby(['Newborn'])['left'].agg(['mean', 'sum']).reset_index()" ] }, { "cell_type": "code", "execution_count": null, "id": "3034bf7e-97a4-4159-8ded-8a8214ab8e86", "metadata": {}, "outputs": [ { "name": 
"stderr", "output_type": "stream", "text": [ "*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*. Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.\n" ] }, { "data": { "text/plain": [ "<AxesSubplot:xlabel='New Projects', ylabel='Attrition'>" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEQCAYAAABFtIg2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAnIElEQVR4nO3de1xUdf4/8NfMGWa4KZfhNqBCWulo3gHzluaNLkOYVrRktXlp3fypW19LH1YgutkXa8ut1doema3hutWWmkRS6bpFa6aloaFlBoI4Ag5euMrMmfP7w3W+IgwNMHOGmXk9H48eD5j5nHPeH8J5cT7nnM9HIUmSBCIiojYo3V0AERF1XwwJIiKyiyFBRER2MSSIiMguhgQREdnFkCAiIrtkC4mSkhKkp6cjJSUF6enpKC0tbbNdfn4+UlNTYTAYkJqairNnz8pVIhERXUMh13MSDz30EGbOnIm0tDRs374dH3zwATZt2tSizeHDh7F06VL87W9/Q2RkJGpra6FWq6HRaOQokYiIriHLmYTJZEJxcTEMBgMAwGAwoLi4GDU1NS3avf3225g9ezYiIyMBAD169GBAEBG5kSwhYTQaER0dDUEQAACCICAqKgpGo7FFuxMnTqC8vBwPPPAA7r77bqxfvx58IJyIyH1U7i7gaqIo4scff8TGjRvR3NyMuXPnIjY2FtOnT3d3aUREPkmWkNDpdKisrIQoihAEAaIooqqqCjqdrkW72NhY3HbbbVCr1VCr1Zg8eTKKioo6FBLnztXDau342YdWGwyTqa7D23ky9tk3sM++obN9VioVCAsLsvu+LCGh1Wqh1+uRl5eHtLQ05OXlQa/XIzw8vEU7g8GAf//730hLS4PFYsHXX3+NlJSUDh3LapU6FRJXtvU17LNvYJ99gyv6LNstsCtWrEBubi5SUlKQm5uL7OxsAMC8efNw+PBhAMCdd94JrVaLO+64A9OnT8f111+Pe+65R64SiYjoGrLdAisXk6muQ2kqSRLMFivCwoNw4XwDVILvPF8YGdkD1dW17i5DVuyzb2CfHadUKqDVBtt9v1tduJaDVZLwQ0kNDh0/i+OnzuNMTQOsVgkKhQJWq4SgAD/0iQ7GgPgw3DwwGhEhAe4umYjIbXwmJJrNInZ9ewoF+8txySziUrPYssF/T6jqGs0oLj2Hn8rPY0dhKa7vFYK7xiagf58wN1RNRORePhESP5+6gNe2H0F9oxnNFqtD21hECYCEoyfP4UTFBYzsH4lZ0/ojQOMTPzIiIgBeHhKSJGHblyXY+U0ZzA6GQ1uaLVYcOFaFw7/UYMn9w9AnuocTqyQi6r689iqtJEnY/NlPKNjftYC4wixKqGs04383f4cS40UnVEhE1P15bUjk/acUhYeNaDZ3PSCu1tQs4oUtB1F9vtGp+yUi6o68MiTKKmuRt/ek0
wPiimaziNe3H4HVu+4eJiJqxetCQrRasX7bEacMMdljlYCKs/XY/d0plx2DiKg78LqQOHT8LC7UNbv8OM1mKz789y8wW8Rfb0xE5KG8LiT2HKzAJbM8H9ySBOw/ViXLsYiI3MHrQqKm7pJsx7pkFvHJvjLZjkdEJDevCwm5Z340mhpkO3MhIpKb14WEKMobEmqVEuVVvjVvPRH5Dq8LCbmJVgnllb412yQR+Q6GRBdZRCvqmyzuLoOIyCUYEl0kSeBDdUTktRgSXSQICmj8BHeXQUTkEgyJLlKrlIiNsL+IOBGRJ/O6kFAq5D2e2WJFfAynDici7+R1IeGnknfoJ1CjQs9AtazHJCKSi9eFhCDjqYSfoMCtI3rJdjwiIrl5XUiMHRIDP0GeoJCgwMRhsbIci4jIHbwuJEYP0kGpdH23VIISSfoohARrXH4sIiJ38bqQCA7ww4MpN0Lj59qu+asFPDDlRpceg4jI3bwuJABg9KAYXB8XApWLhp3UKiXmpQ5EoL/KJfsnIuouvDIkFAoFHrt7MKLCAp0eFGqVEjMm9MXgvlqn7peIqDvyypAAgACNCstnjURsRBDUThp6UquUuPfWfpiW1Mcp+yMi6u5kGy8pKSnBsmXLcP78eYSGhiInJwcJCQkt2rz66qv4+9//jqioKADAiBEjkJWV1eljBvqr8MxDidj2ZQk+O1De6XWv1SolAv1VmJ92E27sHdrpeoiIPI1sIZGVlYWMjAykpaVh+/btyMzMxKZNm1q1mz59OpYuXeq046oEJe6Z2A9JA6KwqeAYTlXXQxStcGRtIo2fAEmSMGFYHGZM6Ms5mojI58gSEiaTCcXFxdi4cSMAwGAwYNWqVaipqUF4eLgcJSA+pgeefTgJp8/W47MD5fj+57OobTBD7aeEJF2+jgFJglUCRKsVOm0Qbh0eh9GDYqBRMxyIyDfJEhJGoxHR0dEQhMsftoIgICoqCkajsVVIfPzxxygsLERkZCQWLlyI4cOHO7WW2IggPHzbAABA4yULyiprUdtgRlCwBk0NzYgOD0RMeCCUck8CRUTUDXWrezjvv/9+zJ8/H35+fvjqq6/w2GOPIT8/H2FhYQ7vQ6sN7tAx+/RyfN/eKDLS9yYnZJ99A/vsHLKEhE6nQ2VlJURRhCAIEEURVVVV0Ol0LdpFRkbavh47dix0Oh2OHz+O5ORkh49lMtXB6sgFh2tERvZAdbVvLUPKPvsG9tk3dLbPSqWi3T+uZbkFVqvVQq/XIy8vDwCQl5cHvV7faqipsrLS9vXRo0dRUVGB6667To4SiYioDbINN61YsQLLli3D+vXr0bNnT+Tk5AAA5s2bh0WLFmHw4MF46aWX8MMPP0CpVMLPzw9r1qxpcXZBRETyUkiSdy3QzOEmx7HPvoF99g0ePdxERESeiSFBRER2MSSIiMiubvWcBBFRZ5QYL+Kz/eU4WVmLxksWqP1UCNAIuHlgNMYN0SHI38/dJXoshgQReSSrJGHvkTPI33sSptommC1W/N9tOM0AAOPZenz4xS8YeWMk7hyTgLiIILfV66kYEkTkccwWEeu2HsGPZedwyWx/dufm/878/M3RSnz3UzV+lzYIw2/gbfUdwWsSRORRLKIVf3r3EI6ebD8grmaVLgfGX7f/gO9+qnZxhd6FIUFEHuWdgh9Raqzt1PowzRYr3vjoB5RV+tYzFF3BkCAij3Gu9hK+/qHSNozUGWaLFdu+LHFiVd6NIUFEHuNfB0+hq1NESACOlNTgQn2zM0ryegwJIvIIFtGK3d9WwCJ2/iziCoUC+PehCidU5f0YEkTkEUqMF2F10lRzZosV/zlyxin78nYMCSLyCLUNZjhzvciGJosT9+a9GBJE5BHMFmuXr0dczRnDVr6AIUFEHiHQXwWFE08lNH6C83bmxRgSROQRosMCYBGddy4Row102r68GUOCiDxCVFggekXaXxynI/z9BKQk93HKvrwdQ4KIPMYdN/eBv7rrw0Qql
RJD+mqdUJH3Y0gQkccYdkMEBGXXLkz4qZSYmtgLyi7ux1cwJIjIYwhKJR6bfhP8VJ376FIJCujCA3HbKA41OYohQUQeRZ8QjkfuGNDhoFAJSoT39MeS3wyHn4p3NjmK60kQkce5eWAMegaqsW7rYUgS0NQs2m2rVCigEhToFxeC/zdjMAI0/NjrCP60iMgjDUwIx9qF43DgWDXyvz6J6guNUCoUEK0SlEoFFABEq4SkAVGYltQbfaJ7uLtkj8SQICKP5acSMPqmGIy+KQblVXU4fbYeDU1mhIUGQrKI6N8nDIH+/JjrCv70iMgr9I4KRu+oy89RREb2QHU1FxZyBl64JiIiuxgSRERkF0OCiIjski0kSkpKkJ6ejpSUFKSnp6O0tNRu219++QVDhw5FTk6OXOUREVEbZAuJrKwsZGRkoKCgABkZGcjMzGyznSiKyMrKwpQpU+QqjYiI7JAlJEwmE4qLi2EwGAAABoMBxcXFqKmpadX2jTfewMSJE5GQkCBHaURE1A5ZboE1Go2Ijo6GIFx+FF4QBERFRcFoNCI8PNzW7tixYygsLMSmTZuwfv36Th1Lq+38VMKRkb73sA377BvYZ9/gij53m+ckzGYznn32WTz//PO2MOkMk6kOVmvHFybxxfuq2WffwD77hs72WalUtPvHtSwhodPpUFlZCVEUIQgCRFFEVVUVdDqdrU11dTXKysrw6KOPAgAuXrwISZJQV1eHVatWyVEmERFdQ5aQ0Gq10Ov1yMvLQ1paGvLy8qDX61sMNcXGxmLfvn2271999VU0NDRg6dKlcpRIRERtkO3uphUrViA3NxcpKSnIzc1FdnY2AGDevHk4fPiwXGUQEVEHKCRJct7K4t0Ar0k4jn32Deyzb3DVNQk+cU1ERHYxJIiIyC6GBBER2cWQICIiuxgSRERkF0OCiIjsYkgQEZFdDAkiIrKLIUFERHY5NHdTeXk51q5di6NHj6KhoaHFe3v27HFFXURE1A04FBJLlixB7969sXTpUgQEBLi6JiIi6iYcConjx49jy5YtUCo5OkVE5Esc+tRPSkpCcXGxq2shIqJuxqEzibi4OMydOxdTp05FREREi/cWL17sksKIiMj9HAqJxsZG3HrrrbBYLDhz5oyrayIiom7CoZB4/vnnXV0HERF1Qw4vX1paWoq8vDxUVVUhKioKBoMBCQkJLiyNiIjczaEL17t378aMGTNQUlKCkJAQlJSUYObMmdi1a5er6yMiIjdy6Ezi5Zdfxvr163HzzTfbXtu3bx9WrVqFyZMnu6w4IiJyL4fOJM6cOYPExMQWr40cOZIXsYmIvJxDITFgwAC89dZbLV7buHEj9Hq9S4oiIqLuwaHhphUrVuD3v/89Nm3aBJ1OB6PRiICAALz++uuuro+IiNzIoZDo168f8vPzcejQIdvdTUOHDoWfn5+r6yMiIjdy+BZYlUrV6roEERF5N7shcfvtt+OTTz4BAEyYMAEKhaLNdpwqnIjIe9kNiVWrVtm+fuGFF2QphoiIuhe7IXH10JLJZMLtt9/eqs3OnTtdUxUREXULDt0C+/TTT7f5emZmpsMHKikpQXp6OlJSUpCeno7S0tJWbT744AOkpqYiLS0Nqamp2LRpk8P7JyIi52v3wnV5eTkAQJIk29dXv6dWqx0+UFZWFjIyMpCWlobt27cjMzOzVQikpKRgxowZUCgUqKurQ2pqKpKTkzFgwACHj0NERM7TbkhMnToVCoUCkiRh6tSpLd6LiIjAwoULHTqIyWRCcXExNm7cCAAwGAxYtWoVampqEB4ebmsXHBxs+7qpqQlms9nuBXMiInK9dkPi2LFjAIBZs2YhNze30wcxGo2Ijo6GIAgAAEEQEBUVBaPR2CIkAGDXrl146aWXUFZWhv/5n/9B//79O31cIiLqGoeek+hKQHTU5MmTMXnyZJw+fRoLFizALbfcgr59+zq8vVYb/OuN7IiM7NHpbT0V++wb2Gff4Io+2w2JO
XPmYMOGDQCAjIwMu8M+mzdv/tWD6HQ6VFZWQhRFCIIAURRRVVUFnU5nd5vY2FgMHjwYe/bs6VBImEx1sFolh9tfERnZA9XVtR3ezpOxz76BffYNne2zUqlo949ruyExffp029f33ntvhw98Na1WC71ej7y8PKSlpSEvLw96vb7VUNOJEyfQr18/AEBNTQ327duHadOmdenYRETUeXZDIjU1FQAgiiLKysrw+9//vkN3M11rxYoVWLZsGdavX4+ePXsiJycHADBv3jwsWrQIgwcPxrvvvouvvvoKKpUKkiRh1qxZGDduXKePSUREXaOQJOlXx2ZGjRqFvXv3Qql06LEKt+Jwk+PYZ9/APvsGVw03OfSpP336dGzZsqXDByciIs/m0N1NRUVFyM3NxYYNGxATE9PiIrYjF66JiMgzORQS9913H+677z5X10JERN2MQyHRt29fDB06tNXrRUVFTi+IiIi6D4euSTzyyCNtvj537lynFkNERN1Lu2cSVqsVkiS1+O+KsrIy2zQbRETkndoNiYEDB9ouUg8cOLDFe0qlEvPnz3ddZURE5HbthsSuXbsgSRIefPDBFvM3KRQKhIeHw9/f3+UFEhGR+7QbEnFxcQAuzwJ75eurbdy40e71CiIi8nwOXbhet25dm6+/9tprTi2GiIi6l3bPJPbu3Qvg8vxNX3/9dYsL1+Xl5QgKCnJtdURE5FbthsSVta2bm5uxfPly2+sKhQKRkZF49tlnXVsdERG5VbshsXv3bgDAU089hTVr1theP3bsmG2d6kmTJrm2QiIichuHnrhes2YNampqsGPHDmzbtg3Hjh1DYmKi7UyDiIi8U7shYTabsXv3bmzduhWFhYXo06cP7rzzTlRUVGDt2rXQarVy1UlERG7QbkiMHTsWCoUCM2bMwMKFCzFo0CAA4LThREQ+ot1bYPv374/a2lp8//33OHz4MC5cuCBXXURE1A20GxLvvPMOPvvsM4wdOxZvvfUWxo4di/nz56OhoQEWi0WuGomIyE1+9WG6uLg4LFiwAJ9++inefvttREZGQqlU4q677mpxxxMREXkfh+5uuiIxMRGJiYl45pln8Nlnn2Hbtm0uKouIiLqDDoXEFRqNBgaDAQaDwdn1EBFRN+LQ3E1EROSbGBJERGQXQ4KIiOxiSBARkV0MCSIisoshQUREdnXqFtjOKCkpwbJly3D+/HmEhoYiJycHCQkJLdqsW7cO+fn5UCqV8PPzw+OPP47x48fLVSIREV1DtpDIyspCRkYG0tLSbGtRbNq0qUWbIUOGYPbs2QgICMCxY8cwa9YsFBYWwt/fX64yiYjoKrIMN5lMJhQXF9sevjMYDCguLkZNTU2LduPHj0dAQACAy5MLSpKE8+fPy1EiERG1QZaQMBqNiI6OhiAIAABBEBAVFQWj0Wh3m23btqFPnz6IiYmRo0QiImqDbMNNHfHNN9/gz3/+M956660Ob6vVBnf6uJGRPTq9radin30D++wbXNFnWUJCp9OhsrISoihCEASIooiqqirodLpWbQ8ePIgnn3wS69evR9++fTt8LJOpDlar1OHtIiN7oLq6tsPbeTL22Tewz76hs31WKhXt/nEty3CTVquFXq9HXl4eACAvLw96vR7h4eEt2hUVFeHxxx/HK6+8YlsFj4iI3Ee25yRWrFiB3NxcpKSkIDc3F9nZ2QCAefPm4fDhwwCA7OxsNDU1ITMzE2lpaUhLS8OPP/4oV4lERHQNhSRJHR+b6cY43OQ49tk3sM++waOHm4iIyDN1y7ubiKjrLplFNDRZoNL4wWyxwk/Fvwmp4xgSRF7kUrOIr4vP4JN9ZTh7vhEq4XIwmEUrrtP1xO2j4jHsBi0EJQODHMOQIPICVquEf/77BHZ/ewoKhQKXzCIAoNlitbX55fRFbPi4GIJSgZkT+mLi8F7uKpc8CEOCyMOZLSJe+WcRjldcaBEKbWlqvhwe/9j1M06fbcBvptwAhUIhR5nkoXjOS
eTBrJKE9VuP4KdTF9Bsbj8grtZsseKLotPYXljiwurIGzAkiDzYl9+fxtGyczD/yhlEW5rNVuzcV4YTFRdcUBl5C4YEkYeSJAkf7z3ZoTOIa5ktl4OCyB6GBJGH+rniAmobzF3ahwTg+xNncbGh2TlFkddhSBB5qM8OnELzf+9i6gqFQoH/HLY/bT/5NoYEkYeqqK6DM+bUMVusKK+qc8KeyBsxJIg81JXbWZ2hrrFrw1bkvRgSRB7KmdNsBKj5yBS1jSFB5KHCemicsh+lAtCG+jtlX+R9GBJEHurW4XHwVwtd3o8gKDHmptarRBIBDAkijzXixkg4Y0aN2IggxEUEdX1H5JUYEkQeSiUocevwOKiEzieFxk+J20f1cWJV5G0YEkQe7I6b4xESpIGyEznhJyiQoOuJkf0jnV8YeQ2GBJEHC/T3w9IHhiM4wA9CB5LCT6VEdHggFt8zhGtLULv420Hk4SJCApA9Oxkx2kBo/Nq/kK1UXA6IgfFheOahRPjz1lf6FfwNIfICIcEarJydjJ/Kz2PnvjL8UFoDlaCEJElQKhSQAIhWCaMGRmNaUm/0irS/8D3R1RgSRF5CoVCgf58w9O8Thgt1l1BirEV9kxmhIQEQzSJu6BWCAA3/yVPH8DeGyAuFBGsw7IbLD9tFRvZAdXWtmysiT8VrEkREZBdDgoiI7GJIEBGRXQwJIiKyS7aQKCkpQXp6OlJSUpCeno7S0tJWbQoLCzFjxgzcdNNNyMnJkas0IiKyQ7aQyMrKQkZGBgoKCpCRkYHMzMxWbXr37o3nnnsOc+bMkassIiJqhywhYTKZUFxcDIPBAAAwGAwoLi5GTU1Ni3bx8fHQ6/VQqXhnLhFRdyBLSBiNRkRHR0MQLk8ZIAgCoqKiYDRy8XUiou7M6/5k12o7P91AZGQPJ1biGdhn38A++wZX9FmWkNDpdKisrIQoihAEAaIooqqqCjqd81fDMpnqYLVKHd7OF59KZZ99A/vsGzrbZ6VS0e4f17IMN2m1Wuj1euTl5QEA8vLyoNfrER4eLsfhiYiok2S7u2nFihXIzc1FSkoKcnNzkZ2dDQCYN28eDh8+DAA4cOAAbrnlFmzcuBH/+Mc/cMstt+DLL7+Uq0QiIrqGQpKkjo/NdGMcbnKcL/X5YkMzLtY3IzQ0EOIlM0KCNe4uSTa+9P/5CvbZcb823OR1F66JrhCtVnz/swn5X59EWWUtVIISCoUCZosInTYId9wcj5H9I6ESOPEAkT0MCfJK1ecbsWbLQdQ1mHHJLAIALKJoe7+8qg5v7zyGLbuO48n7hyGOi/AQtYl/QpHXOXu+ESvf3o+ai022gGjLpWYRF+ub8dw736Kiuk7GCok8B0OCvIpVkvDCPw6i4ZIFjl5ta2oW8cKWgzBbrK4tjsgDMSTIqxz5pQYX680OB8QVl8xWHPixyjVFEXkwhgR5lU/2nWx3iMmeS2YR+XtPuqAiIs/GkCCv0XjJgp9PXej09lXnG2G60OTEiog8H0OCvEZdoxkqQdHp7VVKBS42NDuxIiLPx5AguqLz+ULktRgS5DV6BPrBInZ+AgGLKCEkSO3Eiog8H0OCvIa/WoUbeod0evvosACE9/R3YkVEno8hQV7ljlHx0PgJHd7OXy3gjpvjXVARkWdjSJBXGXhdOEKC1VB28PqC2k/AyP5RrimKyIMxJMirKBUKPHn/cAT4qxwKCgUun0U89Zvh8FPxnwPRtfivgryONsQfWb9NgjbEH/7tDD35qwX0DFbj2YcTERsRJGOFRJ6Ds8CSV4oICcDzj47G4V9M+GRfGU5UXIBKpYTyv1OF944Kxu2j4jHshghOFU7UDoYEeS2lUoGh10dg6PURqG8y42J9M8LCgmBuakaPQN7qSuQIhgT5hCB/PwT5+/nkimVEXcHzbCIisoshQUREdjEkiIjILoYEERHZxZAgIiK7eHeTjzp7oRENFgkapQRByb8ViKhtDAkfc/Z8I9ZtPYLTpnoIS
gUEpQIP3zYAiQM4bxERtcaQ8CFWq4T//ft3OFd7CZIEmP/7+pt5xYgI9UdCTE+31kdE3Q/HGXzIkZIaNDRZIF2zLo9ZtGLnvjL3FEVE3RpDwofUXGyCaG29cpskAVXnGt1QERF1d7KFRElJCdLT05GSkoL09HSUlpa2aiOKIrKzszFlyhRMnToV77//vlzl+YT4mB5QtDF9tqBU4PpenV/RjYi8l2whkZWVhYyMDBQUFCAjIwOZmZmt2uzYsQNlZWX49NNP8e677+LVV1/FqVOn5CrR6yXE9MB1MT3hJ7RMCrVKiZSkPm6qioi6M1kuXJtMJhQXF2Pjxo0AAIPBgFWrVqGmpgbh4eG2dvn5+bj33nuhVCoRHh6OKVOmYOfOnZg7d67Dx1J2dEkyJ23rGRRY8pvhKPimDPuPVsFstaJfbE+kjr0OkaEB7i5ONt7//7k19tk3dKbPv7aNLCFhNBoRHR0NQbi8AIwgCIiKioLRaGwREkajEbGxsbbvdTodzpw506FjhYV1fvEYrTa409t6kodTb8LDqe6uwn185f/z1dhn3+CKPvPCNRER2SVLSOh0OlRWVkIURQCXL1BXVVVBp9O1anf69Gnb90ajETExMXKUSEREbZAlJLRaLfR6PfLy8gAAeXl50Ov1LYaaAOC2227D+++/D6vVipqaGnz++edISUmRo0QiImqDQpKufbTKNU6cOIFly5bh4sWL6NmzJ3JyctC3b1/MmzcPixYtwuDBgyGKIlauXImvvvoKADBv3jykp6fLUR4REbVBtpAgIiLPwwvXRERkF0OCiIjsYkgQEZFdDAkiIrLL59eTOHfuHJ566imUlZVBrVYjPj4eK1eubHV7rrd57LHHcOrUKSiVSgQGBuLZZ5+FXq93d1ku95e//AWvvvoqduzYgRtvvNHd5bjUpEmToFarodFoAABLlizB+PHj3VyVa126dAmrV6/G3r17odFoMGzYMKxatcrdZbnMqVOnsGDBAtv3tbW1qKurwzfffOO0Y/h8SCgUCsydOxejRo0CAOTk5ODFF1/E6tWr3VyZa+Xk5KBHjx4AgM8//xzLly/H1q1b3VyVa/3www84dOgQ4uLi3F2KbF555RWvD8OrvfDCC9BoNCgoKIBCocDZs2fdXZJL9erVC9u3b7d9/9xzz9keWnYWnx9uCg0NtQUEAAwbNqzFU9/e6kpAAEBdXR0Ubc0h7kWam5uxcuVKrFixwt2lkIvU19dj27ZtWLx4se33OSIiws1Vyae5uRk7duzAzJkznbpfnz+TuJrVasWWLVswadIkd5cii6effhpfffUVJEnCm2++6e5yXOrPf/4z7rrrLvTq1cvdpchqyZIlkCQJI0eOxBNPPIGePb13idry8nKEhobiL3/5C/bt24egoCAsXrwYiYmJ7i5NFrt370Z0dDQGDRrk1P36/JnE1VatWoXAwEDMmjXL3aXI4rnnnsOePXvw+OOPY82aNe4ux2UOHjyII0eOICMjw92lyGrz5s346KOP8MEHH0CSJKxcudLdJbmUKIooLy/HwIED8eGHH2LJkiVYuHAh6urq3F2aLD744AOnn0UADAmbnJwcnDx5EmvXroVS6Vs/lunTp2Pfvn04d+6cu0txif379+PEiROYPHkyJk2ahDNnzmDOnDkoLCx0d2kudWUCTbVajYyMDHz33Xdursi1dDodVCoVDAYDAGDo0KEICwtDSUmJmytzvcrKSuzfvx+pqc5fA8C3Pg3teOmll3DkyBGsW7cOarXa3eW4XH19PYxGo+373bt3IyQkBKGhoe4ryoUeffRRFBYWYvfu3di9ezdiYmKwYcMGjBs3zt2luUxDQwNqa2sBAJIkIT8/3+vvXgsPD8eoUaNsc7+VlJTAZDIhPj7ezZW53tatWzFhwgSEhYU5fd8+f03i+PHj+Otf/4qEhATcf//9AC7fMbBu3To3V+Y6jY2NWLx4MRobG6FUKhESEoLXX3/d6y9e+xKTyYSFC
xdCFEVYrVb069cPWVlZ7i7L5bKzs7F8+XLk5ORApVJhzZo1Xn0d5oqtW7fi6aefdsm+OcEfERHZxeEmIiKyiyFBRER2MSSIiMguhgQREdnFkCAiIrsYEkTd0PDhw1FeXu7uMogYEuR9Jk2ahNGjR6OhocH22vvvv48HH3zQ5ccdMmQIhg8fjjFjxmDZsmWor6/v1L4OHjyI3r17d6meBx98EO+//36X9kHEkCCvZLVasWnTJtmP+/rrr+PgwYPYunUrjhw5gtdee61VG4vFIntdRJ3FkCCvNGfOHLz11lu4ePFim++fOHECjzzyCJKTk5GSkoL8/HwAl2cSTUxMhNVqBQA888wzGD16tG27J598Em+//favHj86Ohrjx4/H8ePHAQD9+/fH5s2bMW3aNEybNg0A8N5772Hq1KlITk7G/PnzUVlZadu+f//+OHnyJIDLU0Dn5ORg4sSJGDNmDDIzM9HU1GRr+/nnnyMtLQ0jRozAlClT8MUXX+Dll1/GgQMHsHLlSgwfPhwrV66EJElYvXo1Ro8ejREjRiA1NRU//fRTB36q5IsYEuSVbrrpJiQnJ2PDhg2t3mtoaMDs2bNhMBjwn//8By+//DKys7Px888/o3fv3ggODkZxcTGAy5MDBgYG4sSJE7bvk5OTf/X4RqMRX3zxRYv5kj7//HO89957yM/Px969e/GnP/0Ja9euRWFhIeLi4vDEE0+0ua8XX3wRJSUl2LZtGz799FNUVVXZpo0pKirC0qVL8dRTT+HAgQPYvHkz4uLi8PjjjyMxMRGZmZk4ePAgMjMzUVhYiAMHDqCgoADffvst1q5d67XzdZHzMCTIay1atAi5ubmoqalp8fqePXsQFxeHmTNnQqVSYeDAgUhJScHOnTsBAElJSdi/fz+qq6sBACkpKfjmm29QXl6Ouro6DBgwwO4xFyxYgMTERGRkZCApKQnz58+3vffoo48iNDQU/v7+tsVhBg0aBLVajSeeeAKHDh3CqVOnWuxPkiS89957WL58OUJDQxEcHIzf/e53+PjjjwEA//znPzFz5kyMHTsWSqUS0dHR6NevX5u1qVQq1NfX45dffoEkSejXrx+ioqI6/oMln+LzE/yR97rxxhsxceJEvPHGGy0+OCsqKlBUVNRiMRpRFHHXXXcBAJKTk7Fr1y5ER0cjKSkJo0aNwvbt26HRaJCYmNjuVPLr1q3DmDFj2nzvytTdAFBVVdVicZigoCCEhoaisrKyxcJINTU1aGxsxIwZM2yvSZJkGw4zGo2YMGGCQz+P0aNH44EHHsDKlStRUVGBadOmYenSpQgODnZoe/JNDAnyaosWLcLdd9+N2bNn217T6XRISkrCxo0b29wmKSkJa9asQUxMDJKSkjBy5EhkZWVBo9EgKSmp07VcPctuVFQUKioqbN83NDTg/PnziI6ObrFNWFgY/P398fHHH7d670pfysrKHK7hoYcewkMPPQSTyYQ//OEPePPNN/GHP/yh450hn8HhJvJq8fHxuOOOO/DOO+/YXps4cSJKS0uxbds2mM1mmM1mFBUV2a47JCQkQKPR4KOPPkJycjKCg4Oh1WpRUFDQpZC4msFgwIcffoijR4+iubkZL730EoYMGdJqeVWlUol7770Xq1evhslkAnB5gZkvv/wSAHDPPffgww8/xN69e2G1WlFZWWnrR0RERItnLYqKivD999/DbDYjICAAarXa5xbYoo7jbwh5vQULFrR4ZiI4OBgbNmxAfn4+xo8fj3HjxuHFF19Ec3OzrU1ycjJCQ0NtQ0TJycmQJMlp6wePGTMGixcvxsKFCzFu3DiUl5fj5ZdfbrPtk08+ifj4eNx3330YMWIEfvvb39pWWxsyZAief/55rF69GiNHjsSsWbNw+vRpAJfPGq4E2x//+EfU19fjmWeeQXJyMm699VaEhoZizpw5TukPeS+uJ0HUzVitVuj1evzrX/9CbGysu8shH8czCaJu5qeffoJGo0FERIS7S
yFiSBB1JwUFBXj44YexZMkSn1hvnbo/DjcREZFdPJMgIiK7GBJERGQXQ4KIiOxiSBARkV0MCSIisoshQUREdv1/l+6oiqhoDz0AAAAASUVORK5CYII=\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df_hr.groupby(['NP'])['left'].agg(['mean', 'sum']).reset_index().plot.scatter(y='mean',x='NP', s='sum', xlabel='New Projects', ylabel='Attrition',ylim=(0,0.6) )" ] }, { "cell_type": "code", "execution_count": null, "id": "91a3053d-25a9-4c9c-881a-78fc43ac486e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<table class=\"simpletable\">\n", "<caption>Logit Regression Results</caption>\n", "<tr>\n", " <th>Dep. Variable:</th> <td>left</td> <th> No. Observations: </th> <td> 12000</td> \n", "</tr>\n", "<tr>\n", " <th>Model:</th> <td>Logit</td> <th> Df Residuals: </th> <td> 11993</td> \n", "</tr>\n", "<tr>\n", " <th>Method:</th> <td>MLE</td> <th> Df Model: </th> <td> 6</td> \n", "</tr>\n", "<tr>\n", " <th>Date:</th> <td>Fri, 27 May 2022</td> <th> Pseudo R-squ.: </th> <td>0.2131</td> \n", "</tr>\n", "<tr>\n", " <th>Time:</th> <td>08:46:06</td> <th> Log-Likelihood: </th> <td> -4254.5</td>\n", "</tr>\n", "<tr>\n", " <th>converged:</th> <td>True</td> <th> LL-Null: </th> <td> -5406.7</td>\n", "</tr>\n", "<tr>\n", " <th>Covariance Type:</th> <td>nonrobust</td> <th> LLR p-value: </th> <td> 0.000</td> \n", "</tr>\n", "</table>\n", "<table class=\"simpletable\">\n", "<tr>\n", " <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n", "</tr>\n", "<tr>\n", " <th>const</th> <td> -1.2412</td> <td> 0.160</td> <td> -7.751</td> <td> 0.000</td> <td> -1.555</td> <td> -0.927</td>\n", "</tr>\n", "<tr>\n", " <th>S</th> <td> -3.8163</td> <td> 0.121</td> <td> -31.607</td> <td> 0.000</td> <td> -4.053</td> <td> -3.580</td>\n", "</tr>\n", "<tr>\n", " <th>LPE</th> <td> 0.5044</td> <td> 0.181</td> <td> 2.788</td> <td> 0.005</td> <td> 0.150</td> <td> 0.859</td>\n", "</tr>\n", "<tr>\n", " <th>NP</th> <td> 
-0.3592</td> <td> 0.026</td> <td> -13.569</td> <td> 0.000</td> <td> -0.411</td> <td> -0.307</td>\n", "</tr>\n", "<tr>\n", " <th>ANH</th> <td> 0.0038</td> <td> 0.001</td> <td> 6.067</td> <td> 0.000</td> <td> 0.003</td> <td> 0.005</td>\n", "</tr>\n", "<tr>\n", " <th>TIC</th> <td> 0.6188</td> <td> 0.027</td> <td> 22.820</td> <td> 0.000</td> <td> 0.566</td> <td> 0.672</td>\n", "</tr>\n", "<tr>\n", " <th>Newborn</th> <td> -1.4851</td> <td> 0.113</td> <td> -13.157</td> <td> 0.000</td> <td> -1.706</td> <td> -1.264</td>\n", "</tr>\n", "</table>" ], "text/plain": [ "<class 'statsmodels.iolib.summary.Summary'>\n", "\"\"\"\n", " Logit Regression Results \n", "==============================================================================\n", "Dep. Variable: left No. Observations: 12000\n", "Model: Logit Df Residuals: 11993\n", "Method: MLE Df Model: 6\n", "Date: Fri, 27 May 2022 Pseudo R-squ.: 0.2131\n", "Time: 08:46:06 Log-Likelihood: -4254.5\n", "converged: True LL-Null: -5406.7\n", "Covariance Type: nonrobust LLR p-value: 0.000\n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -1.2412 0.160 -7.751 0.000 -1.555 -0.927\n", "S -3.8163 0.121 -31.607 0.000 -4.053 -3.580\n", "LPE 0.5044 0.181 2.788 0.005 0.150 0.859\n", "NP -0.3592 0.026 -13.569 0.000 -0.411 -0.307\n", "ANH 0.0038 0.001 6.067 0.000 0.003 0.005\n", "TIC 0.6188 0.027 22.820 0.000 0.566 0.672\n", "Newborn -1.4851 0.113 -13.157 0.000 -1.706 -1.264\n", "==============================================================================\n", "\"\"\"" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_hr.summary()" ] }, { "cell_type": "code", "execution_count": null, "id": "01476cae-b429-48e3-bd59-6dac6d4294a9", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": 
"Python 3 (ipykernel)", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 5 }