{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2c90de90-1e3e-4bf1-8705-b2b85ed8a387",
   "metadata": {},
   "source": [
    "# Regression: Understanding effect and cause"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "49d86280-47eb-471e-becf-33affb9f56c5",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5cb2af73-246e-4a95-ad43-1816b79d3985",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scipy as sp\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "from sklearn.linear_model import LinearRegression, LogisticRegression\n",
    "from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder\n",
    "from sklearn.pipeline import Pipeline, make_pipeline\n",
    "import statsmodels.api as sm\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1e147e1-14c7-4dbf-beab-02a3ed42c760",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply seaborn's default theme to the figures drawn later in the notebook.\n",
    "sns.set()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17a98d1e-3959-405c-bc52-ffcc9f53fb9a",
   "metadata": {},
   "source": [
    "## Credit Score Rating Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f142627-eaa6-40f9-830b-90594c450469",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Income</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Cards</th>\n",
       "      <th>Age</th>\n",
       "      <th>Education</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Student</th>\n",
       "      <th>Married</th>\n",
       "      <th>Ethnicity</th>\n",
       "      <th>Balance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>14.891</td>\n",
       "      <td>283</td>\n",
       "      <td>2</td>\n",
       "      <td>34</td>\n",
       "      <td>11</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Caucasian</td>\n",
       "      <td>333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>106.025</td>\n",
       "      <td>483</td>\n",
       "      <td>3</td>\n",
       "      <td>82</td>\n",
       "      <td>15</td>\n",
       "      <td>Female</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Asian</td>\n",
       "      <td>903</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>104.593</td>\n",
       "      <td>514</td>\n",
       "      <td>4</td>\n",
       "      <td>71</td>\n",
       "      <td>11</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>No</td>\n",
       "      <td>Asian</td>\n",
       "      <td>580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>148.924</td>\n",
       "      <td>681</td>\n",
       "      <td>3</td>\n",
       "      <td>36</td>\n",
       "      <td>11</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>No</td>\n",
       "      <td>Asian</td>\n",
       "      <td>964</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>55.882</td>\n",
       "      <td>357</td>\n",
       "      <td>2</td>\n",
       "      <td>68</td>\n",
       "      <td>16</td>\n",
       "      <td>Male</td>\n",
       "      <td>No</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Caucasian</td>\n",
       "      <td>331</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Income  Rating  Cards  Age  Education  Gender Student Married  Ethnicity  \\\n",
       "0   14.891     283      2   34         11    Male      No     Yes  Caucasian   \n",
       "1  106.025     483      3   82         15  Female     Yes     Yes      Asian   \n",
       "2  104.593     514      4   71         11    Male      No      No      Asian   \n",
       "3  148.924     681      3   36         11  Female      No      No      Asian   \n",
       "4   55.882     357      2   68         16    Male      No     Yes  Caucasian   \n",
       "\n",
       "   Balance  \n",
       "0      333  \n",
       "1      903  \n",
       "2      580  \n",
       "3      964  \n",
       "4      331  "
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the credit data; the four text columns are read as pandas categoricals.\n",
    "df_credscore = pd.read_csv(\n",
    "    \"DATA_3.01_CREDIT.csv\",\n",
    "    dtype=dict.fromkeys(['Gender', 'Student', 'Married', 'Ethnicity'], 'category'),\n",
    ")\n",
    "df_credscore.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58c58a15-5422-4cd9-9ea0-11e2eb912f98",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 300 entries, 0 to 299\n",
      "Data columns (total 10 columns):\n",
      " #   Column     Non-Null Count  Dtype   \n",
      "---  ------     --------------  -----   \n",
      " 0   Income     300 non-null    float64 \n",
      " 1   Rating     300 non-null    int64   \n",
      " 2   Cards      300 non-null    int64   \n",
      " 3   Age        300 non-null    int64   \n",
      " 4   Education  300 non-null    int64   \n",
      " 5   Gender     300 non-null    category\n",
      " 6   Student    300 non-null    category\n",
      " 7   Married    300 non-null    category\n",
      " 8   Ethnicity  300 non-null    category\n",
      " 9   Balance    300 non-null    int64   \n",
      "dtypes: category(4), float64(1), int64(5)\n",
      "memory usage: 15.6 KB\n"
     ]
    }
   ],
   "source": [
    "# Column dtypes and non-null counts of the loaded frame.\n",
    "df_credscore.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cdb64a98-e23c-41ef-b0c0-5f672ac69e7b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Income</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Cards</th>\n",
       "      <th>Age</th>\n",
       "      <th>Education</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Student</th>\n",
       "      <th>Married</th>\n",
       "      <th>Ethnicity</th>\n",
       "      <th>Balance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>300.000000</td>\n",
       "      <td>300.000000</td>\n",
       "      <td>300.000000</td>\n",
       "      <td>300.000000</td>\n",
       "      <td>300.000000</td>\n",
       "      <td>300</td>\n",
       "      <td>300</td>\n",
       "      <td>300</td>\n",
       "      <td>300</td>\n",
       "      <td>300.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Female</td>\n",
       "      <td>No</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Caucasian</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>168</td>\n",
       "      <td>268</td>\n",
       "      <td>183</td>\n",
       "      <td>141</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>44.054393</td>\n",
       "      <td>348.116667</td>\n",
       "      <td>3.026667</td>\n",
       "      <td>54.983333</td>\n",
       "      <td>13.393333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>502.686667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>33.863066</td>\n",
       "      <td>150.871547</td>\n",
       "      <td>1.351064</td>\n",
       "      <td>17.216982</td>\n",
       "      <td>3.075193</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>466.991447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>10.354000</td>\n",
       "      <td>93.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>24.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>21.027500</td>\n",
       "      <td>235.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>41.000000</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15.750000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>33.115500</td>\n",
       "      <td>339.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>55.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>433.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>55.975500</td>\n",
       "      <td>433.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>69.000000</td>\n",
       "      <td>16.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>857.750000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>186.634000</td>\n",
       "      <td>949.000000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>91.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1809.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Income      Rating       Cards         Age   Education  Gender  \\\n",
       "count   300.000000  300.000000  300.000000  300.000000  300.000000     300   \n",
       "unique         NaN         NaN         NaN         NaN         NaN       2   \n",
       "top            NaN         NaN         NaN         NaN         NaN  Female   \n",
       "freq           NaN         NaN         NaN         NaN         NaN     168   \n",
       "mean     44.054393  348.116667    3.026667   54.983333   13.393333     NaN   \n",
       "std      33.863066  150.871547    1.351064   17.216982    3.075193     NaN   \n",
       "min      10.354000   93.000000    1.000000   24.000000    5.000000     NaN   \n",
       "25%      21.027500  235.000000    2.000000   41.000000   11.000000     NaN   \n",
       "50%      33.115500  339.000000    3.000000   55.000000   14.000000     NaN   \n",
       "75%      55.975500  433.000000    4.000000   69.000000   16.000000     NaN   \n",
       "max     186.634000  949.000000    8.000000   91.000000   20.000000     NaN   \n",
       "\n",
       "       Student Married  Ethnicity      Balance  \n",
       "count      300     300        300   300.000000  \n",
       "unique       2       2          3          NaN  \n",
       "top         No     Yes  Caucasian          NaN  \n",
       "freq       268     183        141          NaN  \n",
       "mean       NaN     NaN        NaN   502.686667  \n",
       "std        NaN     NaN        NaN   466.991447  \n",
       "min        NaN     NaN        NaN     0.000000  \n",
       "25%        NaN     NaN        NaN    15.750000  \n",
       "50%        NaN     NaN        NaN   433.500000  \n",
       "75%        NaN     NaN        NaN   857.750000  \n",
       "max        NaN     NaN        NaN  1809.000000  "
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for every column; include='all' also covers the category columns.\n",
    "df_credscore.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cf537297-931e-4db9-bba1-c24983cdc2b2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Income</th>\n",
       "      <th>Rating</th>\n",
       "      <th>Cards</th>\n",
       "      <th>Age</th>\n",
       "      <th>Education</th>\n",
       "      <th>Balance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Income</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.771167</td>\n",
       "      <td>0.028875</td>\n",
       "      <td>0.123201</td>\n",
       "      <td>-0.070959</td>\n",
       "      <td>0.432327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rating</th>\n",
       "      <td>0.771167</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.095854</td>\n",
       "      <td>0.042377</td>\n",
       "      <td>-0.095433</td>\n",
       "      <td>0.859829</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cards</th>\n",
       "      <td>0.028875</td>\n",
       "      <td>0.095854</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.054655</td>\n",
       "      <td>0.015176</td>\n",
       "      <td>0.123846</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Age</th>\n",
       "      <td>0.123201</td>\n",
       "      <td>0.042377</td>\n",
       "      <td>0.054655</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.046178</td>\n",
       "      <td>-0.052426</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Education</th>\n",
       "      <td>-0.070959</td>\n",
       "      <td>-0.095433</td>\n",
       "      <td>0.015176</td>\n",
       "      <td>-0.046178</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.073167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Balance</th>\n",
       "      <td>0.432327</td>\n",
       "      <td>0.859829</td>\n",
       "      <td>0.123846</td>\n",
       "      <td>-0.052426</td>\n",
       "      <td>-0.073167</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Income    Rating     Cards       Age  Education   Balance\n",
       "Income     1.000000  0.771167  0.028875  0.123201  -0.070959  0.432327\n",
       "Rating     0.771167  1.000000  0.095854  0.042377  -0.095433  0.859829\n",
       "Cards      0.028875  0.095854  1.000000  0.054655   0.015176  0.123846\n",
       "Age        0.123201  0.042377  0.054655  1.000000  -0.046178 -0.052426\n",
       "Education -0.070959 -0.095433  0.015176 -0.046178   1.000000 -0.073167\n",
       "Balance    0.432327  0.859829  0.123846 -0.052426  -0.073167  1.000000"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Individual correlations. Restricted to the numeric columns because, from pandas 2.0,\n",
    "# DataFrame.corr() no longer drops non-numeric columns silently and raises on the\n",
    "# category columns instead.\n",
    "df_credscore.select_dtypes(include='number').corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4f86f79-7ab6-4fcb-8f4c-60fa461990bb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Income       0.771167\n",
       "Rating       1.000000\n",
       "Cards        0.095854\n",
       "Age          0.042377\n",
       "Education   -0.095433\n",
       "Balance      0.859829\n",
       "Name: Rating, dtype: float64"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Correlation of each numeric column with Rating. The category columns are excluded\n",
    "# first because DataFrame.corr() raises on them from pandas 2.0 onwards.\n",
    "df_credscore.select_dtypes(include='number').corr()[\"Rating\"] # We need to understand interactions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71f8d364-b3c7-442b-9d1f-a0177b224326",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pipeline(steps=[('ordinalencoder', OrdinalEncoder()),\n",
       "                ('linearregression', LinearRegression())])"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Two-step pipeline: ordinal-encode the inputs, then fit an ordinary least-squares model.\n",
    "# NOTE(review): OrdinalEncoder is applied to every column it receives, not only the\n",
    "# category ones, so numeric features are also replaced by ordinal codes before the\n",
    "# regression. If raw numeric values are intended, restrict the encoder to the\n",
    "# categorical columns (e.g. with sklearn.compose.ColumnTransformer) - confirm.\n",
    "pipeline = make_pipeline(OrdinalEncoder(),LinearRegression()); pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49bddc88-7e6e-4b6a-aa93-fb20e5755f17",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split target and features without mutating df_credscore, so this cell and the\n",
    "# earlier cells that read the 'Rating' column can be re-run safely.\n",
    "y = df_credscore['Rating']\n",
    "X = df_credscore.drop(columns='Rating')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "738990d1-3603-4508-994f-6f7c9965e98d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 300 entries, 0 to 299\n",
      "Data columns (total 9 columns):\n",
      " #   Column     Non-Null Count  Dtype   \n",
      "---  ------     --------------  -----   \n",
      " 0   Income     300 non-null    float64 \n",
      " 1   Cards      300 non-null    int64   \n",
      " 2   Age        300 non-null    int64   \n",
      " 3   Education  300 non-null    int64   \n",
      " 4   Gender     300 non-null    category\n",
      " 5   Student    300 non-null    category\n",
      " 6   Married    300 non-null    category\n",
      " 7   Ethnicity  300 non-null    category\n",
      " 8   Balance    300 non-null    int64   \n",
      "dtypes: category(4), float64(1), int64(4)\n",
      "memory usage: 13.3 KB\n"
     ]
    }
   ],
   "source": [
    "# Check the columns and dtypes of the feature frame.\n",
    "X.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7483a8ff-7b00-482c-aa7e-a68aad62023b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pipeline(steps=[('ordinalencoder', OrdinalEncoder()),\n",
       "                ('linearregression', LinearRegression())])"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit the encoder + linear regression pipeline on features X and target y.\n",
    "pipeline.fit(X,y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "074306df-d771-4564-a985-569c6512e2cf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LinearRegression()"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Pull the LinearRegression step out of the pipeline by its auto-generated step name.\n",
    "model = pipeline.named_steps['linearregression']\n",
    "model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "23a58fef-1ec2-46a1-9e65-9e993981c9de",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([  0.71532602,   1.41275992,   0.17419851,   0.61789045,\n",
       "         0.33006896, -91.64416173,   3.56809569,  -2.47231507,\n",
       "         1.6260681 ])"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Coefficients of the linear regression step.\n",
    "model.coef_"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e75338a2-9086-461e-9f30-3a51fa87a542",
   "metadata": {},
   "source": [
    "### Statsmodels API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cc42cee7-4ae2-487c-ad8b-0e1d347f50ca",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Income</th>\n",
       "      <th>Cards</th>\n",
       "      <th>Age</th>\n",
       "      <th>Education</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Student</th>\n",
       "      <th>Married</th>\n",
       "      <th>Ethnicity</th>\n",
       "      <th>Balance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>14.891</td>\n",
       "      <td>2</td>\n",
       "      <td>34</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>106.025</td>\n",
       "      <td>3</td>\n",
       "      <td>82</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>903</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>104.593</td>\n",
       "      <td>4</td>\n",
       "      <td>71</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>148.924</td>\n",
       "      <td>3</td>\n",
       "      <td>36</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>964</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>55.882</td>\n",
       "      <td>2</td>\n",
       "      <td>68</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>331</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>295</th>\n",
       "      <td>27.272</td>\n",
       "      <td>5</td>\n",
       "      <td>67</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>296</th>\n",
       "      <td>65.896</td>\n",
       "      <td>1</td>\n",
       "      <td>49</td>\n",
       "      <td>17</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>293</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297</th>\n",
       "      <td>55.054</td>\n",
       "      <td>3</td>\n",
       "      <td>74</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>298</th>\n",
       "      <td>20.791</td>\n",
       "      <td>1</td>\n",
       "      <td>70</td>\n",
       "      <td>18</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>299</th>\n",
       "      <td>24.919</td>\n",
       "      <td>3</td>\n",
       "      <td>76</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>711</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>300 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Income  Cards  Age  Education  Gender  Student  Married  Ethnicity  \\\n",
       "0     14.891      2   34         11       0        0        1          2   \n",
       "1    106.025      3   82         15       1        1        1          1   \n",
       "2    104.593      4   71         11       0        0        0          1   \n",
       "3    148.924      3   36         11       1        0        0          1   \n",
       "4     55.882      2   68         16       0        0        1          2   \n",
       "..       ...    ...  ...        ...     ...      ...      ...        ...   \n",
       "295   27.272      5   67         10       1        0        1          2   \n",
       "296   65.896      1   49         17       1        0        1          2   \n",
       "297   55.054      3   74         17       0        0        1          1   \n",
       "298   20.791      1   70         18       1        0        0          0   \n",
       "299   24.919      3   76         11       1        0        1          0   \n",
       "\n",
       "     Balance  \n",
       "0        333  \n",
       "1        903  \n",
       "2        580  \n",
       "3        964  \n",
       "4        331  \n",
       "..       ...  \n",
       "295        0  \n",
       "296      293  \n",
       "297      188  \n",
       "298        0  \n",
       "299      711  \n",
       "\n",
       "[300 rows x 9 columns]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def preprocess_categories(df):\n",
    "    \"\"\"Return a copy of `df` with each category-dtype column replaced by its integer codes.\n",
    "\n",
    "    Codes come from `Series.cat.codes`, so they follow the order of each column's\n",
    "    categories and missing values are encoded as -1. The input frame is not modified.\n",
    "    \"\"\"\n",
    "    encoded = df.copy()\n",
    "    for name in df.select_dtypes(include='category').columns:\n",
    "        encoded[name] = df[name].cat.codes\n",
    "    return encoded\n",
    "\n",
    "preprocess_categories(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad73cf4b-6928-4a3a-94db-b9751777b364",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>const</th>\n",
       "      <th>Income</th>\n",
       "      <th>Cards</th>\n",
       "      <th>Age</th>\n",
       "      <th>Education</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Student</th>\n",
       "      <th>Married</th>\n",
       "      <th>Ethnicity</th>\n",
       "      <th>Balance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>14.891</td>\n",
       "      <td>2</td>\n",
       "      <td>34</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>106.025</td>\n",
       "      <td>3</td>\n",
       "      <td>82</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>903</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>104.593</td>\n",
       "      <td>4</td>\n",
       "      <td>71</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>148.924</td>\n",
       "      <td>3</td>\n",
       "      <td>36</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>964</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>55.882</td>\n",
       "      <td>2</td>\n",
       "      <td>68</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>331</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>295</th>\n",
       "      <td>1.0</td>\n",
       "      <td>27.272</td>\n",
       "      <td>5</td>\n",
       "      <td>67</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>296</th>\n",
       "      <td>1.0</td>\n",
       "      <td>65.896</td>\n",
       "      <td>1</td>\n",
       "      <td>49</td>\n",
       "      <td>17</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>293</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297</th>\n",
       "      <td>1.0</td>\n",
       "      <td>55.054</td>\n",
       "      <td>3</td>\n",
       "      <td>74</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>298</th>\n",
       "      <td>1.0</td>\n",
       "      <td>20.791</td>\n",
       "      <td>1</td>\n",
       "      <td>70</td>\n",
       "      <td>18</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>299</th>\n",
       "      <td>1.0</td>\n",
       "      <td>24.919</td>\n",
       "      <td>3</td>\n",
       "      <td>76</td>\n",
       "      <td>11</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>711</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>300 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     const   Income  Cards  Age  Education  Gender  Student  Married  \\\n",
       "0      1.0   14.891      2   34         11       0        0        1   \n",
       "1      1.0  106.025      3   82         15       1        1        1   \n",
       "2      1.0  104.593      4   71         11       0        0        0   \n",
       "3      1.0  148.924      3   36         11       1        0        0   \n",
       "4      1.0   55.882      2   68         16       0        0        1   \n",
       "..     ...      ...    ...  ...        ...     ...      ...      ...   \n",
       "295    1.0   27.272      5   67         10       1        0        1   \n",
       "296    1.0   65.896      1   49         17       1        0        1   \n",
       "297    1.0   55.054      3   74         17       0        0        1   \n",
       "298    1.0   20.791      1   70         18       1        0        0   \n",
       "299    1.0   24.919      3   76         11       1        0        1   \n",
       "\n",
       "     Ethnicity  Balance  \n",
       "0            2      333  \n",
       "1            1      903  \n",
       "2            1      580  \n",
       "3            1      964  \n",
       "4            2      331  \n",
       "..         ...      ...  \n",
       "295          2        0  \n",
       "296          2      293  \n",
       "297          1      188  \n",
       "298          0        0  \n",
       "299          0      711  \n",
       "\n",
       "[300 rows x 10 columns]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = sm.add_constant(preprocess_categories(X))\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b43d7bd-46bd-412a-9645-43364bc89b3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = sm.OLS(y, X).fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af0e37a0-e8ab-41f0-b1aa-8f9b4a73a630",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>         <td>Rating</td>      <th>  R-squared:         </th> <td>   0.974</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.973</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   1185.</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Fri, 27 May 2022</td> <th>  Prob (F-statistic):</th> <td>6.33e-223</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>08:02:31</td>     <th>  Log-Likelihood:    </th> <td> -1385.4</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>   300</td>      <th>  AIC:               </th> <td>   2791.</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>   290</td>      <th>  BIC:               </th> <td>   2828.</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     9</td>      <th>                     </th>     <td> </td>    \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>    \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th>     <td>  139.4908</td> <td>    9.595</td> <td>   14.538</td> <td> 0.000</td> <td>  120.607</td> <td>  158.375</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Income</th>    <td>    2.0946</td> <td>    0.048</td> <td>   43.507</td> <td> 0.000</td> <td>    2.000</td> <td>    2.189</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Cards</th>     <td>   -0.7769</td> <td>    1.080</td> <td>   -0.719</td> <td> 0.473</td> <td>   -2.903</td> <td>    1.349</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Age</th>       <td>    0.1493</td> <td>    0.086</td> <td>    1.740</td> <td> 0.083</td> <td>   -0.020</td> <td>    0.318</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Education</th> <td>    0.1721</td> <td>    0.474</td> <td>    0.363</td> <td> 0.717</td> <td>   -0.761</td> <td>    1.105</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Gender</th>    <td>    1.8529</td> <td>    2.919</td> <td>    0.635</td> <td> 0.526</td> <td>   -3.891</td> <td>    7.597</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Student</th>   <td>  -99.2582</td> <td>    4.947</td> <td>  -20.066</td> <td> 0.000</td> <td> -108.994</td> <td>  -89.522</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Married</th>   <td>    2.7424</td> <td>    2.983</td> <td>    0.919</td> <td> 0.359</td> <td>   -3.129</td> <td>    8.614</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Ethnicity</th> <td>   -0.3005</td> <td>    1.745</td> <td>   -0.172</td> <td> 0.863</td> <td>   -3.735</td> <td>    3.134</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Balance</th>   <td>    0.2316</td> <td>    0.004</td> <td>   63.330</td> <td> 0.000</td> <td>    0.224</td> <td>    0.239</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>43.876</td> <th>  Durbin-Watson:     </th> <td>   1.851</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  59.049</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td>-0.999</td> <th>  Prob(JB):          </th> <td>1.51e-13</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 3.857</td> <th>  Cond. No.          </th> <td>4.61e+03</td>\n",
       "</tr>\n",
       "</table><br/><br/>Notes:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.<br/>[2] The condition number is large, 4.61e+03. This might indicate that there are<br/>strong multicollinearity or other numerical problems."
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                 Rating   R-squared:                       0.974\n",
       "Model:                            OLS   Adj. R-squared:                  0.973\n",
       "Method:                 Least Squares   F-statistic:                     1185.\n",
       "Date:                Fri, 27 May 2022   Prob (F-statistic):          6.33e-223\n",
       "Time:                        08:02:31   Log-Likelihood:                -1385.4\n",
       "No. Observations:                 300   AIC:                             2791.\n",
       "Df Residuals:                     290   BIC:                             2828.\n",
       "Df Model:                           9                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "const        139.4908      9.595     14.538      0.000     120.607     158.375\n",
       "Income         2.0946      0.048     43.507      0.000       2.000       2.189\n",
       "Cards         -0.7769      1.080     -0.719      0.473      -2.903       1.349\n",
       "Age            0.1493      0.086      1.740      0.083      -0.020       0.318\n",
       "Education      0.1721      0.474      0.363      0.717      -0.761       1.105\n",
       "Gender         1.8529      2.919      0.635      0.526      -3.891       7.597\n",
       "Student      -99.2582      4.947    -20.066      0.000    -108.994     -89.522\n",
       "Married        2.7424      2.983      0.919      0.359      -3.129       8.614\n",
       "Ethnicity     -0.3005      1.745     -0.172      0.863      -3.735       3.134\n",
       "Balance        0.2316      0.004     63.330      0.000       0.224       0.239\n",
       "==============================================================================\n",
       "Omnibus:                       43.876   Durbin-Watson:                   1.851\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               59.049\n",
       "Skew:                          -0.999   Prob(JB):                     1.51e-13\n",
       "Kurtosis:                       3.857   Cond. No.                     4.61e+03\n",
       "==============================================================================\n",
       "\n",
       "Notes:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "[2] The condition number is large, 4.61e+03. This might indicate that there are\n",
       "strong multicollinearity or other numerical problems.\n",
       "\"\"\""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a08f7238-5ef6-409e-866b-5bd4d30c22d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Balance      63.329758\n",
       "Income       43.507422\n",
       "Student      20.066064\n",
       "const        14.538499\n",
       "Age           1.740111\n",
       "Married       0.919321\n",
       "Cards         0.719127\n",
       "Gender        0.634877\n",
       "Education     0.363174\n",
       "Ethnicity     0.172217\n",
       "dtype: float64"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.tvalues.abs().sort_values(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b899f23-2fb1-423c-9212-33c5722bbde6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Balance    63.329758\n",
       "Income     43.507422\n",
       "Student   -20.066064\n",
       "const      14.538499\n",
       "dtype: float64"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.tvalues[model.tvalues[model.pvalues <= 0.05].abs().sort_values(ascending=False).index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7be36d13-32e1-4274-bd06-862895be0302",
   "metadata": {},
   "outputs": [],
   "source": [
    "# np.corr(model.fittedvalues,y.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a09f1fed-3174-4e7e-8c6d-686a4cbc8de0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pearson correlation between the fitted and the observed ratings.\n",
    "# np.correlate is not used here: it returns the unnormalised sliding dot product\n",
    "# of the two sequences, not a correlation coefficient in [-1, 1].\n",
    "np.corrcoef(model.fittedvalues, y.values)[0, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20d2c6d5-1b33-43b4-95fa-d6b6e15fc023",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      255.364880\n",
       "1      488.244174\n",
       "2      501.990296\n",
       "3      681.185956\n",
       "4      346.698891\n",
       "          ...    \n",
       "295    208.450617\n",
       "296    358.837627\n",
       "297    312.436387\n",
       "298    197.666967\n",
       "299    371.866045\n",
       "Length: 300, dtype: float64"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fittedvalues"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "84e39447-13cf-464a-9c8a-ec2d1f9fc151",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.       , 0.9866719],\n",
       "       [0.9866719, 1.       ]])"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.corrcoef(model.fittedvalues.values, y.values)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d4ddb1b1-bf69-4d3f-ad5b-630cc70071a2",
   "metadata": {},
   "source": [
    "### Limited Variables: Income, Cards, Married"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45f58e37-fc62-440f-8a45-2568e0f2d7a5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>const</th>\n",
       "      <th>Income</th>\n",
       "      <th>Cards</th>\n",
       "      <th>Married</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.0</td>\n",
       "      <td>14.891</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>106.025</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>104.593</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>148.924</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>55.882</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>295</th>\n",
       "      <td>1.0</td>\n",
       "      <td>27.272</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>296</th>\n",
       "      <td>1.0</td>\n",
       "      <td>65.896</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297</th>\n",
       "      <td>1.0</td>\n",
       "      <td>55.054</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>298</th>\n",
       "      <td>1.0</td>\n",
       "      <td>20.791</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>299</th>\n",
       "      <td>1.0</td>\n",
       "      <td>24.919</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>300 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     const   Income  Cards  Married\n",
       "0      1.0   14.891      2        1\n",
       "1      1.0  106.025      3        1\n",
       "2      1.0  104.593      4        0\n",
       "3      1.0  148.924      3        0\n",
       "4      1.0   55.882      2        1\n",
       "..     ...      ...    ...      ...\n",
       "295    1.0   27.272      5        1\n",
       "296    1.0   65.896      1        1\n",
       "297    1.0   55.054      3        1\n",
       "298    1.0   20.791      1        0\n",
       "299    1.0   24.919      3        1\n",
       "\n",
       "[300 rows x 4 columns]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_red = X[['const', 'Income', 'Cards', 'Married']]\n",
    "X_red"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c07628d2-e64e-4b80-8adc-acbaf46aecea",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d18b567f-4f32-40bf-98ed-1088b5237f99",
   "metadata": {},
   "outputs": [],
   "source": [
    "model2 = sm.OLS(y, X_red).fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8869c58c-7201-4689-a268-8cb3a0d158b2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>         <td>Rating</td>      <th>  R-squared:         </th> <td>   0.602</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.598</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   149.0</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Fri, 27 May 2022</td> <th>  Prob (F-statistic):</th> <td>7.56e-59</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>08:02:39</td>     <th>  Log-Likelihood:    </th> <td> -1792.1</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>   300</td>      <th>  AIC:               </th> <td>   3592.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>   296</td>      <th>  BIC:               </th> <td>   3607.</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     3</td>      <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "     <td></td>        <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th>   <td>  165.2144</td> <td>   16.641</td> <td>    9.928</td> <td> 0.000</td> <td>  132.464</td> <td>  197.964</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Income</th>  <td>    3.4196</td> <td>    0.164</td> <td>   20.896</td> <td> 0.000</td> <td>    3.098</td> <td>    3.742</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Cards</th>   <td>    8.2699</td> <td>    4.099</td> <td>    2.018</td> <td> 0.045</td> <td>    0.203</td> <td>   16.336</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Married</th> <td>   11.8404</td> <td>   11.339</td> <td>    1.044</td> <td> 0.297</td> <td>  -10.474</td> <td>   34.155</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>133.940</td> <th>  Durbin-Watson:     </th> <td>   1.873</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th> <td> 0.000</td>  <th>  Jarque-Bera (JB):  </th> <td>  17.170</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>          <td> 0.044</td>  <th>  Prob(JB):          </th> <td>0.000187</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>      <td> 1.831</td>  <th>  Cond. No.          </th> <td>    179.</td>\n",
       "</tr>\n",
       "</table><br/><br/>Notes:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:                 Rating   R-squared:                       0.602\n",
       "Model:                            OLS   Adj. R-squared:                  0.598\n",
       "Method:                 Least Squares   F-statistic:                     149.0\n",
       "Date:                Fri, 27 May 2022   Prob (F-statistic):           7.56e-59\n",
       "Time:                        08:02:39   Log-Likelihood:                -1792.1\n",
       "No. Observations:                 300   AIC:                             3592.\n",
       "Df Residuals:                     296   BIC:                             3607.\n",
       "Df Model:                           3                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "const        165.2144     16.641      9.928      0.000     132.464     197.964\n",
       "Income         3.4196      0.164     20.896      0.000       3.098       3.742\n",
       "Cards          8.2699      4.099      2.018      0.045       0.203      16.336\n",
       "Married       11.8404     11.339      1.044      0.297     -10.474      34.155\n",
       "==============================================================================\n",
       "Omnibus:                      133.940   Durbin-Watson:                   1.873\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               17.170\n",
       "Skew:                           0.044   Prob(JB):                     0.000187\n",
       "Kurtosis:                       1.831   Cond. No.                         179.\n",
       "==============================================================================\n",
       "\n",
       "Notes:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "\"\"\""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model2.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c15f308-a5bd-420a-9060-aae8a3a558e4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Income    20.895784\n",
       "const      9.928059\n",
       "Cards      2.017633\n",
       "dtype: float64"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model2.tvalues[model2.tvalues[model2.pvalues <= 0.05].abs().sort_values(ascending=False).index]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "73e5e5eb-5d84-4cf4-917d-2224ee66fb5a",
   "metadata": {},
   "source": [
    "## HR Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8a4a2cb-5ae8-434d-a6e6-44d8f7f8e51e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>S</th>\n",
       "      <th>LPE</th>\n",
       "      <th>NP</th>\n",
       "      <th>ANH</th>\n",
       "      <th>TIC</th>\n",
       "      <th>Newborn</th>\n",
       "      <th>left</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.38</td>\n",
       "      <td>0.53</td>\n",
       "      <td>2</td>\n",
       "      <td>157</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.80</td>\n",
       "      <td>0.86</td>\n",
       "      <td>5</td>\n",
       "      <td>262</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.11</td>\n",
       "      <td>0.88</td>\n",
       "      <td>7</td>\n",
       "      <td>272</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.72</td>\n",
       "      <td>0.87</td>\n",
       "      <td>5</td>\n",
       "      <td>223</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.37</td>\n",
       "      <td>0.52</td>\n",
       "      <td>2</td>\n",
       "      <td>159</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      S   LPE  NP  ANH  TIC  Newborn  left\n",
       "0  0.38  0.53   2  157    3        0     1\n",
       "1  0.80  0.86   5  262    6        0     1\n",
       "2  0.11  0.88   7  272    4        0     1\n",
       "3  0.72  0.87   5  223    5        0     1\n",
       "4  0.37  0.52   2  159    3        0     1"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_hr = pd.read_csv(\"DATA_3.02_HR2.csv\"); df_hr.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3c97757-53c6-4c41-b6d3-8f8937d66370",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 12000 entries, 0 to 11999\n",
      "Data columns (total 7 columns):\n",
      " #   Column   Non-Null Count  Dtype  \n",
      "---  ------   --------------  -----  \n",
      " 0   S        12000 non-null  float64\n",
      " 1   LPE      12000 non-null  float64\n",
      " 2   NP       12000 non-null  int64  \n",
      " 3   ANH      12000 non-null  int64  \n",
      " 4   TIC      12000 non-null  int64  \n",
      " 5   Newborn  12000 non-null  int64  \n",
      " 6   left     12000 non-null  int64  \n",
      "dtypes: float64(2), int64(5)\n",
      "memory usage: 656.4 KB\n"
     ]
    }
   ],
   "source": [
    "df_hr.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b631c30c-cc89-40e0-8fb8-652392f52d36",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>S</th>\n",
       "      <th>LPE</th>\n",
       "      <th>NP</th>\n",
       "      <th>ANH</th>\n",
       "      <th>TIC</th>\n",
       "      <th>Newborn</th>\n",
       "      <th>left</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>12000.000000</td>\n",
       "      <td>12000.000000</td>\n",
       "      <td>12000.000000</td>\n",
       "      <td>12000.000000</td>\n",
       "      <td>12000.000000</td>\n",
       "      <td>12000.000000</td>\n",
       "      <td>12000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>0.629463</td>\n",
       "      <td>0.716558</td>\n",
       "      <td>3.801833</td>\n",
       "      <td>200.437917</td>\n",
       "      <td>3.228750</td>\n",
       "      <td>0.154167</td>\n",
       "      <td>0.166667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.241100</td>\n",
       "      <td>0.168368</td>\n",
       "      <td>1.163906</td>\n",
       "      <td>48.740178</td>\n",
       "      <td>1.056811</td>\n",
       "      <td>0.361123</td>\n",
       "      <td>0.372694</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.090000</td>\n",
       "      <td>0.360000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>96.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.480000</td>\n",
       "      <td>0.570000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>157.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.660000</td>\n",
       "      <td>0.720000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>199.500000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.820000</td>\n",
       "      <td>0.860000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>243.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>310.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  S           LPE            NP           ANH           TIC  \\\n",
       "count  12000.000000  12000.000000  12000.000000  12000.000000  12000.000000   \n",
       "mean       0.629463      0.716558      3.801833    200.437917      3.228750   \n",
       "std        0.241100      0.168368      1.163906     48.740178      1.056811   \n",
       "min        0.090000      0.360000      2.000000     96.000000      2.000000   \n",
       "25%        0.480000      0.570000      3.000000    157.000000      2.000000   \n",
       "50%        0.660000      0.720000      4.000000    199.500000      3.000000   \n",
       "75%        0.820000      0.860000      5.000000    243.000000      4.000000   \n",
       "max        1.000000      1.000000      7.000000    310.000000      6.000000   \n",
       "\n",
       "            Newborn          left  \n",
       "count  12000.000000  12000.000000  \n",
       "mean       0.154167      0.166667  \n",
       "std        0.361123      0.372694  \n",
       "min        0.000000      0.000000  \n",
       "25%        0.000000      0.000000  \n",
       "50%        0.000000      0.000000  \n",
       "75%        0.000000      0.000000  \n",
       "max        1.000000      1.000000  "
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_hr.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd2a4d7b-96db-42bf-86a3-ea8a1ff9153f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:ylabel='Frequency'>"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAD7CAYAAABT2VIoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAX8ElEQVR4nO3de2xT9/3G8cc2kMIAhWRJMJcWoSFIhwZqwtAqUNcEMNAkrAgWlJato5RubcelHQOxHwkFxBouXbluMKYyrQi0SiuXtEoyytaMDTroChQCYYRLsyWQ4IBCKZBgn98fVTPYejmx/fUh9vslIeHztXOejyB+fI6dE5dlWZYAADDE7XQAAEBso2gAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCqg9MB7laXL19TMNj2HzFKTu4qv/8jA4nuXswcH+Jt5nibVwpvZrfbpR49vvKZaxTN5wgGrZCK5tPHxhtmjg/xNnO8zSuZmZlTZwAAoygaAIBRFA0AwCiKBgBgVFSKpri4WFlZWRo4cKBOnTrVuv3s2bPKz8+Xz+dTfn6+zp07Z3QNABB9USma7Oxsbd26Vb17975je1FRkQoKClRWVqaCggIVFhYaXQMARF9UiiYzM1Ner/eObX6/X5WVlcrJyZEk5eTkqLKyUo2NjUbWAADOcOznaOrq6pSWliaPxyNJ8ng8Sk1NVV1dnSzLivhaUlJSm/IlJ3cNaa7mloBSUrqF9NhwNLcE1KmjJ+r7/ZQTMzuNmWNfvM0rmZmZH9j8HH7/RyH94FJKSjflvrDTQKIvtnvVBDU0XI36fqVPZnZq305h5tgXb/NK4c3sdrs+9wW6Y0Xj9Xp18eJFBQIBeTweBQIB1dfXy+v1yrKsiK8BAJzh2Mebk5OTlZ6erpKSEklSSUmJ0tPTlZSUZGQNAOAMl2VZxi/ms3TpUpWXl+vSpUvq0aOHEhMT9eabb6q6ulrz589XU1OTunfvruLiYvXv31+SjKy1BafO7OMUQ3yIt5njbV7J3KmzqBRNe0TR2Mc3ZHyIt5njbV7JXNFwZQAAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACj7oqi+dOf/qTvfOc7mjBhgvLy8lReXi5JOnv2rPLz8+Xz+ZSfn69z5861PibUNQBAdDleNJZl6ac//amWL1+unTt3avny5Zo3b56CwaCKiopUUFCgsrIyFRQUqLCwsPVxoa4BAKLL8aKRJLfbratXr0qSrl69qtTUVF2+fFmVlZXKycmRJOXk5KiyslKNjY3y+/0hrQEAoq+D0wFcLpdeeeUVPfPMM+rSpYuuXbumTZs2qa6uTmlpafJ4PJIkj8ej1NRU1dXVybKskNaSkpIcmxMA4pXjRXPr1i1t3LhRGzZsUEZGht577z3Nnj1by5cvdzRXcnJXR/cfipSUbnG5b6cwc+yLt3klMzM7XjQnTpxQfX29MjIyJEkZGRnq3LmzEhISdPHiRQUCAXk8HgUCAdXX18vr9cqyrJDW2sLv/0jBoNXmeZz8j9nQcNWR/aakdHNs305h5tgXb/NK4c3sdrs+9wW64+/R9OzZUxcuXNCZM2ckSdXV1fL7/brvvvuUnp6ukpISSVJJSYnS09OVlJSk5OTkkNYAANHnsiyr7S/bI2zXrl369a9/LZfLJUmaOXOmRo0aperqas2fP19NTU3q3r27iouL1b9/f0kKec2ucI5ocl/Y2ebHhWv3qgkc0UQRM8e+eJtXMndEc1cUzd2IorGPb8j4EG8zx9u8UgyfOgMAxDaKBgBgFEUDADCKogEAGEXRAACMomgA
AEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRtotmz549unXrlsksAIAYZLto1qxZoxEjRmjx4sU6cuSIyUwAgBhiu2h27dqlLVu2KCEhQT/+8Y/l8/m0YcMG/etf/zKZDwDQzrXpPZpBgwZp3rx5euedd1RUVKTS0lKNHj1ajz32mHbt2qVgMGgqJwCgnerQ1gd8+OGH2rVrl3bt2iWXy6WZM2fK6/Vq69atKi8v17p160zkBAC0U7aLZuvWrdq5c6fOnz+vcePGafny5Ro6dGjrus/n04MPPmgiIwCgHbNdNBUVFfrBD36g7OxsderU6X/WO3furLVr10Y0HACg/bNdNGvWrJHb7VbHjh1bt7W0tMiyrNbiGTFiROQTAgDaNdsfBpg2bZqOHz9+x7bjx4/rySefDDvEzZs3VVRUpDFjxig3N1cLFy6UJJ09e1b5+fny+XzKz8/XuXPnWh8T6hoAILpsF01VVZWGDBlyx7ZvfOMbOnnyZNghVqxYoYSEBJWVlWn37t2aNWuWJKmoqEgFBQUqKytTQUGBCgsLWx8T6hoAILpsF0337t116dKlO7ZdunRJnTt3DivAtWvXtGPHDs2aNUsul0uS9NWvflV+v1+VlZXKycmRJOXk5KiyslKNjY0hrwEAos920YwZM0YvvPCCTp06pevXr6uqqkrz5s3TuHHjwgpQU1OjxMRErVu3ThMnTtTUqVN16NAh1dXVKS0tTR6PR5Lk8XiUmpqqurq6kNcAANFn+8MAc+bM0UsvvaTJkyerublZCQkJmjhxop5//vmwAgQCAdXU1Oj+++/XvHnzdOTIEf3whz/U6tWrw/q64UpO7uro/kORktItLvftFGaOffE2r2RmZttFk5CQoKKiIhUWFury5cvq0aNH66mucHi9XnXo0KH1VNeQIUPUo0cP3XPPPbp48aICgYA8Ho8CgYDq6+vl9XplWVZIa23h93+kYNBq8zxO/sdsaLjqyH5TUro5tm+nMHPsi7d5pfBmdrtdn/sCvU2XoLl69ao++OADVVVV6cCBA9q/f7/2798fUqhPJSUlafjw4frrX/8q6ZNPjPn9fvXr10/p6ekqKSmRJJWUlCg9PV1JSUlKTk4OaQ0AEH0uy7JsvWz/wx/+oMWLF6tLly665557/vMFXC69/fbbYYWoqanRggULdOXKFXXo0EGzZ8/WQw89pOrqas2fP19NTU3q3r27iouL1b9/f0kKec2ucI5ocl/Y2ebHhWv3qgkc0UQRM8e+eJtXMndEY7toRo4cqaVLl+qhhx4KKUR7Q9HYxzdkfIi3meNtXukuOHUWCAT4yX8AQJvZLpqnnnpKv/zlL/lVAACANrH9qbMtW7bo0qVL2rx5sxITE+9Y+/Of/xzhWACAWGG7aFasWGEyBwAgRtkumm9+85smcwAAYpTt92iam5v1i1/8QtnZ2crIyJAk7du3T6+99pqxcACA9s920SxbtkynTp3SypUrW68IMGDAAG3bts1YOABA+2f71NmePXtUXl6uLl26yO3+pJ/S0tJ08eJFY+EAAO2f7SOajh07KhAI3LGtsbHxfz6BBgDA7WwXzdixYzVv3jzV1NRIkurr67V48WI98sgjxsIBANo/20UzZ84c9enTR3l5eWpqapLP51NqaqqeffZZk/kAAO2c7fdoOnXqpAULFmjBggVqbGyM2K8JAADENttF8+kps09du3at9e99+/aNXCIAQEyxXTSjR4+Wy+XS7Rd7/vSI5sSJE5FPBgCICbaL5uTJk3fcbmho0Lp165SZmRnxUACA2NGm37B5u5SUFP3sZz/Tyy+/HMk8AIAYE3LRSNKZM2d0/fr1SGUBAMQg26fOCgoK7viU2fXr13X69Gk+3gwA+EK2i2by5Ml33O7cubMGDRqkfv36RToTACCG
2C6aRx991GQOAECMsl00q1evtnW/WbNmhRwGABB7bBfN+fPnVV5ersGDB6t3796qra3VBx98oDFjxighIcFkRgBAO2a7aCzL0qpVq+Tz+Vq3lZeXq7S0VD//+c+NhAMAtH+2P95cUVGhUaNG3bEtKytL77zzTsRDAQBih+2iue+++7R169Y7tm3btk333ntvxEMBAGKH7VNnS5cu1XPPPafNmze3/mbNDh06aO3atSbzAQDaOdtFc//996usrExHjhxRfX29UlJSNHToUHXs2NFkPgBAOxfyJWiGDRumlpYWffzxx5HMAwCIMbaPaKqqqvSjH/1InTp10sWLFzV+/HgdPHhQb7zxhl555RWDEQEA7ZntI5pFixZp5syZKi0tVYcOn/TTsGHD9N577xkLBwBo/2wXzenTpzVhwgRJ//mFZ126dNHNmzfNJAMAxATbRdO7d28dO3bsjm1Hjx7l480AgC9k+z2aWbNm6emnn9aUKVPU0tKijRs3avv27VqyZInJfACAds72Ec3DDz+szZs3q7GxUcOGDdO///1vrV27ViNGjIhYmHXr1mngwIE6deqUJOnw4cPKy8uTz+fTtGnT5Pf7W+8b6hoAILpsFU0gENCoUaP0ta99TYsWLdKmTZu0ePFiDR48OGJBjh8/rsOHD6t3796SpGAwqLlz56qwsFBlZWXKzMzUypUrw1oDAESfraLxeDzyeDzG3vhvbm7W4sWLtWjRotZtx44dU0JCgjIzMyVJU6ZMUWlpaVhrAIDos/0ezfe+9z3Nnj1bTz/9tHr27HnHr3Xu27dvWCFWr16tvLw89enTp3VbXV2devXq1Xo7KSlJwWBQV65cCXktMTHRdqbk5K5hzeSElJRucblvpzBz7Iu3eSUzM39p0TQ0NCglJaX1Tf+//e1vsiyrdd3lcunEiRMhB3j//fd17Ngx/eQnPwn5a5jg93+kYND68jv+Fyf/YzY0XHVkvykp3Rzbt1OYOfbF27xSeDO73a7PfYH+pUXj8/n0j3/8QydPnpQkPfvss1q/fn1IQT7LwYMHVV1drezsbEnShQsX9OSTT2rq1Kmqra1tvV9jY6PcbrcSExPl9XpDWgMARN+Xvkdz+9GL9EkxRNKMGTO0b98+7d27V3v37lXPnj31m9/8RtOnT9eNGzd06NAhSdL27ds1duxYSdLgwYNDWgMARN+XHtHc/l6M9L/FY4rb7dby5ctVVFSkmzdvqnfv3lqxYkVYawCA6PvSogkEAjpw4EBrwfz3bUn61re+FbFAe/fubf37Aw88oN27d3/m/UJdAwBE15cWTXJyshYsWNB6OzEx8Y7bLpdLb7/9tpl0AIB270uL5vYjDAAA2irkX3wGAIAdFA0AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGEXRAACMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGEXRAACMcrxoLl++rKeeeko+n0+5ubl67rnn1NjYKEk6fPiw8vLy5PP5NG3aNPn9/tbHhboGAIgux4vG5XJp+vTpKisr0+7du9W3b1+tXLlSwWBQc+fOVWFhocrKypSZmamVK1dKUshrAIDoc7xoEhMTNXz48NbbQ4cOVW1trY4dO6aEhARlZmZKkqZMmaLS0lJJCnkNABB9jhfN7YLBoLZt26asrCzV1dWpV69erWtJSUkKBoO6cuVKyGsAgOjr4HSA2y1ZskRdunTR448/rj/+8Y+OZklO7uro/kORktItLvftFGaOfU7N29wSUKeOHkf2a2Lmu6ZoiouLdf78ef3qV7+S2+2W1+tVbW1t63pjY6PcbrcSExNDXmsLv/8jBYNWm+dw8huxoeGqI/tNSenm2L6dwsyxz8l5U1K6KfeFnVHf7+5VE0Ke2e12fe4L
9Lvi1NnLL7+sY8eOaf369erUqZMkafDgwbpx44YOHTokSdq+fbvGjh0b1hoAIPocP6L55z//qY0bN6pfv36aMmWKJKlPnz5av369li9frqKiIt28eVO9e/fWihUrJElutzukNQBA9DleNAMGDFBVVdVnrj3wwAPavXt3RNcAANF1V5w6AwDELooGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAKIoGAGAURQMAMIqiAQAYRdEAAIyiaAAARlE0AACjKBoAgFEUDQDAqJgtmrNnzyo/P18+n0/5+fk6d+6c05EAIC7FbNEUFRWpoKBAZWVlKigoUGFhodORACAudXA6gAl+v1+VlZV69dVXJUk5OTlasmSJGhsblZSUZOtruN2ukPef2qNzyI8NRziZ2/O+ncLMsc/Jedvb88gXPS4mi6aurk5paWnyeDySJI/Ho9TUVNXV1dkumh49vhLy/n/zf2NCfmw4kpO7OrJfp/ftFGaOfU7OG0vPIzF76gwAcHeIyaLxer26ePGiAoGAJCkQCKi+vl5er9fhZAAQf2KyaJKTk5Wenq6SkhJJUklJidLT022fNgMARI7LsizL6RAmVFdXa/78+WpqalL37t1VXFys/v37Ox0LAOJOzBYNAODuEJOnzgAAdw+KBgBgFEUDADCKogEAGEXRhMDOBTsDgYBefPFFjRo1SqNHj9brr78e/aARZGfm9evX65FHHlFubq4mTpyov/zlL9EPGkFtuTDrmTNnNGTIEBUXF0cvoAF2Z37rrbeUm5urnJwc5ebm6tKlS9ENGiF25vX7/ZoxY4Zyc3M1btw4LVq0SLdu3Yp+2AgpLi5WVlaWBg4cqFOnTn3mfSL+/GWhzaZOnWrt2LHDsizL2rFjhzV16tT/uc8bb7xhTZs2zQoEApbf77dGjhxp1dTURDtqxNiZuaKiwvr4448ty7KsEydOWBkZGdb169ejmjOS7MxsWZZ169Yt6/HHH7eef/5566WXXopmxIizM/PRo0etcePGWfX19ZZlWVZTU5N148aNqOaMFDvzLl26tPXftbm52Zo0aZL15ptvRjVnJB08eNCqra21Hn74Yauqquoz7xPp5y+OaNro0wt25uTkSPrkgp2VlZVqbGy8435vvfWWJk+eLLfbraSkJI0aNUqlpaVORA6b3ZlHjhypzp0/uRDgwIEDZVmWrly5Eu24EWF3ZknatGmTvv3tb6tfv35RThlZdmfesmWLpk2bppSUFElSt27dlJCQEPW84bI7r8vl0rVr1xQMBtXc3KyWlhalpaU5ETkiMjMzv/QqKZF+/qJo2uiLLtj53/fr1atX622v16sLFy5ENWuk2J35djt27NC9996rnj17RitmRNmd+eTJk9q3b5+eeOIJB1JGlt2Zq6urVVNTo8cee0yPPvqoNmzYIKsd/jie3XmfeeYZnT17ViNGjGj9k5GR4UTkqIn08xdFg4j7+9//rtWrV2vVqlVORzGqpaVFCxcu1Isvvtj6ZBUPAoGAqqqq9Oqrr+p3v/udKioqtHPnTqdjGVNaWqqBAwdq3759qqio0KFDh9rt2QmnUDRtZPeCnV6vV7W1ta236+rq2u2r+7ZcpPT999/X3LlztX79+nZ9yR87Mzc0NOjDDz/UjBkzlJWVpd/+9rf6/e9/r4ULFzoVOyx2/5179eqlsWPHqlOnTuratauys7N19OhRJyKHxe68r732mvLy8uR2u9WtWzdlZWXp3XffdSJy1ET6+YuiaSO7F+wcO3asXn/9dQWDQTU2NmrPnj3y+XxORA6b3ZmPHj2qOXPmaM2aNfr617/uRNSIsTNzr1699O6772rv3r3au3evvv/97+u73/2ulixZ4lTssNj9d87JydG+fftk
WZZaWlp04MABDRo0yInIYbE7b58+fVRRUSFJam5u1v79+zVgwICo542miD9/hfwxgjh2+vRpa9KkSdaYMWOsSZMmWdXV1ZZlWdb06dOto0ePWpb1ySeRCgsLrezsbCs7O9vavn27k5HDZmfmiRMnWsOHD7fy8vJa/5w8edLJ2GGxM/Pt1qxZ0+4/dWZn5kAgYC1btswaO3asNX78eGvZsmVWIBBwMnbI7Mx7/vx564knnrBycnKscePGWYsWLbJaWlqcjB2WJUuWWCNHjrTS09OtBx980Bo/frxlWWafv7ioJgDAKE6dAQCMomgAAEZRNAAAoygaAIBRFA0AwCiKBgBgFEUDADCKogEAGPX/DOJu0hB5BZYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_hr['left'].plot.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "84e823bb-82e6-467b-b67b-24dedbda07f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:ylabel='Frequency'>"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAD7CAYAAACvzHniAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAY5klEQVR4nO3de1BU5x3G8Wd3EdQqg1DAFU2s1kFSWx0vddIx0wY1aILYtLU4JPZio2nTVE1ai2MaQHFsQE3itdWxo23imKkzjRfMADW2UtPEaiZqDaKNGrUBBRYdjTdw9/QPxzU2gsd92V0Wvp8ZZ9zz7uH89mXZZ8/tfR2WZVkCAMCAM9wFAAAiH2ECADBGmAAAjBEmAABjhAkAwBhhAgAwRpgAAIxFhbuAcDp37pJ8vvZxm01CQjd5PJ+Gu4w2gb64gX64hb64xaQvnE6HevT4wh3bOnSY+HxWuwkTSe3qtZiiL26gH26hL24JRl+E5DBXUVGR0tPTlZqaqqNHj/qXnzhxQtnZ2crIyFB2drY+/vhj4zYAQOiFJExGjx6tDRs2KCUl5bbl+fn5ysnJUVlZmXJycpSXl2fcBgAIvZCEyfDhw+V2u29b5vF4VFlZqczMTElSZmamKisr1dDQEHAbACA8wnbOpKamRsnJyXK5XJIkl8ulpKQk1dTUyLKsgNri4+PD9XIAoEPr0CfgExK6hbuEVpWY2D3cJbQZ9MUN9MMt9MUtweiLsIWJ2+3W2bNn5fV65XK55PV6VVtbK7fbLcuyAmq7Vx7Pp+3mCo/ExO6qq7sY7jLaBPriBvrhFvriFpO+cDodzX4JD9tNiwkJCUpLS1NJSYkkqaSkRGlpaYqPjw+4DQAQHo5QTI61YMEClZeXq76+Xj169FBcXJy2b9+uY8eOac6cObpw4YJiY2NVVFSkfv36SVLAbfeCPZP2ib64IZL6oXtsF3WOCexAydVr13XxwpUWnxNJfRFswdozCUmYtFWESftEX9wQSf2QmNhdE365JaB1ty2ZeNfXGUl9EWzt7jAXAKD9IEwAAMYIEwCAMcIEAGCMMAEAGCNMAADGCBMAgDHCBABgjDABABgjTAAAxggTAICxDj2fCYCOzWSAScneIJMdBWECoMPqHBMV8ACT0o1BJhk+8gYOcwEAjBEmAABjhAkAwBhhAgAwRpgAAIwRJgAAY4QJAMAYYQIAMEaYAACMESYAAGOECQDAGGECADBGmAAAjDFqMNDGmAyLzpDoCBfCBAgC03kyAh0WnSHRES6ECRAEJvNkbFsysZWrAYKPcyYAAGOECQDAGIe5APhx8h+BIkwA+Jme6+Hkf8fVJg5z/e1vf9O3v/1tTZw4UVlZWSovL5cknThxQtnZ2crIyFB2drY+/vhj/zottQEAQivsYWJZln7961+ruLhYW7ZsUXFxsXJzc+Xz+ZSfn6+cnByVlZUpJydHeXl5/vVaagMAhFbYw0SSnE6nLl68sYN88eJFJSUl6dy5c6qsrFRmZqYkKTMzU5WVlWpoaJDH42m2DQAQemE/Z+JwOPTqq6/qmWeeUdeuXXXp0iWtWbNGNTU1Sk5OlsvlkiS5XC4lJSWppqZGlmU12xYfHx/OlwMAHVLYw+T69etavXq1Vq1apWHDhun999/XrFmzVFxcHPRtJyR0C/o2QikxsXu4S2gzOnJffPa1h7ofwtXvdrYbrNoi8b0WjJrDHiaHDx9WbW2thg0bJkkaNmyYunTpopiYGJ09e1Zer1cul0ter1e1tbVyu92yLKvZtnvh8Xwqn88KxssKucTE7qqr41oaqW30RTg/YG6+9kD6wbTuQPs92Nttri9a4/cU6GsO12XYJn8fTqej2S/hYQ+Tnj176syZMzp+/Lj69eunY8eOyePx6P7771daWppKSko0ceJElZSUKC0tzX8Yq6U2AGjr2ttl2GEPk8TERBUUFGjmzJlyOBySpIULFyouLk4FBQWaM2eOVq1a
pdjYWBUVFfnXa6kNABBaYQ8TScrKylJWVtbnlvfv31+bNm264zottQEAQqtNXBoMAIhshAkAwBhhAgAwRpgAAIwRJgAAY23iai4AgH2NTd6Ab7hsbPK2cjU3ECYAEGGiO7mMbngMBg5zAQCMESYAAGMc5gIQ0eyeP4jE0X0jCWECIKK1xfMHHRGHuQAAxggTAIAxwgQAYIwwAQAY4wQ8gFZhclc2Ih9hAqBVdMSrqgjQWwgTAAhQRwzQ5hAmQDvy/9+U+daMUCFMgHbE5Juy1P6+LSN0uJoLAGCMMAEAGCNMAADGCBMAgDHCBABgjDABABgjTAAAxmyHyY4dO3T9+vVg1gIAiFC2w2TZsmUaNWqU5s+frwMHDgSzJgBAhLEdJlu3btX69esVExOjX/ziF8rIyNCqVav03//+N5j1AQAiwD2dMxk4cKByc3O1a9cu5efnq7S0VGPHjtUTTzyhrVu3yufzBatOAEAbds9jc506dUpbt27V1q1b5XA4NGPGDLndbm3YsEHl5eVasWJFMOoEALRhtsNkw4YN2rJli06ePKnx48eruLhYQ4YM8bdnZGToG9/4RjBqBAC0cbbDpKKiQj/+8Y81evRoRUdHf669S5cuWr58eUBFXLt2TQsXLtS7776rmJgYDRkyRIWFhTpx4oTmzJmj8+fPKy4uTkVFRerbt68ktdgGAAgt22GybNkyOZ1OderUyb+sqalJlmX5w2XUqFEBFbFo0SLFxMSorKxMDodD9fX1kqT8/Hzl5ORo4sSJ2rJli/Ly8vSnP/3prm2AJHWP7aLOMYHPsnD12nVdvHClFSsC2i/bf2lTp07V7Nmzbzu09eGHH2rJkiV67bXXAi7g0qVL2rx5s3bt2iWHwyFJ+uIXvyiPx6PKykqtW7dOkpSZmanCwkI1NDTIsqxm2+Lj4wOuBe1L55go47k9LrZiPUB7ZjtMjhw5osGDB9+27Gtf+5qqqqqMCjh9+rTi4uK0YsUK7dmzR1/4whc0c+ZMde7cWcnJyXK5XJIkl8ulpKQk1dTUyLKsZtsIEwAIPdthEhsbq/r6eiUmJvqX1dfXq0uXLkYFeL1enT59Wg888IByc3N14MAB/fSnP9XSpUuNfq4dCQndgr6NUGKK1ltaqy/oU7RHwXhf2w6TRx55RL/85S/1m9/8Rn369NGpU6f00ksvafz48UYFuN1uRUVFKTMzU5I0ePBg9ejRQ507d9bZs2fl9Xrlcrnk9XpVW1srt9sty7KabbsXHs+n8vkso/rbisTE7qqr46CMdKsvWuMPJtA+JYTQlgX6vnY6Hc1+Cbd90+Jzzz2n/v37a9KkSRo6dKiys7P1pS99Sc8//3xARd0UHx+vkSNH6p133pF04yotj8ejvn37Ki0tTSUlJZKkkpISpaWlKT4+XgkJCc22AQBCz/aeSUxMjPLz85WXl6dz586pR48e/hPmpubNm6e5c+eqqKhIUVFRKi4uVmxsrAoKCjRnzhytWrVKsbGxKioq8q/TUhsAILTu6brJixcv6sSJE7p06dJtyx988EGjIvr06XPHK8L69++vTZs23XGdltoAAKFlO0z+8pe/aP78+eratas6d+7sX+5wOPT2228HpTgAQGSwHSavvPKKli5dqm9+85vBrCcimNwMx41wANoj25+IXq834Dvc2xuTm+G4EQ5Ae2T7aq5p06bpd7/7HcPMAwA+x/aeyfr161VfX6+1a9cqLi7utra///3vrVwWACCS2A6TRYsWBbMOAEAEsx0mX//614NZBwAggtk+Z9LY2KhXXnlFo0eP1rBhwyRJu3fv1uuvvx604gAAkcF2mCxcuFBHjx7V4sWL/Xe+DxgwQBs3bgxacQCAyGD7MNeOHTtUXl6url27yum8kUHJyck6e/Zs0IoDAEQG23smnTp1ktfrvW1ZQ0PD567sAgB0PLbDZNy4ccrNzdXp06clSbW1tZo/f74ee+yxoBUHAIgM9zQEfe/evZWVlaULFy4o
IyNDSUlJ+vnPfx7M+gAAEcD2OZPo6GjNnTtXc+fOVUNDQ6sOQQ8AiGy2w+Tm4a2bPjsMfZ8+fVqvIgBAxLEdJmPHjpXD4ZBl3Zrm9uaeyeHDh1u/MgBAxLAdJlVVVbc9rqur04oVKzR8+PBWLwoAEFlsn4D/f4mJiXrhhRf08ssvt2Y9AIAIFHCYSNLx48d15QoTPQFAR2f7MFdOTs5tV29duXJFH330EZcGI6gCndUyMbF7EKoB0Bzbf6WTJk267XGXLl00cOBA9e3bt7VrAvxMZ7UEEBq2w+Txxx8PZh0AgAhmO0yWLl1q63kzZ84MuBgAQGSyHSYnT55UeXm5Bg0apJSUFFVXV+vf//63HnnkEcXExASzRgBAG2c7TCzL0pIlS5SRkeFfVl5ertLSUv32t78NSnEAgMhg+9LgiooKjRkz5rZl6enp2rVrV6sXBQCILLbD5P7779eGDRtuW7Zx40bdd999rV4UACCy2D7MtWDBAj377LNau3atf4bFqKgoLV++PJj1AQAigO0weeCBB1RWVqYDBw6otrZWiYmJGjJkiDp16hTM+gAAESDg4VRGjBihpqYmXb58uTXrAQBEINt7JkeOHNHPfvYzRUdH6+zZs3r00Ue1d+9evfnmm3r11VeDWCIAoK2zvWdSUFCgGTNmqLS0VFFRNzJoxIgRev/994NWHAAgMtgOk48++kgTJ94Y6+jmgI9du3bVtWvXglMZACBi2A6TlJQUHTp06LZlBw8e5NJgAID9MJk5c6aefvppLVu2TE1NTVq9erVmzpypWbNmtVoxK1asUGpqqo4ePSpJ2r9/v7KyspSRkaGpU6fK4/H4n9tSGwAgtGyHycMPP6y1a9eqoaFBI0aM0CeffKLly5dr1KhRrVLIhx9+qP379yslJUWS5PP5NHv2bOXl5amsrEzDhw/X4sWL79oGAAg9W2Hi9Xo1ZswYffnLX1ZBQYHWrFmj+fPna9CgQa1SRGNjo+bPn6+CggL/skOHDikmJsY/x/zkyZNVWlp61zYAQOjZujTY5XLJ5XLp2rVrio6ObvUili5dqqysLPXu3du/rKamRr169fI/jo+Pl8/n0/nz51tsi4uLs73dhIRurVL/vQrWLIDMLtj66FO0R8F4X9u+z+QHP/iBZs2apaefflo9e/a8bQrfPn36BFzABx98oEOHDulXv/pVwD8jUB7Pp/L5rHtez/QXUVd30Wj9O0lM7B6Unxtu4fwwb2zyKrqTK2zbB4Il0M8Kp9PR7Jfwu4ZJXV2dEhMTVVhYKEn65z//Kcu69QHscDh0+PDhgAqTpL179+rYsWMaPXq0JOnMmTP6yU9+oilTpqi6utr/vIaGBjmdTsXFxcntdjfbBrSW6E4upgwGbLrrOZOb85dUVVWpqqpK6enp/v9XVVUZBYkkTZ8+Xbt379bOnTu1c+dO9ezZU3/4wx/01FNP6erVq9q3b58k6Y033tC4ceMkSYMGDWq2DQAQenfdM/nsXoh0Y08iFJxOp4qLi5Wfn69r164pJSVFixYtumsbACD07homnz03In0+XFrbzp07/f8fOnSotm3bdsfntdQGAAitu4aJ1+vVe++95w+R/38sSQ8++GDwKgQAtHl3DZOEhATNnTvX/zguLu62xw6HQ2+//XZwqgMARIS7hslnDzsBAHAnAU+OBQDATYQJAMAYYQIAMEaYAACMESYAAGOECQDAGGECADBGmAAAjBEmAABjhAkAwBhhAgAwZnvaXrSOxiZvwFPRXr12XRcvXGnligDAHGESYqZTwba/Wd4BtAcc5gIAGCNMAADGCBMAgDHCBABgjDABABgjTAAAxggTAIAxwgQAYIwwAQAYI0wAAMYYTgVB1z22izrH8FYD2jP+whF0nWOijMYjA9D2cZgLAGCMMAEAGOMwVwS521wod5snhflQAAQLYRJBTOZCkZgPBUDwcJgLAGAs7GFy7tw5TZs2TRkZGZowYYKeffZZNTQ0SJL279+v
rKwsZWRkaOrUqfJ4PP71WmoDAIRW2MPE4XDoqaeeUllZmbZt26Y+ffpo8eLF8vl8mj17tvLy8lRWVqbhw4dr8eLFktRiGwAg9MIeJnFxcRo5cqT/8ZAhQ1RdXa1Dhw4pJiZGw4cPlyRNnjxZpaWlktRiGwAg9NrUCXifz6eNGzcqPT1dNTU16tWrl78tPj5ePp9P58+fb7EtLi7O9vYSErq1Zvlt3t2uBrvbutGdXK1cEYBwCPRzoCVtKkwKCwvVtWtXPfnkk/rrX/8a9O15PJ/K57Pueb1g/CJCweRqsG1LJqquLrBrwSK1v4D2KtC/ZafT0eyX8DYTJkVFRTp58qR+//vfy+l0yu12q7q62t/e0NAgp9OpuLi4FtsAAKEX9nMmkvTyyy/r0KFDWrlypaKjoyVJgwYN0tWrV7Vv3z5J0htvvKFx48bdtQ0AEHph3zP5z3/+o9WrV6tv376aPHmyJKl3795auXKliouLlZ+fr2vXriklJUWLFi2SJDmdzmbbAAChF/YwGTBggI4cOXLHtqFDh2rbtm333AYACK02cZgLABDZCBMAgDHCBABgjDABABgjTAAAxggTAIAxwgQAYIwwAQAYC/tNi4gMJiMOA2j/CBPYYjriMID2jcNcAABjhAkAwBhhAgAwRpgAAIwRJgAAY4QJAMAYYQIAMEaYAACMESYAAGOECQDAGGECADBGmAAAjBEmAABjhAkAwBhhAgAwRpgAAIwRJgAAY4QJAMAYYQIAMEaYAACMESYAAGOECQDAGGECADAW0WFy4sQJZWdnKyMjQ9nZ2fr444/DXRIAdEgRHSb5+fnKyclRWVmZcnJylJeXF+6SAKBDigp3AYHyeDyqrKzUunXrJEmZmZkqLCxUQ0OD4uPjbf0Mp9MR8PaTenSJuHXDue1IXDec2+Y1R8a64dy2ybqBfva1tJ7Dsiwr0ILC6dChQ8rNzdX27dv9yx599FEtWrRIX/nKV8JYGQB0PBF9mAsA0DZEbJi43W6dPXtWXq9XkuT1elVbWyu32x3mygCg44nYMElISFBaWppKSkokSSUlJUpLS7N9vgQA0Hoi9pyJJB07dkxz5szRhQsXFBsbq6KiIvXr1y/cZQFAhxPRYQIAaBsi9jAXAKDtIEwAAMYIEwCAMcIEAGCMMIkgdga2XLlypR577DFNmDBB3/nOd/SPf/wj9IWGwL0M8nn8+HENHjxYRUVFoSswhOz2xVtvvaUJEyYoMzNTEyZMUH19fWgLDTI7/eDxeDR9+nRNmDBB48ePV0FBga5fvx76YoOsqKhI6enpSk1N1dGjR+/4HK/Xq3nz5mnMmDEaO3asNm3aZLZRCxFjypQp1ubNmy3LsqzNmzdbU6ZM+dxzKioqrMuXL1uWZVmHDx+2hg0bZl25ciWkdYaCnb6wLMu6fv269eSTT1rPP/+89dJLL4WyxJCx0xcHDx60xo8fb9XW1lqWZVkXLlywrl69GtI6g81OPyxYsMD/PmhsbLS+973vWdu3bw9pnaGwd+9eq7q62nr44YetI0eO3PE5b775pjV16lTL6/VaHo/Heuihh6zTp08HvE32TCLEzYEtMzMzJd0Y2LKyslINDQ23Pe+hhx5Sly43BoBLTU2VZVk6f/58qMsNKrt9IUlr1qzRt771LfXt2zfEVYaG3b5Yv369pk6dqsTERElS9+7dFRMTE/J6g8VuPzgcDl26dEk+n0+NjY1qampScnJyOEoOquHDh991NJC33npLkyZNktPpVHx8vMaMGaPS0tKAt0mYRIiamholJyfL5XJJklwul5KSklRTU9PsOps3b9Z9992nnj17hqrMkLDbF1VVVdq9e7d+9KMfhaHK0LDbF8eOHdPp06f1xBNP6PHHH9eqVatktaNbzOz2wzPPPKMTJ05o1KhR/n/Dhg0LR8lhV1NTo169evkfu91unTlzJuCfR5i0U//617+0dOlSLVmyJNylhEVTU5NefPFFzZs3z/8B05F5vV4d
OXJE69at02uvvaaKigpt2bIl3GWFXGlpqVJTU7V7925VVFRo3759Rt/GcQthEiHuZWDLDz74QLNnz9bKlSvb5fAydvqirq5Op06d0vTp05Wenq4//vGP+vOf/6wXX3wxXGUHhd33Ra9evTRu3DhFR0erW7duGj16tA4ePBiOkoPCbj+8/vrrysrKktPpVPfu3ZWenq49e/aEo+Swc7vdqq6u9j+uqakxOopBmEQIuwNbHjx4UM8995yWLVvWbud1sdMXvXr10p49e7Rz507t3LlTP/zhD/X9739fhYWF4So7KOy+LzIzM7V7925ZlqWmpia99957GjhwYDhKDgq7/dC7d29VVFRIkhobG/Xuu+9qwIABIa+3LRg3bpw2bdokn8+nhoYG7dixQxkZGQH/PMbmiiDNDWw5bdo0zZgxQ1/96lf13e9+V5988sltJxWLi4uVmpoaxspbn52++Kzly5fr8uXLys3NDVPFwWOnL3w+n4qKilRRUSGn06lRo0YpNzdXTmf7+T5ppx9OnTql/Px81dfXy+v1auTIkXrhhRcUFRWxk87e0YIFC1ReXq76+nr16NFDcXFx2r59+2194fV6NX/+fL3zzjuSpGnTpik7OzvgbRImAABj7edrCQAgbAgTAIAxwgQAYIwwAQAYI0wAAMYIEwCAMcIEAGCMMAEAGPsfwKI/5gABcAUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_hr['S'].plot.hist(bins=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e7d2e0f-b5dd-4d01-a159-64bd49d06e28",
   "metadata": {},
   "outputs": [],
   "source": [
    "y = df_hr.pop('left')\n",
    "X = df_hr.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "66b02a04-fd6c-44e5-8803-e25be209c3d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>S</th>\n",
       "      <th>LPE</th>\n",
       "      <th>NP</th>\n",
       "      <th>ANH</th>\n",
       "      <th>TIC</th>\n",
       "      <th>Newborn</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.38</td>\n",
       "      <td>0.53</td>\n",
       "      <td>2</td>\n",
       "      <td>157</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.80</td>\n",
       "      <td>0.86</td>\n",
       "      <td>5</td>\n",
       "      <td>262</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.11</td>\n",
       "      <td>0.88</td>\n",
       "      <td>7</td>\n",
       "      <td>272</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.72</td>\n",
       "      <td>0.87</td>\n",
       "      <td>5</td>\n",
       "      <td>223</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.37</td>\n",
       "      <td>0.52</td>\n",
       "      <td>2</td>\n",
       "      <td>159</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11995</th>\n",
       "      <td>0.90</td>\n",
       "      <td>0.55</td>\n",
       "      <td>3</td>\n",
       "      <td>259</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11996</th>\n",
       "      <td>0.74</td>\n",
       "      <td>0.95</td>\n",
       "      <td>5</td>\n",
       "      <td>266</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11997</th>\n",
       "      <td>0.85</td>\n",
       "      <td>0.54</td>\n",
       "      <td>3</td>\n",
       "      <td>185</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11998</th>\n",
       "      <td>0.33</td>\n",
       "      <td>0.65</td>\n",
       "      <td>3</td>\n",
       "      <td>172</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11999</th>\n",
       "      <td>0.50</td>\n",
       "      <td>0.73</td>\n",
       "      <td>4</td>\n",
       "      <td>180</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12000 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          S   LPE  NP  ANH  TIC  Newborn\n",
       "0      0.38  0.53   2  157    3        0\n",
       "1      0.80  0.86   5  262    6        0\n",
       "2      0.11  0.88   7  272    4        0\n",
       "3      0.72  0.87   5  223    5        0\n",
       "4      0.37  0.52   2  159    3        0\n",
       "...     ...   ...  ..  ...  ...      ...\n",
       "11995  0.90  0.55   3  259    2        1\n",
       "11996  0.74  0.95   5  266    4        0\n",
       "11997  0.85  0.54   3  185    3        0\n",
       "11998  0.33  0.65   3  172    5        0\n",
       "11999  0.50  0.73   4  180    3        0\n",
       "\n",
       "[12000 rows x 6 columns]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "59f26d5b-6e3e-4fd4-b4a1-a065d06989a2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        1\n",
       "1        1\n",
       "2        1\n",
       "3        1\n",
       "4        1\n",
       "        ..\n",
       "11995    0\n",
       "11996    0\n",
       "11997    0\n",
       "11998    0\n",
       "11999    0\n",
       "Name: left, Length: 12000, dtype: int64"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "25f796be-6d89-4a65-a279-f23c79542383",
   "metadata": {},
   "outputs": [],
   "source": [
    "X = sm.add_constant(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01477663-857c-4f90-b814-635cbb1bffb5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimization terminated successfully.\n",
      "         Current function value: 0.354538\n",
      "         Iterations 7\n"
     ]
    }
   ],
   "source": [
    "model_hr = sm.Logit(y, X).fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f327cbc-aa9e-4966-9b8b-a5f92f387168",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>Logit Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>         <td>left</td>       <th>  No. Observations:  </th>  <td> 12000</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                 <td>Logit</td>      <th>  Df Residuals:      </th>  <td> 11993</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>                 <td>MLE</td>       <th>  Df Model:          </th>  <td>     6</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>            <td>Fri, 27 May 2022</td> <th>  Pseudo R-squ.:     </th>  <td>0.2131</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                <td>08:04:49</td>     <th>  Log-Likelihood:    </th> <td> -4254.5</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>converged:</th>             <td>True</td>       <th>  LL-Null:           </th> <td> -5406.7</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>     <td>nonrobust</td>    <th>  LLR p-value:       </th>  <td> 0.000</td> \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "     <td></td>        <th>coef</th>     <th>std err</th>      <th>z</th>      <th>P>|z|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th>   <td>   -1.2412</td> <td>    0.160</td> <td>   -7.751</td> <td> 0.000</td> <td>   -1.555</td> <td>   -0.927</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>S</th>       <td>   -3.8163</td> <td>    0.121</td> <td>  -31.607</td> <td> 0.000</td> <td>   -4.053</td> <td>   -3.580</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>LPE</th>     <td>    0.5044</td> <td>    0.181</td> <td>    2.788</td> <td> 0.005</td> <td>    0.150</td> <td>    0.859</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>NP</th>      <td>   -0.3592</td> <td>    0.026</td> <td>  -13.569</td> <td> 0.000</td> <td>   -0.411</td> <td>   -0.307</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>ANH</th>     <td>    0.0038</td> <td>    0.001</td> <td>    6.067</td> <td> 0.000</td> <td>    0.003</td> <td>    0.005</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>TIC</th>     <td>    0.6188</td> <td>    0.027</td> <td>   22.820</td> <td> 0.000</td> <td>    0.566</td> <td>    0.672</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Newborn</th> <td>   -1.4851</td> <td>    0.113</td> <td>  -13.157</td> <td> 0.000</td> <td>   -1.706</td> <td>   -1.264</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                           Logit Regression Results                           \n",
       "==============================================================================\n",
       "Dep. Variable:                   left   No. Observations:                12000\n",
       "Model:                          Logit   Df Residuals:                    11993\n",
       "Method:                           MLE   Df Model:                            6\n",
       "Date:                Fri, 27 May 2022   Pseudo R-squ.:                  0.2131\n",
       "Time:                        08:04:49   Log-Likelihood:                -4254.5\n",
       "converged:                       True   LL-Null:                       -5406.7\n",
       "Covariance Type:            nonrobust   LLR p-value:                     0.000\n",
       "==============================================================================\n",
       "                 coef    std err          z      P>|z|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "const         -1.2412      0.160     -7.751      0.000      -1.555      -0.927\n",
       "S             -3.8163      0.121    -31.607      0.000      -4.053      -3.580\n",
       "LPE            0.5044      0.181      2.788      0.005       0.150       0.859\n",
       "NP            -0.3592      0.026    -13.569      0.000      -0.411      -0.307\n",
       "ANH            0.0038      0.001      6.067      0.000       0.003       0.005\n",
       "TIC            0.6188      0.027     22.820      0.000       0.566       0.672\n",
       "Newborn       -1.4851      0.113    -13.157      0.000      -1.706      -1.264\n",
       "==============================================================================\n",
       "\"\"\""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_hr.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8046b296-fed4-47da-90f4-272f3593f146",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[0;31mSignature:\u001b[0m \u001b[0mmodel_hr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexog\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtransform\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;31mDocstring:\u001b[0m\n",
       "Call self.model.predict with self.params as the first argument.\n",
       "\n",
       "Parameters\n",
       "----------\n",
       "exog : array_like, optional\n",
       "    The values for which you want to predict. see Notes below.\n",
       "transform : bool, optional\n",
       "    If the model was fit via a formula, do you want to pass\n",
       "    exog through the formula. Default is True. E.g., if you fit\n",
       "    a model y ~ log(x1) + log(x2), and transform is True, then\n",
       "    you can pass a data structure that contains x1 and x2 in\n",
       "    their original form. Otherwise, you'd need to log the data\n",
       "    first.\n",
       "*args\n",
       "    Additional arguments to pass to the model, see the\n",
       "    predict method of the model for the details.\n",
       "**kwargs\n",
       "    Additional keywords arguments to pass to the model, see the\n",
       "    predict method of the model for the details.\n",
       "\n",
       "Returns\n",
       "-------\n",
       "array_like\n",
       "    See self.model.predict.\n",
       "\n",
       "Notes\n",
       "-----\n",
       "The types of exog that are supported depends on whether a formula\n",
       "was used in the specification of the model.\n",
       "\n",
       "If a formula was used, then exog is processed in the same way as\n",
       "the original data. This transformation needs to have key access to the\n",
       "same variable names, and can be a pandas DataFrame or a dict like\n",
       "object that contains numpy arrays.\n",
       "\n",
       "If no formula was used, then the provided exog needs to have the\n",
       "same number of columns as the original exog in the model. No\n",
       "transformation of the data is performed except converting it to\n",
       "a numpy array.\n",
       "\n",
       "Row indices as in pandas data frames are supported, and added to the\n",
       "returned prediction.\n",
       "\u001b[0;31mFile:\u001b[0m      /opt/anaconda/envs/aiking/lib/python3.9/site-packages/statsmodels/base/model.py\n",
       "\u001b[0;31mType:\u001b[0m      method\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model_hr.predict?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "177f018a-d746-4fb8-b1dc-83f71699ef6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7.641666666666667"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cutoff = 0.5\n",
    "(model_hr.predict(X) > cutoff).sum()*100/len(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5cc0ea1a-eae5-4979-af3d-c5b5e511e7fe",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>left</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>row_0</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>False</th>\n",
       "      <td>9464</td>\n",
       "      <td>1619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>True</th>\n",
       "      <td>536</td>\n",
       "      <td>381</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "left      0     1\n",
       "row_0            \n",
       "False  9464  1619\n",
       "True    536   381"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.crosstab(model_hr.predict(X) >cutoff, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3563e23-3c27-410d-8a72-81e32b2a60ce",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.9464, 0.1905)"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Class-wise hit rates at the chosen cutoff, from the confusion-matrix counts\n",
    "# (rows: prediction > cutoff, columns: actual `left`).\n",
    "(\n",
    "    9464 / (9464 + 536),  # left == 0 predicted False: true-negative rate\n",
    "    381 / (1619 + 381),   # left == 1 predicted True: true-positive rate\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55ab4840-1389-491a-aac8-7318dfffbaad",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9235833333333333"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(9464+1619)/12000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64ca6dff-f5c6-4d1f-bfa0-64b40d55d091",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Accuracy = (true negatives + true positives) / all observations.\n",
    "# Counts come from the confusion matrix: 9464 + 536 + 1619 + 381 = 12000.\n",
    "accuracy = (9464+381)/(9464+381+536+1619); accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47df970f-6673-41e3-bf10-a06dbbbcfa55",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S         -31.606505\n",
       "TIC        22.820109\n",
       "NP        -13.569440\n",
       "Newborn   -13.156788\n",
       "const      -7.751316\n",
       "ANH         6.067180\n",
       "LPE         2.788130\n",
       "dtype: float64"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# z-statistics of the coefficients significant at the 5% level,\n",
    "# ordered from largest to smallest absolute value.\n",
    "model_hr.tvalues[\n",
    "    model_hr.tvalues[model_hr.pvalues <= 0.05]\n",
    "    .abs()\n",
    "    .sort_values(ascending=False)\n",
    "    .index\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d474039f-918e-4dcb-869a-782ebdef665d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>S</th>\n",
       "      <th>LPE</th>\n",
       "      <th>NP</th>\n",
       "      <th>ANH</th>\n",
       "      <th>TIC</th>\n",
       "      <th>Newborn</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.38</td>\n",
       "      <td>0.53</td>\n",
       "      <td>2</td>\n",
       "      <td>157</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.80</td>\n",
       "      <td>0.86</td>\n",
       "      <td>5</td>\n",
       "      <td>262</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.11</td>\n",
       "      <td>0.88</td>\n",
       "      <td>7</td>\n",
       "      <td>272</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.72</td>\n",
       "      <td>0.87</td>\n",
       "      <td>5</td>\n",
       "      <td>223</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.37</td>\n",
       "      <td>0.52</td>\n",
       "      <td>2</td>\n",
       "      <td>159</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11995</th>\n",
       "      <td>0.90</td>\n",
       "      <td>0.55</td>\n",
       "      <td>3</td>\n",
       "      <td>259</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11996</th>\n",
       "      <td>0.74</td>\n",
       "      <td>0.95</td>\n",
       "      <td>5</td>\n",
       "      <td>266</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11997</th>\n",
       "      <td>0.85</td>\n",
       "      <td>0.54</td>\n",
       "      <td>3</td>\n",
       "      <td>185</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11998</th>\n",
       "      <td>0.33</td>\n",
       "      <td>0.65</td>\n",
       "      <td>3</td>\n",
       "      <td>172</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11999</th>\n",
       "      <td>0.50</td>\n",
       "      <td>0.73</td>\n",
       "      <td>4</td>\n",
       "      <td>180</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>12000 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          S   LPE  NP  ANH  TIC  Newborn\n",
       "0      0.38  0.53   2  157    3        0\n",
       "1      0.80  0.86   5  262    6        0\n",
       "2      0.11  0.88   7  272    4        0\n",
       "3      0.72  0.87   5  223    5        0\n",
       "4      0.37  0.52   2  159    3        0\n",
       "...     ...   ...  ..  ...  ...      ...\n",
       "11995  0.90  0.55   3  259    2        1\n",
       "11996  0.74  0.95   5  266    4        0\n",
       "11997  0.85  0.54   3  185    3        0\n",
       "11998  0.33  0.65   3  172    5        0\n",
       "11999  0.50  0.73   4  180    3        0\n",
       "\n",
       "[12000 rows x 6 columns]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98ada868-95bb-4351-9c35-4f2678e89f76",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>S</th>\n",
       "      <th>LPE</th>\n",
       "      <th>NP</th>\n",
       "      <th>ANH</th>\n",
       "      <th>TIC</th>\n",
       "      <th>Newborn</th>\n",
       "      <th>left</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.38</td>\n",
       "      <td>0.53</td>\n",
       "      <td>2</td>\n",
       "      <td>157</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.80</td>\n",
       "      <td>0.86</td>\n",
       "      <td>5</td>\n",
       "      <td>262</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.11</td>\n",
       "      <td>0.88</td>\n",
       "      <td>7</td>\n",
       "      <td>272</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.72</td>\n",
       "      <td>0.87</td>\n",
       "      <td>5</td>\n",
       "      <td>223</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.37</td>\n",
       "      <td>0.52</td>\n",
       "      <td>2</td>\n",
       "      <td>159</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      S   LPE  NP  ANH  TIC  Newborn  left\n",
       "0  0.38  0.53   2  157    3        0     1\n",
       "1  0.80  0.86   5  262    6        0     1\n",
       "2  0.11  0.88   7  272    4        0     1\n",
       "3  0.72  0.87   5  223    5        0     1\n",
       "4  0.37  0.52   2  159    3        0     1"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_hr = pd.read_csv(\"DATA_3.02_HR2.csv\"); df_hr.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fac951c6-1207-4d7a-95c1-90506764e53f",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='Time in Company', ylabel='Attrition'>"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEQCAYAAABFtIg2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAApxklEQVR4nO3dfVRUdeI/8PfceeBBQGAccFAUJdPx+WnVIrU0RXMQslxccvvtqrg9rHksSzaPgLLuhu2axWq1Wq5GteUpNUnT0rXEFLN8SsRcBUEdHgQNERS4c39/+HXWCS/O4MydUd6vczoHZj7cz9tphjdz753PVUmSJIGIiOgmBE8HICIi78WSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlmKlURhYSESExMRGxuLxMREFBUV3XTc5s2bERcXB7PZjLi4OJw/f16piERE9AsqpT4n8eSTT+Kxxx5DfHw8Nm7ciE8++QRr1661G3PkyBHMmzcPa9asgcFgwKVLl6DT6eDj46NERCIi+gVF3klUVlYiPz8fZrMZAGA2m5Gfn4+qqiq7cf/6178wbdo0GAwGAEBgYCALgojIgxQpCYvFgvDwcKjVagCAWq1GWFgYLBaL3biTJ0+ipKQETzzxBB599FGsWLEC/EA4EZHnaDwd4EaiKOL48eNYvXo16uvrMWPGDERERCAhIcHT0YiIWiVFSsJoNKKsrAyiKEKtVkMURZSXl8NoNNqNi4iIwLhx46DT6aDT6TB69GgcPnzYqZK4cOEyrFbn333o9QGorKxx+ufcjbmcw1zO89ZszOWcluYSBBVCQtrI3q9ISej1ephMJuTk5CA+Ph45OTkwmUwIDQ21G2c2m/H1118jPj4ejY2N2Lt3L2JjY52ay2qVWlQS13/WGzGXc5jLed6ajbmc445cip0Cm56ejuzsbMTGxiI7OxsLFy4EACQnJ+PIkSMAgAkTJkCv1+ORRx5BQkIC7rnnHjz++ONKRSQiol9Q7BRYpVRW1rSoTQ2GQFRUXHJDotvDXM5hLud5azbmck5LcwmCCnp9gPz9txOKiIjubiwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZGqUmKiwsREpKCi5evIjg4GBkZmYiKirKbkxWVhY++OADhIWFAQAGDhyItLQ0pSISEdEvKFYSaWlpSEpKQnx8PDZu3IjU1FSsXbu2ybiEhATMmzdPqVhERNQMRXY3VVZWIj8/H2azGQBgNpuRn5+PqqoqJaYnIqIWUqQkLBYLwsPDoVarAQBqtRphYWGwWCxNxn7++eeIi4vDtGnTcODAASXiERGRDMV2NzliypQpeOqpp6DVarF7924888wz2Lx5M0JCQhzehl4f0OL5DYbAFv+sOzGXc5jLed6ajbmc445cipSE0WhEWVkZRFGEWq2GKIooLy+H0Wi0G2cwGGxfx8TEwGg04sSJExgyZIjDc1VW1sBqlZzOaDAEoqLiktM/527M5Rzmcp63ZmMu57Q0lyComv3jWpHdTXq9HiaTCTk5OQCAnJwcmEwmhIaG2o0rKyuzfX3s2DGcPXsWXbp0USIiERHdhGK7m9LT05GSkoIVK1YgKCgImZmZAIDk5GQ899xz6NOnD5YuXYqjR49CEARotVosWbLE7t0FEREpS7GSiI6Oxrp165rcvnLlStvX14uDiIi8Az9xTUREslgSREQkiyVBRESyvOpzEkTUulklCVXVV3C1wYpGlQCV1Qq1wL9lPYklQUQedbVexJ6jpdh58CzOna+FIACCSgUAaGi0IizEDw/0MWJ4vwgE+Gk9nLb1YUkQkUdIkoSdB87i
4//8F4AKVxvEa3eI9uMslbXYmFuIDbmFGDekE+JioqBR892FUlgSRKS42iuNWLbuEIrLL6G+wXrL8fWN18Zs/a4Y3xWUY+6U/ggN8nV3TAIPXBORwuquNuIv7+1HUWm1QwVxo/oGK8ov1GHRmv24cOmqmxLSjVgSRKSolZvyUX6xDo2i82usAdcObtfUNeC1jw+2aJ02cg5LgogU8/3xCuSfrmpxQVxntUoov1iHrfuKXZSM5LAkiEgRkiTh39tPOL2LSU59gxWf7S5CfYN468HUYiwJIlLETyUXUVPX4PLt7jtW7vJt0v+wJIhIEd8fr3D5X/1XG0Tk5Ze6dJtkjyVBRIr46cxFuOMw8+myGjdsla5jSRCRIs7/fMUt271c14BG0TXHOagplgQRKUKS3HO6qkoFngrrRiwJIlKETqN205ZV0Gj4q8xd+MgSkSIiwwLcsl1DsK9tQUByPZYEESnC1DkEGrVrf5mrVEC3yGCXbpPssSSISBHDerWHCq4tCa1GwIP9O7h0m2SPJUFEiggJ9IEpKgSC4Lqi0Af5oosx0GXbo6ZYEkSkmN+O7e6yXU5ajYAZ5p5Q8XiEW7EkiEgx+ra+eGLMvdDd5tlIOq2AsYMj0cUY5KJkJIclQUSKGt43Aub7O7e4KHRaAUN6hGPSyK4uTkY3wyvTEZHizPd3gSHYD2u+OI6GRitEBz4Mp1IBWrWAR4d3xdhfRXI3k0JYEkTkEUN7tkePTiFYt/Mkvisoh6C64TrXN9BpBFgloHeXUCSOugfhof4eSNt6sSSIyGPaBvhghrknkh6+F4f+ex4nzvyMwtJq1DeI8PXRIELvj24dg9HvnnZo20bn6bitEkuCiDzO31eD+3q3x32929tuMxgCUVFxyYOpCFDwwHVhYSESExMRGxuLxMREFBUVyY49deoU+vXrh8zMTKXiERHRTShWEmlpaUhKSsLWrVuRlJSE1NTUm44TRRFpaWl4+OGHlYpGREQyFCmJyspK5Ofnw2w2AwDMZjPy8/NRVVXVZOw///lPPPjgg4iKilIiGhERNUORYxIWiwXh4eFQq68tFaxWqxEWFgaLxYLQ0FDbuIKCAuTm5mLt2rVYsWJFi+bS61u+0qTB4J0f72cu5zCX87w1G3M5xx25vObAdUNDAxYsWIC//vWvtjJpicrKmhZdgMRbD5Ixl3OYy3nemo25nNPSXIKgavaPa0VKwmg0oqysDKIoQq1WQxRFlJeXw2g02sZUVFSguLgYM2fOBABUV1dDkiTU1NQgIyNDiZhERPQLipSEXq+HyWRCTk4O4uPjkZOTA5PJZLerKSIiAnl5ebbvs7KyUFtbi3nz5ikRkYiIbkKxs5vS09ORnZ2N2NhYZGdnY+HChQCA5ORkHDlyRKkYRETkBMWOSURHR2PdunVNbl+5cuVNx8+aNcvdkYiI6Ba4CiwREcliSRARkSyWBBERyWJJEBGRLJYEERHJYkkQEZEslgQREcliSRARkSyWBBERyXLoE9clJSVYtmwZjh07htraWrv7du7c6Y5cRETkBRwqiblz5yIyMhLz5s2Dn5+fuzMREZGXcKgkTpw4gQ8//BCCwL1TREStiUO/9X/1q18hPz/f3VmIiMjLOPROokOHDpgxYwbGjBmDdu3a2d03e/ZstwQjIiLPc6gk6urq8NBDD6GxsRGlpaXuzkRERF7CoZL461//6u4cRETkhRy+6FBRURFycnJQXl6OsLAwmM1mREVFuTEaERF5mkMHrnfs2IFJkyahsLAQbdu2RWFhIR577DFs377d3fmIiMiDHHon8dprr2HFihUYNmyY7ba8vDxkZGRg9OjRbgtHRESe5dA7idLSUgwePNjutkGDBvEgNhHRXc6hkujRowfeffddu9tWr14Nk8nkllBEROQdHNrdlJ6ejqeffhpr166F0WiExWKBn58f3nrrLXfnIyIiD3KoJKKjo7F582YcPHjQdnZTv379oNVq3Z2PiIg8yOFTYDUa
TZPjEkREdHeTLYnx48djy5YtAICRI0dCpVLddByXCiciunvJlkRGRobt61dffVWRMERE5F1kS+LGXUuVlZUYP358kzFffPGFe1IREZFXcOgU2Pnz59/09tTUVIcnKiwsRGJiImJjY5GYmIiioqImYz755BPExcUhPj4ecXFxWLt2rcPbJyIi12v2wHVJSQkAQJIk29c33qfT6RyeKC0tDUlJSYiPj8fGjRuRmprapARiY2MxadIkqFQq1NTUIC4uDkOGDEGPHj0cnoeIiFyn2ZIYM2YMVCoVJEnCmDFj7O5r164dZs2a5dAklZWVyM/Px+rVqwEAZrMZGRkZqKqqQmhoqG1cQECA7esrV66goaFB9oA5ERG5X7MlUVBQAACYOnUqsrOzWzyJxWJBeHg41Go1AECtViMsLAwWi8WuJABg+/btWLp0KYqLi/HCCy+ge/fuLZ6XiIhuj0Ofk7idgnDW6NGjMXr0aJw7dw7PPvssRowYga5duzr883p9wK0HyTAYAlv8s+7EXM5hLud5azbmco47csmWxPTp0/HOO+8AAJKSkmR3+7z//vu3nMRoNKKsrAyiKEKtVkMURZSXl8NoNMr+TEREBPr06YOdO3c6VRKVlTWwWiWHx19nMASiouKS0z/nbszlHOZynrdmYy7ntDSXIKia/eNatiQSEhJsX0+ePNnpiW+k1+thMpmQk5OD+Ph45OTkwGQyNdnVdPLkSURHRwMAqqqqkJeXh7Fjx97W3ERE1HKyJREXFwcAEEURxcXFePrpp506m+mX0tPTkZKSghUrViAoKAiZmZkAgOTkZDz33HPo06cPPvroI+zevRsajQaSJGHq1Kl44IEHWjwnERHdHpUkSbfcNzN06FDs2bMHguDQxyo8irublMFczvHWXID3ZmMu57hrd5NDv/UTEhLw4YcfOj05ERHd2Rw6u+nw4cPIzs7GO++8g/bt29sdxHbkwDUREd2ZHCqJX//61/j1r3/t7ixERORlHCqJrl27ol+/fk1uP3z4sMsDERGR93CoJH7/+9/jhx9+aHL7jBkzsG/fPpeHIrrTWSUJJWU1KCqtRkl5DVSCAEgSOoUFIMoYhI6GNlxyhu4IzZaE1WqFJEl2/11XXFxsW2aDiK6pu9qI/xw4g237SnC1wQoJEuobrLb7fbQCABX8fDQYNzQSI/t3gI+WryPyXs2WRM+ePW1/7fTs2dPuPkEQ8NRTT7kvGdEd5lhRFd767Ciu1ItoaLTedMzV/yuMqw0iPv36FL7IK8bTCb3RrWOwgkmJHNdsSWzfvh2SJOG3v/2t3fpNKpUKoaGh8PX1dXtAojvBtu+K8enXp1AvUw43U99oRX1NPf7+74NIGnMvRvSLcGNCopZptiQ6dOgA4NoqsNe/vtHq1avx+9//3j3JiO4QOw+edbogblTfaMUHX/4EnVbAsJ7tXZyO6PY49GG65cuX3/T2N99806VhiO40pVW1+PdXJ1pcENfVN1qxZksBqqqvuCgZkWs0+05iz549AK6t37R37167A9clJSVo06aNe9MRebm3PzuKBvH2CuK6BlHCqpx8vJQ00CXbI3KFZkvi+rWt6+vr8fLLL9tuV6lUMBgMWLBggXvTEXmxQks1LOcv49arnznGapVw8lw1LJWXYdTzDzDyDs2WxI4dOwAAL730EpYsWWK7vaCgwHad6lGjRrk3IZGX+vK7Epe9i7hOtFqx/fszmDqWV2Qk7+DQh+mWLFmCqqoqbNq0CRs2bEBBQQEGDx5se6dB1BodO33BZe8irrNagR8Lq1y7UaLb0GxJNDQ0YMeOHVi/fj1yc3PRqVMnTJgwAWfPnsWyZcug1+uVyknkVequNqKmrsEt2678+QoaRSs0au9fmp/ufs2WRExMDFQqFSZNmoRZs2ahV69eAMBlw6nVq75cD61GgFgvunzbgqDC5SuNaNum5Rf5InKVZv9U6d69Oy5duoRDhw7hyJEj+Pnnn5XKReTVXLyXqen2
Xb0fi6iFmi2J9957D19++SViYmLw7rvvIiYmBk899RRqa2vR2NioVEYirxPor5VdeuN2Wa0S2vg6dLiQyO1uudOzQ4cOePbZZ7Ft2zb861//gsFggCAImDhxot0ZT0StSRtfLfzd9Is8JNAHWg0X/SPv4NSRscGDByMjIwO7d+/GggUL8NNPP7krF5HXuzcy2OXbFFQqmDqHuHy7RC3Voj+FfHx8YDabYTabXZ2H6I4xZnAkfjxVhasNrjt4rVGrMHpQR5dtj+5uFy5dxd6jpTj/8xW0C/VH9w5t0TUiyKVzcMcnUQt169gWoUE+sFTWumR7KhUQ0a4NOoUHumR7dPeqbxDx7uZj+OGnCgBAoyhBBUCrFaAP8sUfJ/Vx2af2eSI2UQupVCr8YWIvaDWueRlp1QJmTuzlkm3R3atRtOLvHx3EgRPn0ShKaBSvnQknAahvsKK0shaL136P8guu+eOFJUF0GzqFB2JiTBR0t1kUOo2AyQ/dg/ah/i5KRnervPwyFJddkj27TgJQV9+ItVuPu2Q+lgTRbXpkWGc8PDgSOm3LXk46jYC4mCgeiyCHbN572naFQzmSBPxU8rNLlp5nSRDdJpVKhccfjMb0CT3hq1NDLagc+jmNWgV/Hw2eiu+NCfdFuTck3RUaGq0orXJsN5JGrcJ/z97+B6B54JrIRX7VIwzdI4ORs6cIuw5ZoFJde1GL1v99elotqKDTCJAAPDigAx4Z1hkBflrPhaY7itUqQQUVJAc+8y9JsHvutZRiJVFYWIiUlBRcvHgRwcHByMzMRFRUlN2Y5cuXY/PmzRAEAVqtFnPmzMHw4cOVikgOaBStqL3aCOHnOtTUNcDfVwNB5dhfzq1BUBsdkh6+F5MfjMbx4osoLK1GkeUSJJUKAiR0MQYhyhiE7pHBXMCPnKbTCvDzUePyFUdWvJBccoxLsZJIS0tDUlIS4uPjbdeiWLt2rd2Yvn37Ytq0afDz80NBQQGmTp2K3Nxc+Pr6KhWTfsFqlXDo5Hn88FMF/nvmZ5z/+QoEQQWVSgWr1QqVSgWj3h/3dgzGfb3bo4vRtedo36m0GjV6d9Wjd9drKyUbDIGoqLjk4VR0p1OpVBg1sCO25J22ndUkJzjAB1Htb/90akVKorKyEvn5+Vi9ejUAwGw2IyMjA1VVVQgNDbWNu/FdQ/fu3SFJEi5evIj27XlxeKVdbRCxbV8xvtx/Bo2iFVduWO3U/i2shOKyGpSU1eCbw+cQGuiLuPujMLRXON9hELnB6MEd8Z8DZ5tdql6rETBldDeoXPAaVOT9rsViQXh4ONTqa+vRqNVqhIWFwWKxyP7Mhg0b0KlTJxaEB/xUchEpb+9Bzp7TqKlrsCsIObZztKtqsXZrAf7y3vc4f7HO/WGJWpkgfx1SnhiIIH8tfLT2a3xp1CpoNQJ+G9sd/e5p55L5vPLA9b59+/D666/j3Xffdfpn9fqAFs9rMHjnJ12VyiVJEtZuPobPdp1E/S1OsWvO1QYrikovYcG7+/BC0kDc1yfChSlvrbX/f2wJb83GXPLzv5sai9yDZ7Fp1ylUVV+Bj06N4f07YEJMF+jb+rlsLkVKwmg0oqysDKIoQq1WQxRFlJeXw2g0Nhl74MABvPjii1ixYgW6du3q9FyVlTWwtuCIvrfuM1YqlyRJWLv1OPYcLb2tgrjOapVwtV7Eq9nf43fjL+O+Xsq8I2zt/x9bwluzMdet9Y0KQd+oQQD+l8ta3+hUPkFQNfvHtSK7m/R6PUwmE3JycgAAOTk5MJlMdscjAODw4cOYM2cO3njjDdtV8EgZG3MLXVYQN2potGLNlgIcLeJ1m4nuRIqdg5eeno7s7GzExsYiOzsbCxcuBAAkJyfjyJEjAICFCxfiypUrSE1NRXx8POLj43H8uGs+Wk7yCi3V2JJX7PKCuK6+0Yq3Nx5FrUOn7RGRN1HsmER0dDTWrVvX5PaVK1favv7kk0+UikP/p6HRijc3/Oi2q6xdd6Ve
xHvbjuMPXMCO6I7CT/O0cnuOlqK6tt7t8zSKVvzwUwXOnb/s9rmIyHVYEq2YJEnYvOe023Yz/ZJoteKr/SWKzEVErsGSaMVOWapx8fJVxeazWoFvfyx16ZXciMi9WBKtWH5RFcRbfLTf1dSCCkWWakXnJKKWY0m0YseLL7pklUhnNIhWnC71jnPMiejWWBKtWEl5jeJzNooSfjpzUfF5iahlWBKt2JWrnjk2UH1ZfmEyIvIuLIlWzCopu6vJNq/Cu7iIqOVYEq2YRu2Zpbxbei1oIlIeX62tmD5I+Ys5qQBEhrV8pV4iUhZLohXrFhms+Jw+OjWiO7RVfF4iahmWRCvWrWPbJhctcTfRKvESp0R3EJZEKzagm0Hxg9fhIX4wBLvugihE5F4siVbMz0eDIaYwCIIyB7B9dWo8MqyzInMRkWuwJFq58UM7Q61QSWjUAgZ1D1NkLiJyDZZEKxfRrg3GDI50+2mpOo2AP0zsBa2GTzmiOwlfsYSE4V0QHOADlZveUGg1Agb1CEOvLqG3HkxEXoUlQdCoBTyf2B/+Pq6/UKFGrUKEvg2ejO3u8m0TkfuxJAgAEBbsh5d/OwgBfloILnpWaDUCOhoC8FLSAMVPtSUi12BJkI1R3wYLpw1Bt47B8LnNYxRajYDhfY1IeWIg/NzwDoWIlMFXL9kJCfTBS78ZgNwjFnz41QlIEpy6kpyPVo02fhr8YWIvdOsY7L6gRKQIlgQ1oVKpMLxvBIb1DMe+Y+XYsvc0yi7UQacVUN9gtbtQkVYjQCOoUN9oRfdOwRg/tDNMUSEQ3HUUnIgUxZIgWVqNGjF9jIjpY0RNXQNOl15CUWk1qqqvQq1RQ7KKaB/aBp3bByIyLIDHHYjuQiwJckiAnxa9uoTaTmM1GAJRUcHLkBLd7XjgmoiIZLEkiIhIFkuCiIhkKVYShYWFSExMRGxsLBITE1FUVNRkTG5uLiZNmoTevXsjMzNTqWhERCRDsZJIS0tDUlIStm7diqSkJKSmpjYZExkZicWLF2P69OlKxSIiomYoUhKVlZXIz8+H2WwGAJjNZuTn56OqqspuXOfOnWEymaDR8KQrIiJvoEhJWCwWhIeHQ62+dh69Wq1GWFgYLBaLEtMTEVEL3XV/suv1AS3+WYMh0IVJXIe5nMNczvPWbMzlHHfkUqQkjEYjysrKIIoi1Go1RFFEeXk5jEajy+eqrKyB1er8dZu99cNhzOUc5nKet2ZjLue0NJcgqJr941qR3U16vR4mkwk5OTkAgJycHJhMJoSG8iI0RETeTLGzm9LT05GdnY3Y2FhkZ2dj4cKFAIDk5GQcOXIEALB//36MGDECq1evxr///W+MGDECu3btUioiERH9gmLHJKKjo7Fu3bomt69cudL29eDBg/HNN98oFYmIiG6Bn7gmIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSxZIgIiJZLAkiIpLFkiAiIlksCSIiksWSICIiWSwJIiKSpfF0AE87U1GDbftK8HNdA3p2CsaD/TvAR6f2dCwiIq/Qqkui4PQFLFt3CI2iFVYJKCiqwu4jFiz4f7+CVsM3WURErfo34ftf/oT6xmsFAQANjVZUXLyC/QXlng1GROQlWnVJnKu83OS2qw0iTpz52QNpiIi8j2IlUVhYiMTERMTGxiIxMRFFRUVNxoiiiIULF+Lhhx/GmDFjsG7dOrdmCgn0aXKbTiOgg8HfrfMSEd0pFCuJtLQ0JCUlYevWrUhKSkJqamqTMZs2bUJxcTG2bduGjz76CFlZWThz5ozbMj02Mhq6G449CCrAV6fGfb2MbpuTiOhOosiB68rKSuTn52P16tUAALPZjIyMDFRVVSE0NNQ2bvPmzZg8eTIEQUBoaCgefvhhfPHFF5gxY4bDcwmCyuGx
MX2MCA7wwVf7S1BzpRHRHYIwbkgnBPhrHf/HKcCZf5OSmMs53poL8N5szOWcluS61c8oUhIWiwXh4eFQq6+dWqpWqxEWFgaLxWJXEhaLBREREbbvjUYjSktLnZorJKSNU+NH6AMwYnAnp35GaXp9gKcj3BRzOcdbcwHem425nOOOXK36wDURETVPkZIwGo0oKyuDKIoArh2gLi8vh9FobDLu3Llztu8tFgvat2+vREQiIroJRUpCr9fDZDIhJycHAJCTkwOTyWS3qwkAxo0bh3Xr1sFqtaKqqgpfffUVYmNjlYhIREQ3oZIkSVJiopMnTyIlJQXV1dUICgpCZmYmunbtiuTkZDz33HPo06cPRFHEokWLsHv3bgBAcnIyEhMTlYhHREQ3oVhJEBHRnYcHromISBZLgoiIZLEkiIhIFkuCiIhktZrrSVy4cAEvvfQSiouLodPp0LlzZyxatKjJabh1dXX405/+hKNHj0KtVmPevHl46KGHvCJbSkoKvv32W4SEhAC4dsrw008/7dZszzzzDM6cOQNBEODv748FCxbAZDLZjRFFEX/+85+xa9cuqFQqzJw5E5MnT/Z4rqysLHzwwQcICwsDAAwcOBBpaWluzQUA//jHP5CVlYVNmzbh3nvvtbvPE88vR3J54rkFAKNGjYJOp4OPz7XFNufOnYvhw4fbjfHEY+ZILk88ZlevXsVf/vIX7NmzBz4+Pujfvz8yMjLsxrj89Si1EhcuXJD27t1r+/6VV16R/vSnPzUZl5WVJc2fP1+SJEkqLCyU7r//fqmmpsYrss2bN09677333Jrll6qrq21ff/nll1JCQkKTMevXr5emTZsmiaIoVVZWSsOHD5dKSko8nuuNN96QXnnlFbfm+KUff/xRmj59uvTQQw9Jx48fb3K/J55fjuTyxHNLkiTZPDfyxGPmSC5PPGYZGRnS4sWLJavVKkmSJFVUVDQZ4+rXY6vZ3RQcHIyhQ4favu/fv7/dp7uv27Jli+2zGVFRUejduze++eYbr8jmCYGBgbava2pqoFI1XQxMbmFGT+dSWn19PRYtWoT09HTZMZ54fjmSy5t54jHzRpcvX8aGDRswe/Zs2/O9Xbt2Tca5+vXYanY33chqteLDDz/EqFGjmtx37tw5dOjQwfZ9SxYZdFc2AFi9ejU++ugjREZG4oUXXkB0dLTbM82fPx+7d++GJElYtWpVk/tdsTCjO3IBwOeff47c3FwYDAbMmjULAwYMcFue119/HRMnTkTHjh1lx3ji+eVILsAzzy3g2q4cSZIwaNAgPP/88wgKCrK731OvyVvlApR9zEpKShAcHIx//OMfyMvLQ5s2bTB79mwMHjzYbpyrX4+t5p3EjTIyMuDv74+pU6d6OkoTzWWbM2cOvvzyS2zatAljx47FjBkzbOthudPixYuxc+dOzJkzB0uWLHH7fI66Va4pU6Zg+/bt2LRpE6ZPn45nnnkGFy5ccEuWAwcO4Mcff0RSUpJbtt9Sjuby1HPr/fffx2effYZPPvkEkiRh0aJFbp/TEY7kUvoxE0URJSUl6NmzJz799FPMnTsXs2bNQk1NjdvmBFphSWRmZuL06dNYtmwZBKHpPz8iIgJnz561fa/kIoO3yhYeHm67PSEhAbW1tYq+y0lISEBeXl6TX7SeXphRLpfBYIBWe+3aIDExMTAajThx4oRbMnz33Xc4efIkRo8ejVGjRqG0tBTTp09Hbm6u3Tiln1+O5vLUc+v6Ip86nQ5JSUn44YcfmozxxGvSkVxKP2ZGoxEajQZmsxkA0K9fP4SEhKCwsLDJOFe+HltVSSxduhQ//vgjli9fDp1Od9Mx48aNw0cffQQAKCoqwpEjR5qc1eCpbGVlZbavd+3aBUEQEB4e7rZMly9fhsVisX2/Y8cOtG3bFsHBwXbjlF6Y0dFcNz5ex44dw9mzZ9GlSxe3ZJo5cyZyc3OxY8cO7NixA+3bt8c777yDBx54wG6c0s8vR3Mp/dwC
gNraWly6dAkAIEkSNm/e3OQMNUD5x8zRXEo/ZqGhoRg6dKhtbbvCwkJUVlaic+fOduNc/XpsNcckTpw4gbfffhtRUVGYMmUKAKBjx45Yvnw54uPj8c9//hPh4eGYPn06UlJSMGbMGAiCgEWLFiEgwL0XGHE027x581BZWQmVSoWAgAC8+eab0Gjc97+wrq4Os2fPRl1dHQRBQNu2bfHWW29BpVLZLcwYHx+PQ4cOYezYsQCAZ599FpGRkR7PtXTpUhw9ehSCIECr1WLJkiUwGAxuyyXH088vR3Ip/dwCrl2xctasWRBFEVarFdHR0bZTlD35mDmayxOP2cKFC/Hyyy8jMzMTGo0GS5YsQVBQkFtfj1zgj4iIZLWq3U1EROQclgQREcliSRARkSyWBBERyWJJEBGRLJYE3ZEmTJiAvLw8t8/z1ltvYf78+W6fh8hb8RRY8ko3rrFUV1cHnU4HtVoN4Nq54hMnTvRUNKccPnwYWVlZOHDgAARBQKdOnfCb3/wGjz32mKejETmEJUFeb9SoUfjzn/+M+++/39NRnHLgwAFMmzYNTz/9NB5//HGEhITg6NGjWLlyJV5//XVPxyNyCHc30R1p1KhR+PbbbwFcu7jQc889h7lz52LAgAGIi4tDYWEh3n77bdx3330YOXKk3VpFly5dwssvv4wHHngAw4cPx2uvvSa7MFtWVhbmzp0LADhz5gy6d++O9evX48EHH8TQoUPx5ptvymZcsmQJEhISMHPmTISGhkKlUqF37952BfHxxx9jzJgxGDJkCJ566im7pR66d++O999/H2PHjsWAAQOwbNkyFBcXY8qUKRg4cCBmz56N+vp6AEBeXh5GjBiBt956C0OHDsWoUaPw2Wef2ba1c+dOJCQkYODAgRg5ciSysrJs9zX376qoqEC/fv3s1sU6evQohg0bhoaGhlv/j6I7HkuC7gr/+c9/EB8fj++++w4mkwnTp0+H1WrFN998g2effRapqam2sSkpKdBoNNi2bRs2bNiA3bt3Y926dQ7P9f333+OLL77AmjVrsHz5cpw8ebLJmLq6Ohw8eLDZNXP27NmDv//971i2bBlyc3PRoUMHPP/883ZjcnNz8emnn+Ljjz/GqlWrsGDBArz66qv4+uuvceLECXz++ee2sefPn8eFCxewa9cuvPLKK0hNTcWpU6cAAH5+fsjMzMT+/fvx9ttv48MPP8RXX311y3+XwWDAkCFDsGXLFtu4jRs3YsKECbbFE+nuxpKgu8LgwYMxfPhwaDQajBs3DhcuXMDMmTOh1WrxyCOP4OzZs6iursb58+fx9ddf4+WXX4a/vz/0ej1+97vf2f2yvZU//vGP8PX1RY8ePdCjRw8UFBQ0GVNdXQ2r1drsWlGbNm3CY489hl69ekGn0+H555/HwYMHcebMGduYGTNmICAgAN26dcO9996LmJgYREZGIjAwECNGjEB+fr7dNmfPng2dTochQ4Zg5MiRtl/uQ4cORffu3SEIAnr06IEJEyZg3759Dv27Hn30Udu7ElEU8fnnnyM+Pt7hx4vubK1mgT+6u+n1etvXvr6+CAkJsR3o9vX1BXBtdc/y8nI0NjbarYJqtVptS0M74sargfn5+aG2trbJmKCgIAiCgIqKCtkL0ZSXl6NXr16279u0aYPg4GCUlZXZLhB041w+Pj5Nvj9//rzdnP7+/rbvIyIiUF5eDgA4dOgQ/va3v+HEiRNoaGhAfX09xo0b59C/a/To0UhLS0NJSQkKCwsREBCAvn37NvMI0d2EJUGtSvv27aHT6bB37163rtjp5+eH/v37Y9u2bRg2bNhNx4SFhdldJ6G2thYXL15s8XLT1dXVqK2ttRWFxWJBt27dAAAvvPACpk6dilWrVsHHxweLFy92+AJMPj4+GD9+PD777DOcOnWK7yJaGe5uolYlLCwMMTExeOWVV1BTUwOr1Yri4uImu15c4cUXX8T69euxatUq2y/kgoICzJkzBwBgNpvx
6aef4tixY6ivr8fSpUvRt2/fW15mtDlZWVmor6/H/v37sXPnTtu7hcuXL6Nt27bw8fHB4cOHkZOT49R24+PjsX79euzYsYMl0crwnQS1OkuWLMHf/vY3PPLII7h8+TIiIyORnJzs8nkGDhyINWvW4I033sCbb74JtVqNzp0744knngAA3H///Zg9ezZmzZqF6upqDBgwAK+99lqL52vXrh2CgoIwfPhw+Pn5IT093barKy0tDZmZmVi0aBGGDBmC8ePHo7q62uFtDxo0CIIgoFevXnbXm6a7Hz8nQXQXyMvLw4svvohvvvnGbXM8+eSTiIuLw+TJk902B3kf7m4iols6fPgw8vPzMX78eE9HIYVxdxMRNWvevHn46quvMH/+fI9dapU8h7ubiIhIFnc3ERGRLJYEERHJYkkQEZEslgQREcliSRARkSyWBBERyfr/MTLQu+oZ91EAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "df_hr.groupby(['TIC'])['left'].agg(['mean', 'sum']).reset_index().plot.scatter(y='mean',x='TIC', s='sum', xlabel='Time in Company', ylabel='Attrition',ylim=(0,0.6) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d62f30a-835b-44d1-a4c8-3c040797b727",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>TIC</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>0.010262</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>0.165727</td>\n",
       "      <td>882</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>0.240777</td>\n",
       "      <td>496</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>0.444240</td>\n",
       "      <td>482</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>0.212891</td>\n",
       "      <td>109</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   TIC      mean  sum\n",
       "0    2  0.010262   31\n",
       "1    3  0.165727  882\n",
       "2    4  0.240777  496\n",
       "3    5  0.444240  482\n",
       "4    6  0.212891  109"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean and sum of `left` for each TIC value, as a flat table (TIC restored as a column).\n",
    "(\n",
    "    df_hr.groupby('TIC')['left']\n",
    "    .agg(['mean', 'sum'])\n",
    "    .reset_index()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7a3908c-8464-4e72-9136-5d868c082973",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bucket rows by their rank on `S`: 600 consecutive ranks per bucket (about 20 buckets if\n",
    "# df_hr holds the 12,000 rows used in the regression -- TODO confirm). The bucket number is\n",
    "# negated, so the highest-ranked S values receive the most negative bucket.\n",
    "df_hr['S_ranked'] = (\n",
    "    df_hr['S']\n",
    "    .rank(method='max')  # tied values all take the highest rank of their tie group\n",
    "    .div(600)\n",
    "    .pipe(np.ceil)\n",
    "    .mul(-1)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5798cfc-4a2f-4b6d-bab7-3f16ecfccd20",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-row attrition: the mean of `left` over all rows that share the row's S_ranked bucket.\n",
    "bucket_key = df_hr['S_ranked']\n",
    "df_hr['attrition'] = df_hr['left'].groupby(bucket_key).transform('mean')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd1af100-2d5a-4151-a5a2-f1b1dd1f28e1",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='S_ranked', ylabel='attrition'>"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEMCAYAAAAxoErWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAdVklEQVR4nO3dfVRUdeLH8Q8zSthPXIMAh54s3Yyt1LUHe9DWTAMLDuCGGD2ZR8pK2+20KGWhrC3mbr/aFnU7m6EZPUkpLqOpmbtlZZrWBifKY4ZZMoCCbj7gYsP9/eFP1hEuDMwj8H6d4znD8L1zPw4XPnO/987cEMMwDAEA0AJLoAMAAIIXJQEAMEVJAABMURIAAFOUBADAFCUBADBFSQAATPUIdABvO3DgiBob2//Wj8jI3qqtPeyDRN5BPs+Qz3PBnpF8HWOxhOiss/7H9PtdriQaG40OlcTJZYMZ+TxDPs8Fe0byeR/TTQAAU5QEAMAUJQEAMEVJAABMdbkD1wDQ3Ux+emPT7YLs0V59bPYkAKATO7UgWvraU5QEAHRSZoXgzaKgJAAApigJAIApSgIAOimzg9TePHhNSQBAJ3Z6IXj77CZOgQWATs7bxXAq9iQAAKYoCQCAKUoCAGCKkgAAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApvz2juuKigplZ2fr4MGD6tu3r+bPn6/+/fu7jKmtrdVjjz0mh8Ohn376ScOHD9cTTzyhHj14YzgABILf9iRmz56tjIwMrVu3ThkZGcrJyWk25oUXXtCAAQNUUlKiv//97/ryyy+1fv16f0UEAJzGLyVRW1ur8vJyJSYmSpISExNVXl6uuro6l3EhISE6cuSIGhsb1dDQoOPHjysmJsYfEQEALfBLSTgcDsXExMhqtUqSrFaroqOj5XA4XMY9+OCDqqio0IgRI5r+XXHFFf6ICABoQVBN9q9du1aDBg3Syy+/rCNHjigzM1Nr165VQkKC248RGdm7w+uPigrv8LL+QD7PkM9zwZ6RfN7nl5Kw2Wyqrq6W0+mU1WqV0+lUTU2NbDaby7jCwkLl5eXJYrEoPDxco0eP1pYtW9pVErW1h9XYaLQ7Y1RUuPbtO9Tu5fyFfJ4hn+eCPSP5OsZiCWn1xbVfppsiIyMVFxcnu90uSbLb7YqLi1NERITLuHPPPVcffPCBJKmhoUGbN2/Wz3/+c39EBAC0wG9nN82ZM0eFhYWKj49XYWGhcnNzJUmZmZkqKyuTJD3++OPavn27kpKSlJKSov79+2vChAn+iggAOE2IYRjtn5sJYkw3BQb5PBPs+aTgz0i+jgmK6SYAQOdESQAATFESAABTlAQAwBQlAQAwRUkAAExREgAAU5QEAMAUJQEAMEVJAABMURIAAFOUBADAFCUBADBFSQAATAXV5UsBIBAmP72x6XZB9ugAJgk+7EkA6NZOLYiWvu7uKAkA3ZZZIVAU/0VJAABMURIAAFOUBIBuy+wgNQev/4uSANCtnV4IFIQrToEF0O1RDObYkwAAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACY4h3XADo9LhrkO+xJAOjUuGiQb1ESADotLhrke5QEAMAUJQEAMOW3kqioqFB6erri4+OVnp6u3bt3tzhuzZo1SkpKUmJiopKSkrR//35/RQTQyXDRIN/zW0nMnj1bGRkZWrdunTIyMpSTk9NsTFlZmRYsWKCCggLZ7Xa99tprCg8P91dEAJ0QFw3yLb+cAltbW6vy8nItWbJEkpSYmKi5c+eqrq5OERERTeOWLl2qyZMnKyoqSpIoCABuKcgeraiocO3bdyjQUbocv+xJOBwOxcTEyGq1SpKsVquio6PlcDhcxu3atUvff/+97rjjDqWmpmrRokUyDMMfEQEALQiqN9M5nU7t2LFDS5YsUUNDg6ZMmaLY2FilpKS4/RiRkb07vP6oqODecyGfZ8jn
uWDPSD7v80tJ2Gw2VVdXy+l0ymq1yul0qqamRjabzWVcbGysEhISFBoaqtDQUN10000qLS1tV0nU1h5WY2P79z6CfVeVfJ4hn+eCPSP5OsZiCWn1xbVfppsiIyMVFxcnu90uSbLb7YqLi3M5HiGdOFbx4YcfyjAMHT9+XJ988okuueQSf0QEALTAb2c3zZkzR4WFhYqPj1dhYaFyc3MlSZmZmSorK5Mk3XrrrYqMjNQtt9yilJQUDRw4ULfddpu/IgIAThNidLEjw0w3BQb5PBPs+aTgz0i+jgmK6SYAQOdESQAATFESAABTlAQAwBQlAQAwRUkAAExREgAAU5QEAMAUJQEAMNWuD/j79ttv9fXXX+vo0aMu9/PRGQDQNbldEi+88IIWLlyoSy65RGFhYU33h4SEUBIA0EW5XRIvv/yyioqK+FRWAOhG3D4mERYWposuusiXWQAAQcbtkvjNb36jp556SjU1NWpsbHT5BwDomtyebsrOzpYkFRUVNd1nGIZCQkL01VdfeT8ZACDg3C6J9957z5c5AABByO2SOOeccyRJjY2N2r9/v84++2xZLLzNAgC6Mrf/yh8+fFgzZszQ4MGDdcMNN2jw4MGaOXOmDh0KvistAQC8w+2SeOqpp1RfX6+SkhKVlpaqpKRE9fX1euqpp3yZDwAQQG5PN23atEkbNmxQr169JEkXXnih5s2bp7Fjx/osHAAgsNzekzjjjDNUV1fnct+BAwcUGhrq9VAAgODg9p7EbbfdpsmTJ2vSpEmKjY1VZWWlli5dqgkTJvgyHwAggNwuiQceeEDR0dGy2+2qqalRdHS0pkyZwuc2AUAX5nZJnPwgP0oBALqPVkuiuLhYKSkpkqS33nrLdBzFAQBdU6slsXr16qaSWLVqVYtj+KhwAOi6Wi2JF198sen2K6+84vMwAIDg4vYpsCf3KE43fvx4b2UBAAQZt0viu+++a3afYRj64YcfvBoIABA82jy7acaMGZKk48ePN90+ae/evRo4cKBvkgEAAq7Nkjj//PNbvC1Jw4YNU0JCgvdTAQCCQpslMW3aNEnSkCFDNHLkSJ8HAgAEj1ZL4tNPP9VVV111YmCPHtq8eXOL46699lrvJwMABFyrJZGbmyu73S5JmjVrVotjQkJCuGodAHRRrZbEyYKQpHfffVdWq9XngQAAwcOtU2CdTqd++ctfqqGhwdd5AABBxK2SsFqt6t+/vw4cONDhFVVUVCg9PV3x8fFKT0/X7t27Tcd+++23GjJkiObPn9/h9QEAPOf2p8AmJSVp6tSpuvvuu9WvXz+X77lz4Hr27NnKyMhQcnKyVq1apZycHC1btqzZOKfTqdmzZ2vMmDHuRgMA+IjbJfH6669LkvLz813ud+fAdW1trcrLy7VkyRJJUmJioubOnau6ujpFRES4jP3b3/6mUaNG6ejRozp69Ki78QAAPuB2SWzcuLHDK3E4HIqJiWk68G21WhUdHS2Hw+FSEl9//bU+/PBDLVu2TIsWLerw+gAA3tGuK9P99a9/bXb/tGnTtGDBAo+DHD9+XE8++aTmzZvn0VlUkZG9O7xsVFR4h5f1B/J5hnyeC/aM5PM+t0tiy5YtLd6/devWNpe12Wyqrq6W0+mU1WqV0+lUTU2NbDZb05h9+/Zpz549uu+++yRJP/74owzD0OHDhzV37lx3Y6q29rAaGw23x58UFRWuffsOtXs5fyGfZ8jnuWDPSL6OsVhCWn1x3WZJPP/885JOvNI/efuk77//XrGxsW2GiIyMVFxcnOx2u5KTk2W32xUXF+cy1RQbG+tSRPn5+Tp69KhmzpzZ5uMDAHyjzZKoqqqSdOJjwU/ePslms2n69OlurWjOnDnKzs7WokWL1KdPn6bTWzMzM/Xwww/r8ssvb292AICPhRiG4dbczPLlyzVhwgRf5/EY002BQT7PBHs+Kfgzkq9j2ppucvuiQ88880yL9/PhfgDQdbldEsePH2/xvsbGRq8GAgAE
jzaPSWRkZCgkJEQNDQ264447XL5XVVWloUOH+iobACDA2iyJtLQ0GYahsrIy3XbbbU33h4SEKDIyUtdcc41PAwIAAqfNkkhNTZV04sp0P/vZz1RaWqoDBw7IMAxVV1dr1apVLuUBAOg63H4zXUVFhbKysnTBBRfom2++0cCBA7Vz504NGzaMkgCALsrtkvjzn/+svLw8jRs3TldddZWKi4v19ttv65tvvvFlPgBAALl9dlNlZaXGjRvncl9qaqqKi4u9nQkAECTcLonIyEjt379fknTOOefo888/1549ezgFFgC6MLdLIi0tTdu3b5ckTZo0SXfffbeSk5N1++23+ywcACCw3D4mcfLTWSUpJSVFV199terr6zVgwACfBAMABJ7bJXE6dz79FQDQubk93QQA6H4oCQCAKUoCAGCKkgAAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACYoiQAAKYoCQCAKUoCAGCKkgAAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACY6uGvFVVUVCg7O1sHDx5U3759NX/+fPXv399lzMKFC7VmzRpZLBb17NlTjzzyiEaOHOmviACA0/itJGbPnq2MjAwlJydr1apVysnJ0bJly1zGDB48WJMnT1avXr309ddf684779SHH36osLAwf8UEAJzCL9NNtbW1Ki8vV2JioiQpMTFR5eXlqqurcxk3cuRI9erVS5I0aNAgGYahgwcP+iMiAKAFfikJh8OhmJgYWa1WSZLValV0dLQcDofpMsXFxTr//PPVr18/f0QEALTAb9NN7bF161Y9//zzKigoaPeykZG9O7zeqKjwDi/rD+TzDPk8F+wZyed9fikJm82m6upqOZ1OWa1WOZ1O1dTUyGazNRv7+eefKysrS4sWLdJFF13U7nXV1h5WY6PR7uWiosK1b9+hdi/nL+TzDPk8F+wZydcxFktIqy+u/TLdFBkZqbi4ONntdkmS3W5XXFycIiIiXMaVlpbqkUce0V/+8hddeuml/ogGAGiF394nMWfOHBUWFio+Pl6FhYXKzc2VJGVmZqqsrEySlJubq2PHjiknJ0fJyclKTk7Wjh07/BURAHAavx2TGDBggIqKiprd/+KLLzbdfvvtt/0VBwDgBt5xDQAwFZRnNwFAZzL56Y1NtwuyRwcwifexJwEAHji1IFr6urOjJKDJT29s+gfAfWa/M13pd4nppiDg6a6qJ8u39Cqoq+0uA+g49iQCzNNdVU+WD5ZXQezJAMGLkgggT/9IB8sfeU909flcdG1me91daW+ckkDAdIWSA04vhK5UEBLHJLq1guzRLf5B9uS4RiB+QQK9fqArb3fsSQSQp7uq3tjV9fRVUKCniwK9fqCroyQCzNM/0t7Y1S3IHt30rz08nS7ytOSYrgJ8j+mmIODprmpn3tU9fcqrM/9fgK6IkkDAUQxA8GK6CR0W6NP/Ar1+oDugJOCRQJ/+F+j1A10d003wWEH26IBempFiAHyHPQkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACYoiQAAKYoCQCAKd5xjW6PT6EFzLEngW6NixYBraMk0G1x0SKgbUw3AUCABfOUJ3sSABBAwT7lSUmg2+KiRQi0zjDlSUmgW+OiRUDrOCbhBcE8n4i28TMDzFESHmppPpE/OvA3T1+odPblO6uC7NEtTi0F03PAdJMHOsN8Inxv8tMbm/51dPmkR1d5tHxrX3f15Tu7YJ/ypCQADwT6D6SnL1Q6+/JdRUH26KZ/wcZvJVFRUaH09HTFx8crPT1du3fvbjbG6XQqNzdXY8aM0dixY1VUVOSXbJ6+kkP3xB9IdAd+K4nZs2crIyND69atU0ZGhnJycpqNKSkp0Z49e7R+/Xq9+eabys/P1w8//ODTXJ68kuMUSgBdnV9Kora2VuXl5UpMTJQkJSYmqry8XHV1dS7j1qxZo7S0NFks
FkVERGjMmDFau3atz3J545VcsM8nomvz9IVKZ18evueXs5scDodiYmJktVolSVarVdHR0XI4HIqIiHAZFxsb2/S1zWZTVVVVu9YVGdnbK5mjosLdHlvyv8leWWdb2pMpELpbvpL/TVbSo6tavN8fy5s9ji+Xb+k59Of629LdtkF/6HKnwNbWHlZjo+Hx4+zbd8gLabwnKio86DKdqrvmO/0UxoLs0e1aj6fLn7rcSb5avrXn0B/rb0t33QY9ZbGEtPri2i8lYbPZVF1dLafTKavVKqfTqZqaGtlstmbjKisrNXjwYEnN9yy8rTOco4zg5+n2UpA9Omj/gAB+OSYRGRmpuLg42e12SZLdbldcXJzLVJMkJSQkqKioSI2Njaqrq9OGDRsUHx/v02wcUwAAc36bbpozZ46ys7O1aNEi9enTR/Pnz5ckZWZm6uGHH9bll1+u5ORkffHFF7r55pslSQ899JDOO+88n2fjlRwAtCzEMAzPJ/CDSEePSQR7SZDPM+TzXLBnJF/HtHVMgndcAwBMURIAAFOUBADAVJd7n4TFEhKQZf2BfJ4hn+eCPSP52q+tTF3uwDUAwHuYbgIAmKIkAACmKAkAgClKAgBgipIAAJiiJAAApigJAIApSgIAYIqSAACY6nIfy9Ga3Nxcbd68WaGhoTrzzDM1a9YsXX755ZKk/fv3a8aMGdq7d6/OOOMMzZ07V0OGDGnxcRYuXKiVK1dKklJTU/XQQw95Jd+qVau0ePFi7dq1S48//rjuvPPOpu9NmjRJBw4ckCQ5nU7t3LlTq1at0iWXXOLyGFu2bNF9992n/v37S5JCQ0NVVFTk83zZ2dn6+OOPddZZZ0k6cQGpBx54oMXHCcTz19rP/lQrVqxQXl6ezjnnHEnSueeeq4ULF/o8X319vR577DF9+eWXslqtmjlzpm688cYWH2f58uV68cUXZRiGbrjhBj3xxBOyWLz7ei8YtrfWBMP21pZg2Oa8wuhGNm7caDQ0NDTdvummm5q+l52dbSxcuNAwDMP49NNPjbFjxxqNjY3NHmPr1q1GYmKiUV9fb9TX1xuJiYnG1q1bvZJvx44dxs6dO42srCzjlVdeMR337rvvGrfeemuL3/vkk0+M1NRUr+RpT76ZM2e2mvmkQD1/rf3sT/X2228b06dP90qe9uTLz883Zs2aZRiGYVRUVBjXXXedcfjw4WaPsWfPHmPkyJFGbW2t4XQ6jcmTJxsrV670Sd6TArW9tSYYtre2BMM25w3darrpxhtvVM+ePSVJQ4cOVVVVlRobGyVJa9eu1cSJEyVJV155pUJDQ1VWVtbsMdasWaOUlBSFhYUpLCxMKSkpWrNmjVfyXXzxxRo4cGCbrwrfeust/frXv/bKOtvD3XytCdTz19rP3l9ay/fOO+8oPT1dktS/f39ddtll+uCDD5qNW7duncaMGaOIiAhZLBalpaV57fkzE6jtzRt8ub21JRi2OW/oViVxqldffVWjRo2SxWLRgQMHZBiGyzW3bTabqqqqmi3ncDgUGxvrMs7hcPglsyTt27dPmzdvVnJysumY3bt3KzU1VWlpaU272f6wZMkSJSUl6cEHH9SuXbtaHBPo509y/dm3ZOvWrUpOTtYdd9yhf/7zn37JVFlZ2TTdILm//cXGxvr0+WN7845g3Obc1aWOSaSmpqqysrLF73388ceyWq2SpNWrV6ukpESvvvqqP+O5na81xcXFGjlypEuhnerSSy/V+++/r/DwcH3//fe69957FRMTo+uuu86n+R555BFFRUXJYrGouLhYU6ZM0YYNG9z6P7nLG89fWz/7UaNG6ZZbblFYWJjKy8uVmZmpZcuWacCAAX7J5y/uZvXl9uZJPn9sb55mdPfvjSfbnD90qZJw51XMu+++q+eee05Lly7V2WefLUlNB7/q6uqafhkcDof69evXbHmbzeayYTgcDtlsNq/la8uKFSs0Y8YM0+/37v3fa9Wed955GjNmjD777DO3
fmk9yRcTE9N0OyUlRfPmzVNVVZXLq2MpsM9fSz/70536x/AXv/iFhg0bptLSUrd+YT3JFxsbq71797psf8OHD2827vTnr7Ky0u3nryNZfbm9eZLPH9ubpxkl329z/tCtppv+8Y9/aN68eXrppZd07rnnunwvISFBb7zxhiRp27ZtOnbsmC677LJmj5GQkKDi4mIdO3ZMx44dU3FxscaNG+eX/J999pkOHTqkG264wXRMTU2NjP+/RMjBgwf10UcfNTsjxReqq6ubbm/atEkWi8XlF/mkQD1/rf3sT3Xq/2Pv3r3617/+pUGDBvk8X0JCgt58801JJ6ZvysrKNHLkyGbj4uPjtWHDBtXV1amxsVFFRUU+e/7Y3jwT7Nucu7rVRYeuueYa9ezZ06W5ly5dqrPOOkv79u1TVlaWKisrdcYZZyg3N1fDhg2TJM2aNUujR4/WTTfdJEnKz89XcXGxpBOvYqZPn+6VfHa7XX/84x/1448/qmfPnurVq5cKCgo0cOBASdITTzyhvn376ne/+53Lcs8//7yio6N1++23q7CwUK+//rp69Oghp9OplJQUTZkyxef5Jk2apNraWoWEhKh3796aMWOGhg4dKik4nr/Wfvan5nv22Wf13nvvNU0V3HvvvUpNTfV5vqNHjyo7O1tfffWVLBaLsrKyNGbMGEmuP19JeuONN7R48WJJ0vXXX6+cnByfTLMEentrTTBsb20Jhm3OG7pVSQAA2qdbTTcBANqHkgAAmKIkAACmKAkAgClKAgBgipIAAJiiJIAA27JlS6tvWGuPQYMG6bvvvvPKYwESJQFo27Ztmjhxoq644gpdffXVmjhxokpLSwMdCwgKXeqzm4D2Onz4sKZOnao5c+Zo3LhxOn78uLZt26bQ0FC3H+Onn35Sjx78KqFrYk8C3VpFRYUkKTExUVarVWFhYRoxYkSrnz+0YsUKTZw4UXl5eRo+fLjy8/O1Z88e3X333Ro+fLiGDx+uRx99VD/++GPTMqNHj9ZLL72kpKQkXXHFFfrtb3+r//znPy0+/rJly3TLLbeoqqpKDQ0Nmj9/vkaNGqXrrrtOOTk5OnbsWNPYxYsXa8SIERoxYoTeeustLz0rwH9REujWLrzwwqbLhb7//vv697//7dZypaWlOu+88/TRRx/pgQcekGEYuv/++7Vp0ya98847qqqqUn5+vssy77zzjhYvXqz33ntPO3bs0IoVK5o97oIFC7Ry5UoVFhaqX79+euaZZ1RRUaHi4mKtX79eNTU1TZe2/OCDD1RQUKCCggKtX79emzdv9vwJAU5DSaBb6927t1577TWFhIToySef1LXXXqupU6dq//79rS4XHR2tu+66Sz169FBYWJguuOACXX/99QoNDVVERITuvfdeffrppy7L3HXXXYqJiVHfvn1144036quvvmr6nmEYmjdvnj766CMtW7ZMERERMgxDy5cv1+OPP66+ffuqd+/euv/++7V69WpJJ0pn/Pjxuvjii3XmmWdq2rRp3n+C0O0xkYpub8CAAXr66aclSbt27VJWVpby8vL07LPPmi5z+rVG9u/frz/84Q/atm2bjhw5IsMw1KdPH5cxUVFRTbd79eqlmpqapq8PHTqk5cuX67nnnlN4eLikE9c3qa+v1/jx45vGGYbRdAnMmpoal4+zP/1aCoA3sCcBnGLAgAEaP368du7c2eq4kJAQl6+fffZZhYSEqKSkRJ999pn+9Kc/qT0fsNynTx+98MILeuyxx7R9+3ZJJy6GFRYWptWrV2vbtm3atm2btm/frs8//1zSib2ZUy/FaXaVNMATlAS6tV27dqmgoKDpetIOh0N2u11Dhgxp1+McOXJEZ555psLDw1VdXd10vYf2GD58uJ555hlNnz5dpaWlslgsSktLU15enmprayWduEDNpk2bJJ24oM7KlSv1zTffqL6+XgsWLGj3OoG2UBLo1nr37q0vvvhCaWlpGjp0qCZMmKCLL75Y2dnZ7Xqc
adOmqby8XFdeeaXuu+8+3XzzzR3Kc/311ysvL09Tp07Vl19+qaysLF1wwQWaMGGChg0bpkmTJjWdkfWrX/1K99xzj+655x6NHTtW11xzTYfWCbSGiw4BAEyxJwEAMMXZTUALcnJyVFJS0uz+pKQk/f73vw9AIiAwmG4CAJhiugkAYIqSAACYoiQAAKYoCQCAKUoCAGDq/wDoDk0Uz/GBNgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# `attrition` is the per-bucket mean, so it is constant within each S_ranked bucket: one marker\n",
    "# per bucket draws the same picture as plotting every row, with far fewer points to render.\n",
    "(\n",
    "    df_hr[['S_ranked', 'attrition']]\n",
    "    .drop_duplicates()\n",
    "    # `color=` (first colour of the active cycle) avoids matplotlib's warning about an\n",
    "    # ambiguous numeric RGB `c` argument.\n",
    "    .plot.scatter(x='S_ranked', y='attrition', color='C0')\n",
    ");  # trailing semicolon: show only the figure, not the Axes repr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f9b5e52-7e39-424d-b4f9-e07e98992658",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Newborn</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0.186700</td>\n",
       "      <td>1895</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.056757</td>\n",
       "      <td>105</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Newborn      mean   sum\n",
       "0        0  0.186700  1895\n",
       "1        1  0.056757   105"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean and sum of `left` for each Newborn value, as a flat table (Newborn restored as a column).\n",
    "(\n",
    "    df_hr.groupby('Newborn')['left']\n",
    "    .agg(['mean', 'sum'])\n",
    "    .reset_index()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3034bf7e-97a4-4159-8ded-8a8214ab8e86",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<AxesSubplot:xlabel='New Projects', ylabel='Attrition'>"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEQCAYAAABFtIg2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAnIElEQVR4nO3de1xUdf4/8NfMGWa4KZfhNqBCWulo3gHzluaNLkOYVrRktXlp3fypW19LH1YgutkXa8ut1doema3hutWWmkRS6bpFa6aloaFlBoI4Ag5euMrMmfP7w3W+IgwNMHOGmXk9H48eD5j5nHPeH8J5cT7nnM9HIUmSBCIiojYo3V0AERF1XwwJIiKyiyFBRER2MSSIiMguhgQREdnFkCAiIrtkC4mSkhKkp6cjJSUF6enpKC0tbbNdfn4+UlNTYTAYkJqairNnz8pVIhERXUMh13MSDz30EGbOnIm0tDRs374dH3zwATZt2tSizeHDh7F06VL87W9/Q2RkJGpra6FWq6HRaOQokYiIriHLmYTJZEJxcTEMBgMAwGAwoLi4GDU1NS3avf3225g9ezYiIyMBAD169GBAEBG5kSwhYTQaER0dDUEQAACCICAqKgpGo7FFuxMnTqC8vBwPPPAA7r77bqxfvx58IJyIyH1U7i7gaqIo4scff8TGjRvR3NyMuXPnIjY2FtOnT3d3aUREPkmWkNDpdKisrIQoihAEAaIooqqqCjqdrkW72NhY3HbbbVCr1VCr1Zg8eTKKioo6FBLnztXDau342YdWGwyTqa7D23ky9tk3sM++obN9VioVCAsLsvu+LCGh1Wqh1+uRl5eHtLQ05OXlQa/XIzw8vEU7g8GAf//730hLS4PFYsHXX3+NlJSUDh3LapU6FRJXtvU17LNvYJ99gyv6LNstsCtWrEBubi5SUlKQm5uL7OxsAMC8efNw+PBhAMCdd94JrVaLO+64A9OnT8f111+Pe+65R64SiYjoGrLdAisXk6muQ2kqSRLMFivCwoNw4XwDVILvPF8YGdkD1dW17i5DVuyzb2CfHadUKqDVBtt9v1tduJaDVZLwQ0kNDh0/i+OnzuNMTQOsVgkKhQJWq4SgAD/0iQ7GgPgw3DwwGhEhAe4umYjIbXwmJJrNInZ9ewoF+8txySziUrPYssF/T6jqGs0oLj2Hn8rPY0dhKa7vFYK7xiagf58wN1RNRORePhESP5+6gNe2H0F9oxnNFqtD21hECYCEoyfP4UTFBYzsH4lZ0/ojQOMTPzIiIgBeHhKSJGHblyXY+U0ZzA6GQ1uaLVYcOFaFw7/UYMn9w9AnuocTqyQi6r689iqtJEnY/NlPKNjftYC4wixKqGs04383f4cS40UnVEhE1P15bUjk/acUhYeNaDZ3PSCu1tQs4oUtB1F9vtGp+yUi6o68MiTKKmuRt/ek0wPiimaziNe3H4HVu+4eJiJqxetCQrRasX7bEacMMdljlYCKs/XY/d0plx2DiKg78LqQOHT8LC7UNbv8OM1mKz789y8wW8Rfb0xE5KG8LiT2HKzAJbM8H9ySBOw/ViXLsYiI3MHrQqKm7pJsx7pkFvHJvjLZjkdEJDevCwm5Z340mhpkO3MhIpKb14WEKMobEmqVEuVVvjVvPRH5Dq8LCbmJVgnllb412yQR+Q6GRBdZRCvqmyzuLoOIyCUYEl0kSeBDdUTktRgSXSQICmj8BHeXQUTkEgyJLlKrlIiNsL+IOBGRJ/O6kFAq5D2e2WJFfAynDici7+R1IeGnknfoJ1CjQs9AtazHJCKSi9eFhCDjqYSfoMCtI3rJdjwiIrl5XUiMHRIDP0GeoJCgwMRhsbIci4jIHbwuJEYP0kGpdH23VIISSfoohARrXH4sIiJ38bqQCA7ww4MpN0Lj59qu+asFPDDlRpceg4jI3bwuJABg9KAYXB8XApWLhp3UKiXmpQ5EoL/KJfsnIuouvDIkFAoFHrt7MKLC
Ap0eFGqVEjMm9MXgvlqn7peIqDvyypAAgACNCstnjURsRBDUThp6UquUuPfWfpiW1Mcp+yMi6u5kGy8pKSnBsmXLcP78eYSGhiInJwcJCQkt2rz66qv4+9//jqioKADAiBEjkJWV1eljBvqr8MxDidj2ZQk+O1De6XWv1SolAv1VmJ92E27sHdrpeoiIPI1sIZGVlYWMjAykpaVh+/btyMzMxKZNm1q1mz59OpYuXeq046oEJe6Z2A9JA6KwqeAYTlXXQxStcGRtIo2fAEmSMGFYHGZM6Ms5mojI58gSEiaTCcXFxdi4cSMAwGAwYNWqVaipqUF4eLgcJSA+pgeefTgJp8/W47MD5fj+57OobTBD7aeEJF2+jgFJglUCRKsVOm0Qbh0eh9GDYqBRMxyIyDfJEhJGoxHR0dEQhMsftoIgICoqCkajsVVIfPzxxygsLERkZCQWLlyI4cOHO7WW2IggPHzbAABA4yULyiprUdtgRlCwBk0NzYgOD0RMeCCUck8CRUTUDXWrezjvv/9+zJ8/H35+fvjqq6/w2GOPIT8/H2FhYQ7vQ6sN7tAx+/RyfN/eKDLS9yYnZJ99A/vsHLKEhE6nQ2VlJURRhCAIEEURVVVV0Ol0LdpFRkbavh47dix0Oh2OHz+O5ORkh49lMtXB6sgFh2tERvZAdbVvLUPKPvsG9tk3dLbPSqWi3T+uZbkFVqvVQq/XIy8vDwCQl5cHvV7faqipsrLS9vXRo0dRUVGB6667To4SiYioDbINN61YsQLLli3D+vXr0bNnT+Tk5AAA5s2bh0WLFmHw4MF46aWX8MMPP0CpVMLPzw9r1qxpcXZBRETyUkiSdy3QzOEmx7HPvoF99g0ePdxERESeiSFBRER2MSSIiMiubvWcBBFRZ5QYL+Kz/eU4WVmLxksWqP1UCNAIuHlgNMYN0SHI38/dJXoshgQReSSrJGHvkTPI33sSptommC1W/N9tOM0AAOPZenz4xS8YeWMk7hyTgLiIILfV66kYEkTkccwWEeu2HsGPZedwyWx/dufm/878/M3RSnz3UzV+lzYIw2/gbfUdwWsSRORRLKIVf3r3EI6ebD8grmaVLgfGX7f/gO9+qnZxhd6FIUFEHuWdgh9Raqzt1PowzRYr3vjoB5RV+tYzFF3BkCAij3Gu9hK+/qHSNozUGWaLFdu+LHFiVd6NIUFEHuNfB0+hq1NESACOlNTgQn2zM0ryegwJIvIIFtGK3d9WwCJ2/iziCoUC+PehCidU5f0YEkTkEUqMF2F10lRzZosV/zlyxin78nYMCSLyCLUNZjhzvciGJosT9+a9GBJE5BHMFmuXr0dczRnDVr6AIUFEHiHQXwWFE08lNH6C83bmxRgSROQRosMCYBGddy4Row102r68GUOCiDxCVFggekXaXxynI/z9BKQk93HKvrwdQ4KIPMYdN/eBv7rrw0QqlRJD+mqdUJH3Y0gQkccYdkMEBGXXLkz4qZSYmtgLyi7ux1cwJIjIYwhKJR6bfhP8VJ376FIJCujCA3HbKA41OYohQUQeRZ8QjkfuGNDhoFAJSoT39MeS3wyHn4p3NjmK60kQkce5eWAMegaqsW7rYUgS0NQs2m2rVCigEhToFxeC/zdjMAI0/NjrCP60iMgjDUwIx9qF43DgWDXyvz6J6guNUCoUEK0SlEoFFABEq4SkAVGYltQbfaJ7uLtkj8SQICKP5acSMPqmGIy+KQblVXU4fbYeDU1mhIUGQrKI6N8nDIH+/JjrCv70iMgr9I4KRu+oy89RREb2QHU1FxZyBl64JiIiuxgSRERkF0OCiIjski0kSkpKkJ6ejpSUFKSnp6O0tNRu219++QVDhw5FTk6OXOUREVEbZAuJrKwsZGRkoKCgABkZGcjMzGyznSiKyMrKwpQpU+QqjYiI7JAlJEwmE4qLi2EwGAAABoMBxcXFqKmpadX2jTfewMSJE5GQkCBHaURE1A5ZboE1Go2Ijo6GIFx+FF4Q
BERFRcFoNCI8PNzW7tixYygsLMSmTZuwfv36Th1Lq+38VMKRkb73sA377BvYZ9/gij53m+ckzGYznn32WTz//PO2MOkMk6kOVmvHFybxxfuq2WffwD77hs72WalUtPvHtSwhodPpUFlZCVEUIQgCRFFEVVUVdDqdrU11dTXKysrw6KOPAgAuXrwISZJQV1eHVatWyVEmERFdQ5aQ0Gq10Ov1yMvLQ1paGvLy8qDX61sMNcXGxmLfvn2271999VU0NDRg6dKlcpRIRERtkO3uphUrViA3NxcpKSnIzc1FdnY2AGDevHk4fPiwXGUQEVEHKCRJct7K4t0Ar0k4jn32Deyzb3DVNQk+cU1ERHYxJIiIyC6GBBER2cWQICIiuxgSRERkF0OCiIjsYkgQEZFdDAkiIrKLIUFERHY5NHdTeXk51q5di6NHj6KhoaHFe3v27HFFXURE1A04FBJLlixB7969sXTpUgQEBLi6JiIi6iYcConjx49jy5YtUCo5OkVE5Esc+tRPSkpCcXGxq2shIqJuxqEzibi4OMydOxdTp05FREREi/cWL17sksKIiMj9HAqJxsZG3HrrrbBYLDhz5oyrayIiom7CoZB4/vnnXV0HERF1Qw4vX1paWoq8vDxUVVUhKioKBoMBCQkJLiyNiIjczaEL17t378aMGTNQUlKCkJAQlJSUYObMmdi1a5er6yMiIjdy6Ezi5Zdfxvr163HzzTfbXtu3bx9WrVqFyZMnu6w4IiJyL4fOJM6cOYPExMQWr40cOZIXsYmIvJxDITFgwAC89dZbLV7buHEj9Hq9S4oiIqLuwaHhphUrVuD3v/89Nm3aBJ1OB6PRiICAALz++uuuro+IiNzIoZDo168f8vPzcejQIdvdTUOHDoWfn5+r6yMiIjdy+BZYlUrV6roEERF5N7shcfvtt+OTTz4BAEyYMAEKhaLNdpwqnIjIe9kNiVWrVtm+fuGFF2QphoiIuhe7IXH10JLJZMLtt9/eqs3OnTtdUxUREXULDt0C+/TTT7f5emZmpsMHKikpQXp6OlJSUpCeno7S0tJWbT744AOkpqYiLS0Nqamp2LRpk8P7JyIi52v3wnV5eTkAQJIk29dXv6dWqx0+UFZWFjIyMpCWlobt27cjMzOzVQikpKRgxowZUCgUqKurQ2pqKpKTkzFgwACHj0NERM7TbkhMnToVCoUCkiRh6tSpLd6LiIjAwoULHTqIyWRCcXExNm7cCAAwGAxYtWoVampqEB4ebmsXHBxs+7qpqQlms9nuBXMiInK9dkPi2LFjAIBZs2YhNze30wcxGo2Ijo6GIAgAAEEQEBUVBaPR2CIkAGDXrl146aWXUFZWhv/5n/9B//79O31cIiLqGoeek+hKQHTU5MmTMXnyZJw+fRoLFizALbfcgr59+zq8vVYb/OuN7IiM7NHpbT0V++wb2Gff4Io+2w2JOXPmYMOGDQCAjIwMu8M+mzdv/tWD6HQ6VFZWQhRFCIIAURRRVVUFnU5nd5vY2FgMHjwYe/bs6VBImEx1sFolh9tfERnZA9XVtR3ezpOxz76BffYNne2zUqlo949ruyExffp029f33ntvhw98Na1WC71ej7y8PKSlpSEvLw96vb7VUNOJEyfQr18/AEBNTQ327duHadOmdenYRETUeXZDIjU1FQAgiiLKysrw+9//vkN3M11rxYoVWLZsGdavX4+ePXsiJycHADBv3jwsWrQIgwcPxrvvvouvvvoKKpUKkiRh1qxZGDduXKePSUREXaOQJOlXx2ZGjRqFvXv3Qql06LEKt+Jwk+PYZ9/APvsGVw03OfSpP336dGzZsqXDByciIs/m0N1NRUVFyM3NxYYNGxATE9PiIrYjF66JiMgzORQS9913H+677z5X10JERN2MQyHRt29fDB06tNXrRUVFTi+IiIi6D4euSTzyyCNtvj537lynFkNERN1Lu2cSVqsVkiS1+O+KsrIy2zQbRETkndoNiYEDB9ou
Ug8cOLDFe0qlEvPnz3ddZURE5HbthsSuXbsgSRIefPDBFvM3KRQKhIeHw9/f3+UFEhGR+7QbEnFxcQAuzwJ75eurbdy40e71CiIi8nwOXbhet25dm6+/9tprTi2GiIi6l3bPJPbu3Qvg8vxNX3/9dYsL1+Xl5QgKCnJtdURE5FbthsSVta2bm5uxfPly2+sKhQKRkZF49tlnXVsdERG5VbshsXv3bgDAU089hTVr1theP3bsmG2d6kmTJrm2QiIichuHnrhes2YNampqsGPHDmzbtg3Hjh1DYmKi7UyDiIi8U7shYTabsXv3bmzduhWFhYXo06cP7rzzTlRUVGDt2rXQarVy1UlERG7QbkiMHTsWCoUCM2bMwMKFCzFo0CAA4LThREQ+ot1bYPv374/a2lp8//33OHz4MC5cuCBXXURE1A20GxLvvPMOPvvsM4wdOxZvvfUWxo4di/nz56OhoQEWi0WuGomIyE1+9WG6uLg4LFiwAJ9++inefvttREZGQqlU4q677mpxxxMREXkfh+5uuiIxMRGJiYl45pln8Nlnn2Hbtm0uKouIiLqDDoXEFRqNBgaDAQaDwdn1EBFRN+LQ3E1EROSbGBJERGQXQ4KIiOxiSBARkV0MCSIisoshQUREdnXqFtjOKCkpwbJly3D+/HmEhoYiJycHCQkJLdqsW7cO+fn5UCqV8PPzw+OPP47x48fLVSIREV1DtpDIyspCRkYG0tLSbGtRbNq0qUWbIUOGYPbs2QgICMCxY8cwa9YsFBYWwt/fX64yiYjoKrIMN5lMJhQXF9sevjMYDCguLkZNTU2LduPHj0dAQACAy5MLSpKE8+fPy1EiERG1QZaQMBqNiI6OhiAIAABBEBAVFQWj0Wh3m23btqFPnz6IiYmRo0QiImqDbMNNHfHNN9/gz3/+M956660Ob6vVBnf6uJGRPTq9radin30D++wbXNFnWUJCp9OhsrISoihCEASIooiqqirodLpWbQ8ePIgnn3wS69evR9++fTt8LJOpDlar1OHtIiN7oLq6tsPbeTL22Tewz76hs31WKhXt/nEty3CTVquFXq9HXl4eACAvLw96vR7h4eEt2hUVFeHxxx/HK6+8YlsFj4iI3Ee25yRWrFiB3NxcpKSkIDc3F9nZ2QCAefPm4fDhwwCA7OxsNDU1ITMzE2lpaUhLS8OPP/4oV4lERHQNhSRJHR+b6cY43OQ49tk3sM++waOHm4iIyDN1y7ubiKjrLplFNDRZoNL4wWyxwk/Fvwmp4xgSRF7kUrOIr4vP4JN9ZTh7vhEq4XIwmEUrrtP1xO2j4jHsBi0EJQODHMOQIPICVquEf/77BHZ/ewoKhQKXzCIAoNlitbX55fRFbPi4GIJSgZkT+mLi8F7uKpc8CEOCyMOZLSJe+WcRjldcaBEKbWlqvhwe/9j1M06fbcBvptwAhUIhR5nkoXjOSeTBrJKE9VuP4KdTF9Bsbj8grtZsseKLotPYXljiwurIGzAkiDzYl9+fxtGyczD/yhlEW5rNVuzcV4YTFRdcUBl5C4YEkYeSJAkf7z3ZoTOIa5ktl4OCyB6GBJGH+rniAmobzF3ahwTg+xNncbGh2TlFkddhSBB5qM8OnELzf+9i6gqFQoH/HLY/bT/5NoYEkYeqqK6DM+bUMVusKK+qc8KeyBsxJIg81JXbWZ2hrrFrw1bkvRgSRB7KmdNsBKj5yBS1jSFB5KHCemicsh+lAtCG+jtlX+R9GBJEHurW4XHwVwtd3o8gKDHmptarRBIBDAkijzXixkg4Y0aN2IggxEUEdX1H5JUYEkQeSiUocevwOKiEzieFxk+J20f1cWJV5G0YEkQe7I6b4xESpIGyEznhJyiQoOuJkf0jnV8YeQ2GBJEHC/T3w9IHhiM4wA9CB5LCT6VEdHggFt8zhGtLULv420Hk4SJCApA9Oxkx2kBo/Nq/kK1UXA6IgfFheOahRPjz1lf6FfwNIfICIcEarJydjJ/Kz2Pn
vjL8UFoDlaCEJElQKhSQAIhWCaMGRmNaUm/0irS/8D3R1RgSRF5CoVCgf58w9O8Thgt1l1BirEV9kxmhIQEQzSJu6BWCAA3/yVPH8DeGyAuFBGsw7IbLD9tFRvZAdXWtmysiT8VrEkREZBdDgoiI7GJIEBGRXQwJIiKyS7aQKCkpQXp6OlJSUpCeno7S0tJWbQoLCzFjxgzcdNNNyMnJkas0IiKyQ7aQyMrKQkZGBgoKCpCRkYHMzMxWbXr37o3nnnsOc+bMkassIiJqhywhYTKZUFxcDIPBAAAwGAwoLi5GTU1Ni3bx8fHQ6/VQqXhnLhFRdyBLSBiNRkRHR0MQLk8ZIAgCoqKiYDRy8XUiou7M6/5k12o7P91AZGQPJ1biGdhn38A++wZX9FmWkNDpdKisrIQoihAEAaIooqqqCjqd81fDMpnqYLVKHd7OF59KZZ99A/vsGzrbZ6VS0e4f17IMN2m1Wuj1euTl5QEA8vLyoNfrER4eLsfhiYiok2S7u2nFihXIzc1FSkoKcnNzkZ2dDQCYN28eDh8+DAA4cOAAbrnlFmzcuBH/+Mc/cMstt+DLL7+Uq0QiIrqGQpKkjo/NdGMcbnKcL/X5YkMzLtY3IzQ0EOIlM0KCNe4uSTa+9P/5CvbZcb823OR1F66JrhCtVnz/swn5X59EWWUtVIISCoUCZosInTYId9wcj5H9I6ESOPEAkT0MCfJK1ecbsWbLQdQ1mHHJLAIALKJoe7+8qg5v7zyGLbuO48n7hyGOi/AQtYl/QpHXOXu+ESvf3o+ai022gGjLpWYRF+ub8dw736Kiuk7GCok8B0OCvIpVkvDCPw6i4ZIFjl5ta2oW8cKWgzBbrK4tjsgDMSTIqxz5pQYX680OB8QVl8xWHPixyjVFEXkwhgR5lU/2nWx3iMmeS2YR+XtPuqAiIs/GkCCv0XjJgp9PXej09lXnG2G60OTEiog8H0OCvEZdoxkqQdHp7VVKBS42NDuxIiLPx5AguqLz+ULktRgS5DV6BPrBInZ+AgGLKCEkSO3Eiog8H0OCvIa/WoUbeod0evvosACE9/R3YkVEno8hQV7ljlHx0PgJHd7OXy3gjpvjXVARkWdjSJBXGXhdOEKC1VB28PqC2k/AyP5RrimKyIMxJMirKBUKPHn/cAT4qxwKCgUun0U89Zvh8FPxnwPRtfivgryONsQfWb9NgjbEH/7tDD35qwX0DFbj2YcTERsRJGOFRJ6Ds8CSV4oICcDzj47G4V9M+GRfGU5UXIBKpYTyv1OF944Kxu2j4jHshghOFU7UDoYEeS2lUoGh10dg6PURqG8y42J9M8LCgmBuakaPQN7qSuQIhgT5hCB/PwT5+/nkimVEXcHzbCIisoshQUREdjEkiIjILoYEERHZxZAgIiK7eHeTjzp7oRENFgkapQRByb8ViKhtDAkfc/Z8I9ZtPYLTpnoISgUEpQIP3zYAiQM4bxERtcaQ8CFWq4T//ft3OFd7CZIEmP/7+pt5xYgI9UdCTE+31kdE3Q/HGXzIkZIaNDRZIF2zLo9ZtGLnvjL3FEVE3RpDwofUXGyCaG29cpskAVXnGt1QERF1d7KFRElJCdLT05GSkoL09HSUlpa2aiOKIrKzszFlyhRMnToV77//vlzl+YT4mB5QtDF9tqBU4PpenV/RjYi8l2whkZWVhYyMDBQUFCAjIwOZmZmt2uzYsQNlZWX49NNP8e677+LVV1/FqVOn5CrR6yXE9MB1MT3hJ7RMCrVKiZSkPm6qioi6M1kuXJtMJhQXF2Pjxo0AAIPBgFWrVqGmpgbh4eG2dvn5+bj33nuhVCoRHh6OKVOmYOfOnZg7d67Dx1J2dEkyJ23rGRRY8pvhKPimDPuPVsFstaJfbE+kjr0OkaEB7i5ONt7//7k19tk3dKbPv7aNLCFhNBoRHR0NQbi8AIwgCIiKioLRaGwREkajEbGxsbbvdTodzpw506FjhYV1fvEYrTa409t6kodTb8LD
qe6uwn185f/z1dhn3+CKPvPCNRER2SVLSOh0OlRWVkIURQCXL1BXVVVBp9O1anf69Gnb90ajETExMXKUSEREbZAlJLRaLfR6PfLy8gAAeXl50Ov1LYaaAOC2227D+++/D6vVipqaGnz++edISUmRo0QiImqDQpKufbTKNU6cOIFly5bh4sWL6NmzJ3JyctC3b1/MmzcPixYtwuDBgyGKIlauXImvvvoKADBv3jykp6fLUR4REbVBtpAgIiLPwwvXRERkF0OCiIjsYkgQEZFdDAkiIrLL59eTOHfuHJ566imUlZVBrVYjPj4eK1eubHV7rrd57LHHcOrUKSiVSgQGBuLZZ5+FXq93d1ku95e//AWvvvoqduzYgRtvvNHd5bjUpEmToFarodFoAABLlizB+PHj3VyVa126dAmrV6/G3r17odFoMGzYMKxatcrdZbnMqVOnsGDBAtv3tbW1qKurwzfffOO0Y/h8SCgUCsydOxejRo0CAOTk5ODFF1/E6tWr3VyZa+Xk5KBHjx4AgM8//xzLly/H1q1b3VyVa/3www84dOgQ4uLi3F2KbF555RWvD8OrvfDCC9BoNCgoKIBCocDZs2fdXZJL9erVC9u3b7d9/9xzz9keWnYWnx9uCg0NtQUEAAwbNqzFU9/e6kpAAEBdXR0Ubc0h7kWam5uxcuVKrFixwt2lkIvU19dj27ZtWLx4se33OSIiws1Vyae5uRk7duzAzJkznbpfnz+TuJrVasWWLVswadIkd5cii6effhpfffUVJEnCm2++6e5yXOrPf/4z7rrrLvTq1cvdpchqyZIlkCQJI0eOxBNPPIGePb13idry8nKEhobiL3/5C/bt24egoCAsXrwYiYmJ7i5NFrt370Z0dDQGDRrk1P36/JnE1VatWoXAwEDMmjXL3aXI4rnnnsOePXvw+OOPY82aNe4ux2UOHjyII0eOICMjw92lyGrz5s346KOP8MEHH0CSJKxcudLdJbmUKIooLy/HwIED8eGHH2LJkiVYuHAh6urq3F2aLD744AOnn0UADAmbnJwcnDx5EmvXroVS6Vs/lunTp2Pfvn04d+6cu0txif379+PEiROYPHkyJk2ahDNnzmDOnDkoLCx0d2kudWUCTbVajYyMDHz33Xdursi1dDodVCoVDAYDAGDo0KEICwtDSUmJmytzvcrKSuzfvx+pqc5fA8C3Pg3teOmll3DkyBGsW7cOarXa3eW4XH19PYxGo+373bt3IyQkBKGhoe4ryoUeffRRFBYWYvfu3di9ezdiYmKwYcMGjBs3zt2luUxDQwNqa2sBAJIkIT8/3+vvXgsPD8eoUaNsc7+VlJTAZDIhPj7ezZW53tatWzFhwgSEhYU5fd8+f03i+PHj+Otf/4qEhATcf//9AC7fMbBu3To3V+Y6jY2NWLx4MRobG6FUKhESEoLXX3/d6y9e+xKTyYSFCxdCFEVYrVb069cPWVlZ7i7L5bKzs7F8+XLk5ORApVJhzZo1Xn0d5oqtW7fi6aefdsm+OcEfERHZxeEmIiKyiyFBRER2MSSIiMguhgQREdnFkCAiIrsYEkTd0PDhw1FeXu7uMogYEuR9Jk2ahNGjR6OhocH22vvvv48HH3zQ5ccdMmQIhg8fjjFjxmDZsmWor6/v1L4OHjyI3r17d6meBx98EO+//36X9kHEkCCvZLVasWnTJtmP+/rrr+PgwYPYunUrjhw5gtdee61VG4vFIntdRJ3FkCCvNGfOHLz11lu4ePFim++fOHECjzzyCJKTk5GSkoL8/HwAl2cSTUxMhNVqBQA888wzGD16tG27J598Em+//favHj86Ohrjx4/H8ePHAQD9+/fH5s2bMW3aNEybNg0A8N5772Hq1KlITk7G/PnzUVlZadu+f//+OHnyJIDLU0Dn5ORg4sSJGDNmDDIzM9HU1GRr+/nnnyMtLQ0jRozAlClT8MUXX+Dll1/GgQMHsHLlSgwfPhwrV66EJElYvXo1Ro8e
jREjRiA1NRU//fRTB36q5IsYEuSVbrrpJiQnJ2PDhg2t3mtoaMDs2bNhMBjwn//8By+//DKys7Px888/o3fv3ggODkZxcTGAy5MDBgYG4sSJE7bvk5OTf/X4RqMRX3zxRYv5kj7//HO89957yM/Px969e/GnP/0Ja9euRWFhIeLi4vDEE0+0ua8XX3wRJSUl2LZtGz799FNUVVXZpo0pKirC0qVL8dRTT+HAgQPYvHkz4uLi8PjjjyMxMRGZmZk4ePAgMjMzUVhYiAMHDqCgoADffvst1q5d67XzdZHzMCTIay1atAi5ubmoqalp8fqePXsQFxeHmTNnQqVSYeDAgUhJScHOnTsBAElJSdi/fz+qq6sBACkpKfjmm29QXl6Ouro6DBgwwO4xFyxYgMTERGRkZCApKQnz58+3vffoo48iNDQU/v7+tsVhBg0aBLVajSeeeAKHDh3CqVOnWuxPkiS89957WL58OUJDQxEcHIzf/e53+PjjjwEA//znPzFz5kyMHTsWSqUS0dHR6NevX5u1qVQq1NfX45dffoEkSejXrx+ioqI6/oMln+LzE/yR97rxxhsxceJEvPHGGy0+OCsqKlBUVNRiMRpRFHHXXXcBAJKTk7Fr1y5ER0cjKSkJo0aNwvbt26HRaJCYmNjuVPLr1q3DmDFj2nzvytTdAFBVVdVicZigoCCEhoaisrKyxcJINTU1aGxsxIwZM2yvSZJkGw4zGo2YMGGCQz+P0aNH44EHHsDKlStRUVGBadOmYenSpQgODnZoe/JNDAnyaosWLcLdd9+N2bNn217T6XRISkrCxo0b29wmKSkJa9asQUxMDJKSkjBy5EhkZWVBo9EgKSmp07VcPctuVFQUKioqbN83NDTg/PnziI6ObrFNWFgY/P398fHHH7d670pfysrKHK7hoYcewkMPPQSTyYQ//OEPePPNN/GHP/yh450hn8HhJvJq8fHxuOOOO/DOO+/YXps4cSJKS0uxbds2mM1mmM1mFBUV2a47JCQkQKPR4KOPPkJycjKCg4Oh1WpRUFDQpZC4msFgwIcffoijR4+iubkZL730EoYMGdJqeVWlUol7770Xq1evhslkAnB5gZkvv/wSAHDPPffgww8/xN69e2G1WlFZWWnrR0RERItnLYqKivD999/DbDYjICAAarXa5xbYoo7jbwh5vQULFrR4ZiI4OBgbNmxAfn4+xo8fj3HjxuHFF19Ec3OzrU1ycjJCQ0NtQ0TJycmQJMlp6wePGTMGixcvxsKFCzFu3DiUl5fj5ZdfbrPtk08+ifj4eNx3330YMWIEfvvb39pWWxsyZAief/55rF69GiNHjsSsWbNw+vRpAJfPGq4E2x//+EfU19fjmWeeQXJyMm699VaEhoZizpw5TukPeS+uJ0HUzVitVuj1evzrX/9CbGysu8shH8czCaJu5qeffoJGo0FERIS7SyFiSBB1JwUFBXj44YexZMkSn1hvnbo/DjcREZFdPJMgIiK7GBJERGQXQ4KIiOxiSBARkV0MCSIisoshQUREdv1/l+6oiqhoDz0AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Attrition by NP: y = mean of `left` per NP value, marker size = sum of `left`\n",
    "# (leaver rate and leaver count if `left` is a 0/1 flag -- TODO confirm).\n",
    "(\n",
    "    df_hr.groupby('NP')['left']\n",
    "    .agg(['mean', 'sum'])\n",
    "    .reset_index()\n",
    "    .plot.scatter(\n",
    "        x='NP',\n",
    "        y='mean',\n",
    "        s='sum',\n",
    "        # Name the colour via `color=` (first colour of the active cycle) so matplotlib does not\n",
    "        # warn about an ambiguous numeric RGB `c` argument.\n",
    "        color='C0',\n",
    "        xlabel='New Projects',\n",
    "        ylabel='Attrition',\n",
    "        ylim=(0, 0.6),\n",
    "    )\n",
    ");  # trailing semicolon: show only the figure, not the Axes repr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91a3053d-25a9-4c9c-881a-78fc43ac486e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>Logit Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>         <td>left</td>       <th>  No. Observations:  </th>  <td> 12000</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                 <td>Logit</td>      <th>  Df Residuals:      </th>  <td> 11993</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>                 <td>MLE</td>       <th>  Df Model:          </th>  <td>     6</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>            <td>Fri, 27 May 2022</td> <th>  Pseudo R-squ.:     </th>  <td>0.2131</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                <td>08:46:06</td>     <th>  Log-Likelihood:    </th> <td> -4254.5</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>converged:</th>             <td>True</td>       <th>  LL-Null:           </th> <td> -5406.7</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>     <td>nonrobust</td>    <th>  LLR p-value:       </th>  <td> 0.000</td> \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "     <td></td>        <th>coef</th>     <th>std err</th>      <th>z</th>      <th>P>|z|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>const</th>   <td>   -1.2412</td> <td>    0.160</td> <td>   -7.751</td> <td> 0.000</td> <td>   -1.555</td> <td>   -0.927</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>S</th>       <td>   -3.8163</td> <td>    0.121</td> <td>  -31.607</td> <td> 0.000</td> <td>   -4.053</td> <td>   -3.580</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>LPE</th>     <td>    0.5044</td> <td>    0.181</td> <td>    2.788</td> <td> 0.005</td> <td>    0.150</td> <td>    0.859</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>NP</th>      <td>   -0.3592</td> <td>    0.026</td> <td>  -13.569</td> <td> 0.000</td> <td>   -0.411</td> <td>   -0.307</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>ANH</th>     <td>    0.0038</td> <td>    0.001</td> <td>    6.067</td> <td> 0.000</td> <td>    0.003</td> <td>    0.005</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>TIC</th>     <td>    0.6188</td> <td>    0.027</td> <td>   22.820</td> <td> 0.000</td> <td>    0.566</td> <td>    0.672</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Newborn</th> <td>   -1.4851</td> <td>    0.113</td> <td>  -13.157</td> <td> 0.000</td> <td>   -1.706</td> <td>   -1.264</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                           Logit Regression Results                           \n",
       "==============================================================================\n",
       "Dep. Variable:                   left   No. Observations:                12000\n",
       "Model:                          Logit   Df Residuals:                    11993\n",
       "Method:                           MLE   Df Model:                            6\n",
       "Date:                Fri, 27 May 2022   Pseudo R-squ.:                  0.2131\n",
       "Time:                        08:46:06   Log-Likelihood:                -4254.5\n",
       "converged:                       True   LL-Null:                       -5406.7\n",
       "Covariance Type:            nonrobust   LLR p-value:                     0.000\n",
       "==============================================================================\n",
       "                 coef    std err          z      P>|z|      [0.025      0.975]\n",
       "------------------------------------------------------------------------------\n",
       "const         -1.2412      0.160     -7.751      0.000      -1.555      -0.927\n",
       "S             -3.8163      0.121    -31.607      0.000      -4.053      -3.580\n",
       "LPE            0.5044      0.181      2.788      0.005       0.150       0.859\n",
       "NP            -0.3592      0.026    -13.569      0.000      -0.411      -0.307\n",
       "ANH            0.0038      0.001      6.067      0.000       0.003       0.005\n",
       "TIC            0.6188      0.027     22.820      0.000       0.566       0.672\n",
       "Newborn       -1.4851      0.113    -13.157      0.000      -1.706      -1.264\n",
       "==============================================================================\n",
       "\"\"\""
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_hr.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01476cae-b429-48e3-bd59-6dac6d4294a9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}