{ "cells": [ { "cell_type": "markdown", "id": "c5c095bd-1cfa-420d-bd59-f778a89eb9c4", "metadata": {}, "source": [ "# Traffic Accidents" ] }, { "cell_type": "code", "execution_count": null, "id": "88bc3fc1-a348-42c8-adcb-f8ad4bb4d94d", "metadata": {}, "outputs": [], "source": [ "# Reload edited modules automatically before each cell runs (autoreload mode 2) and render matplotlib figures inline.\n", "%load_ext autoreload\n", "%autoreload 2\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "id": "a5ed1c7c-fe94-47f4-8c00-52267c2c989c", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "id": "00915712-88b3-4e54-bf8b-c97096759ceb", "metadata": {}, "outputs": [], "source": [ "from fastai.vision.all import *\n", "# Import order matters: this star import has to come after the fastai one (requirement stated by the original author).\n", "# NOTE(review): presumably so that names exported here, such as the `untar_data` called below, take precedence over fastai's -- TODO confirm.\n", "from aiking.data.external import *\n", "import pandas as pd\n", "# NOTE(review): the three sklearn names below are imported but not used anywhere in this notebook yet.\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.model_selection import cross_val_score\n", "from sklearn.feature_selection import mutual_info_regression" ] }, { "cell_type": "markdown", "id": "4beee4c8-21b4-4440-9a84-a5687b948a6d", "metadata": {}, "source": [ "## Getting Dataset" ] }, { "cell_type": "code", "execution_count": null, "id": "cbe2bba6-511a-4df0-ac60-6a99c693ee0c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Path('/content/drive/MyDrive/PPV/S_Personal_Study/aiking/data/us-accidents')" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Data source: Kaggle dataset `sobhanmoosavi/us-accidents`. Kaggle CLI command for the same dataset:\n", "#   kaggle datasets download -d sobhanmoosavi/us-accidents\n", "# The trailing `; path` makes the returned dataset directory the cell output.\n", "path = untar_data(\"kaggle_datasets::sobhanmoosavi/us-accidents\"); path" ] }, { "cell_type": "code", "execution_count": null, "id": "7c9525c0-f665-4eb0-9230-ac1cc11819bb", "metadata": {}, "outputs": [], "source": [ "# Read the accidents CSV from the dataset directory; the three timestamp columns are parsed as datetimes instead of being left as strings.\n", "df = pd.read_csv(path/\"US_Accidents_Dec20_updated.csv\", parse_dates=['Start_Time', 'Weather_Timestamp', 'End_Time'])" ] }, { "cell_type": "code", "execution_count": null, "id": "582913d8-6ad9-4e91-a318-51b2aec0e559", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr 
th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>0</th>\n", " <th>1</th>\n", " <th>2</th>\n", " <th>3</th>\n", " <th>4</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>ID</th>\n", " <td>A-2716600</td>\n", " <td>A-2716601</td>\n", " <td>A-2716602</td>\n", " <td>A-2716603</td>\n", " <td>A-2716604</td>\n", " </tr>\n", " <tr>\n", " <th>Severity</th>\n", " <td>3</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>Start_Time</th>\n", " <td>2016-02-08 00:37:08</td>\n", " <td>2016-02-08 05:56:20</td>\n", " <td>2016-02-08 06:15:39</td>\n", " <td>2016-02-08 06:15:39</td>\n", " <td>2016-02-08 06:51:45</td>\n", " </tr>\n", " <tr>\n", " <th>End_Time</th>\n", " <td>2016-02-08 06:37:08</td>\n", " <td>2016-02-08 11:56:20</td>\n", " <td>2016-02-08 12:15:39</td>\n", " <td>2016-02-08 12:15:39</td>\n", " <td>2016-02-08 12:51:45</td>\n", " </tr>\n", " <tr>\n", " <th>Start_Lat</th>\n", " <td>40.10891</td>\n", " <td>39.86542</td>\n", " <td>39.10266</td>\n", " <td>39.10148</td>\n", " <td>41.06213</td>\n", " </tr>\n", " <tr>\n", " <th>Start_Lng</th>\n", " <td>-83.09286</td>\n", " <td>-84.0628</td>\n", " <td>-84.52468</td>\n", " <td>-84.52341</td>\n", " <td>-81.53784</td>\n", " </tr>\n", " <tr>\n", " <th>End_Lat</th>\n", " <td>40.11206</td>\n", " <td>39.86501</td>\n", " <td>39.10209</td>\n", " <td>39.09841</td>\n", " <td>41.06217</td>\n", " </tr>\n", " <tr>\n", " <th>End_Lng</th>\n", " <td>-83.03187</td>\n", " <td>-84.04873</td>\n", " <td>-84.52396</td>\n", " <td>-84.52241</td>\n", " <td>-81.53547</td>\n", " </tr>\n", " <tr>\n", " <th>Distance(mi)</th>\n", " <td>3.23</td>\n", " <td>0.747</td>\n", " <td>0.055</td>\n", " <td>0.219</td>\n", " 
<td>0.123</td>\n", " </tr>\n", " <tr>\n", " <th>Description</th>\n", " <td>Between Sawmill Rd/Exit 20 and OH-315/Olentangy Riv Rd/Exit 22 - Accident.</td>\n", " <td>At OH-4/OH-235/Exit 41 - Accident.</td>\n", " <td>At I-71/US-50/Exit 1 - Accident.</td>\n", " <td>At I-71/US-50/Exit 1 - Accident.</td>\n", " <td>At Dart Ave/Exit 21 - Accident.</td>\n", " </tr>\n", " <tr>\n", " <th>Number</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Street</th>\n", " <td>Outerbelt E</td>\n", " <td>I-70 E</td>\n", " <td>I-75 S</td>\n", " <td>US-50 E</td>\n", " <td>I-77 N</td>\n", " </tr>\n", " <tr>\n", " <th>Side</th>\n", " <td>R</td>\n", " <td>R</td>\n", " <td>R</td>\n", " <td>R</td>\n", " <td>R</td>\n", " </tr>\n", " <tr>\n", " <th>City</th>\n", " <td>Dublin</td>\n", " <td>Dayton</td>\n", " <td>Cincinnati</td>\n", " <td>Cincinnati</td>\n", " <td>Akron</td>\n", " </tr>\n", " <tr>\n", " <th>County</th>\n", " <td>Franklin</td>\n", " <td>Montgomery</td>\n", " <td>Hamilton</td>\n", " <td>Hamilton</td>\n", " <td>Summit</td>\n", " </tr>\n", " <tr>\n", " <th>State</th>\n", " <td>OH</td>\n", " <td>OH</td>\n", " <td>OH</td>\n", " <td>OH</td>\n", " <td>OH</td>\n", " </tr>\n", " <tr>\n", " <th>Zipcode</th>\n", " <td>43017</td>\n", " <td>45424</td>\n", " <td>45203</td>\n", " <td>45202</td>\n", " <td>44311</td>\n", " </tr>\n", " <tr>\n", " <th>Country</th>\n", " <td>US</td>\n", " <td>US</td>\n", " <td>US</td>\n", " <td>US</td>\n", " <td>US</td>\n", " </tr>\n", " <tr>\n", " <th>Timezone</th>\n", " <td>US/Eastern</td>\n", " <td>US/Eastern</td>\n", " <td>US/Eastern</td>\n", " <td>US/Eastern</td>\n", " <td>US/Eastern</td>\n", " </tr>\n", " <tr>\n", " <th>Airport_Code</th>\n", " <td>KOSU</td>\n", " <td>KFFO</td>\n", " <td>KLUK</td>\n", " <td>KLUK</td>\n", " <td>KAKR</td>\n", " </tr>\n", " <tr>\n", " <th>Weather_Timestamp</th>\n", " <td>2016-02-08 00:53:00</td>\n", " <td>2016-02-08 05:58:00</td>\n", " <td>2016-02-08 
05:53:00</td>\n", " <td>2016-02-08 05:53:00</td>\n", " <td>2016-02-08 06:54:00</td>\n", " </tr>\n", " <tr>\n", " <th>Temperature(F)</th>\n", " <td>42.1</td>\n", " <td>36.9</td>\n", " <td>36.0</td>\n", " <td>36.0</td>\n", " <td>39.0</td>\n", " </tr>\n", " <tr>\n", " <th>Wind_Chill(F)</th>\n", " <td>36.1</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Humidity(%)</th>\n", " <td>58.0</td>\n", " <td>91.0</td>\n", " <td>97.0</td>\n", " <td>97.0</td>\n", " <td>55.0</td>\n", " </tr>\n", " <tr>\n", " <th>Pressure(in)</th>\n", " <td>29.76</td>\n", " <td>29.68</td>\n", " <td>29.7</td>\n", " <td>29.7</td>\n", " <td>29.65</td>\n", " </tr>\n", " <tr>\n", " <th>Visibility(mi)</th>\n", " <td>10.0</td>\n", " <td>10.0</td>\n", " <td>10.0</td>\n", " <td>10.0</td>\n", " <td>10.0</td>\n", " </tr>\n", " <tr>\n", " <th>Wind_Direction</th>\n", " <td>SW</td>\n", " <td>Calm</td>\n", " <td>Calm</td>\n", " <td>Calm</td>\n", " <td>Calm</td>\n", " </tr>\n", " <tr>\n", " <th>Wind_Speed(mph)</th>\n", " <td>10.4</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Precipitation(in)</th>\n", " <td>0.0</td>\n", " <td>0.02</td>\n", " <td>0.02</td>\n", " <td>0.02</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Weather_Condition</th>\n", " <td>Light Rain</td>\n", " <td>Light Rain</td>\n", " <td>Overcast</td>\n", " <td>Overcast</td>\n", " <td>Overcast</td>\n", " </tr>\n", " <tr>\n", " <th>Amenity</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Bump</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Crossing</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Give_Way</th>\n", " <td>False</td>\n", 
" <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Junction</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>True</td>\n", " <td>True</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>No_Exit</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Railway</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Roundabout</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Station</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Stop</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Traffic_Calming</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Traffic_Signal</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Turning_Loop</th>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " <td>False</td>\n", " </tr>\n", " <tr>\n", " <th>Sunrise_Sunset</th>\n", " <td>Night</td>\n", " <td>Night</td>\n", " <td>Night</td>\n", " <td>Night</td>\n", " <td>Night</td>\n", " </tr>\n", " <tr>\n", " <th>Civil_Twilight</th>\n", " <td>Night</td>\n", " <td>Night</td>\n", " <td>Night</td>\n", " <td>Night</td>\n", " <td>Night</td>\n", " </tr>\n", " <tr>\n", " <th>Nautical_Twilight</th>\n", " <td>Night</td>\n", " <td>Night</td>\n", " <td>Night</td>\n", " <td>Night</td>\n", " <td>Day</td>\n", " </tr>\n", " <tr>\n", " <th>Astronomical_Twilight</th>\n", " 
<td>Night</td>\n", " <td>Night</td>\n", " <td>Day</td>\n", " <td>Day</td>\n", " <td>Day</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " 0 ... 4\n", "ID A-2716600 ... A-2716604\n", "Severity 3 ... 2\n", "Start_Time 2016-02-08 00:37:08 ... 2016-02-08 06:51:45\n", "End_Time 2016-02-08 06:37:08 ... 2016-02-08 12:51:45\n", "Start_Lat 40.10891 ... 41.06213\n", "Start_Lng -83.09286 ... -81.53784\n", "End_Lat 40.11206 ... 41.06217\n", "End_Lng -83.03187 ... -81.53547\n", "Distance(mi) 3.23 ... 0.123\n", "Description Between Sawmill Rd/Exit 20 and OH-315/Olentangy Riv Rd/Exit 22 - Accident. ... At Dart Ave/Exit 21 - Accident.\n", "Number NaN ... NaN\n", "Street Outerbelt E ... I-77 N\n", "Side R ... R\n", "City Dublin ... Akron\n", "County Franklin ... Summit\n", "State OH ... OH\n", "Zipcode 43017 ... 44311\n", "Country US ... US\n", "Timezone US/Eastern ... US/Eastern\n", "Airport_Code KOSU ... KAKR\n", "Weather_Timestamp 2016-02-08 00:53:00 ... 2016-02-08 06:54:00\n", "Temperature(F) 42.1 ... 39.0\n", "Wind_Chill(F) 36.1 ... NaN\n", "Humidity(%) 58.0 ... 55.0\n", "Pressure(in) 29.76 ... 29.65\n", "Visibility(mi) 10.0 ... 10.0\n", "Wind_Direction SW ... Calm\n", "Wind_Speed(mph) 10.4 ... NaN\n", "Precipitation(in) 0.0 ... NaN\n", "Weather_Condition Light Rain ... Overcast\n", "Amenity False ... False\n", "Bump False ... False\n", "Crossing False ... False\n", "Give_Way False ... False\n", "Junction False ... False\n", "No_Exit False ... False\n", "Railway False ... False\n", "Roundabout False ... False\n", "Station False ... False\n", "Stop False ... False\n", "Traffic_Calming False ... False\n", "Traffic_Signal False ... False\n", "Turning_Loop False ... False\n", "Sunrise_Sunset Night ... Night\n", "Civil_Twilight Night ... Night\n", "Nautical_Twilight Night ... Day\n", "Astronomical_Twilight Night ... 
Day\n", "\n", "[47 rows x 5 columns]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Preview the first five records, transposed so that each of the 47 columns is shown as a row.\n", "df.head().T" ] }, { "cell_type": "code", "execution_count": null, "id": "630eaeda-14cb-4eae-994f-ff7ee4020a6d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 1516064 entries, 0 to 1516063\n", "Data columns (total 47 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 ID 1516064 non-null object \n", " 1 Severity 1516064 non-null int64 \n", " 2 Start_Time 1516064 non-null datetime64[ns]\n", " 3 End_Time 1516064 non-null datetime64[ns]\n", " 4 Start_Lat 1516064 non-null float64 \n", " 5 Start_Lng 1516064 non-null float64 \n", " 6 End_Lat 1516064 non-null float64 \n", " 7 End_Lng 1516064 non-null float64 \n", " 8 Distance(mi) 1516064 non-null float64 \n", " 9 Description 1516064 non-null object \n", " 10 Number 469969 non-null float64 \n", " 11 Street 1516064 non-null object \n", " 12 Side 1516064 non-null object \n", " 13 City 1515981 non-null object \n", " 14 County 1516064 non-null object \n", " 15 State 1516064 non-null object \n", " 16 Zipcode 1515129 non-null object \n", " 17 Country 1516064 non-null object \n", " 18 Timezone 1513762 non-null object \n", " 19 Airport_Code 1511816 non-null object \n", " 20 Weather_Timestamp 1485800 non-null datetime64[ns]\n", " 21 Temperature(F) 1473031 non-null float64 \n", " 22 Wind_Chill(F) 1066748 non-null float64 \n", " 23 Humidity(%) 1470555 non-null float64 \n", " 24 Pressure(in) 1479790 non-null float64 \n", " 25 Visibility(mi) 1471853 non-null float64 \n", " 26 Wind_Direction 1474206 non-null object \n", " 27 Wind_Speed(mph) 1387202 non-null float64 \n", " 28 Precipitation(in) 1005515 non-null float64 \n", " 29 Weather_Condition 1472057 non-null object \n", " 30 Amenity 1516064 non-null bool \n", " 31 Bump 1516064 non-null bool \n", " 32 Crossing 1516064 
non-null bool \n", " 33 Give_Way 1516064 non-null bool \n", " 34 Junction 1516064 non-null bool \n", " 35 No_Exit 1516064 non-null bool \n", " 36 Railway 1516064 non-null bool \n", " 37 Roundabout 1516064 non-null bool \n", " 38 Station 1516064 non-null bool \n", " 39 Stop 1516064 non-null bool \n", " 40 Traffic_Calming 1516064 non-null bool \n", " 41 Traffic_Signal 1516064 non-null bool \n", " 42 Turning_Loop 1516064 non-null bool \n", " 43 Sunrise_Sunset 1515981 non-null object \n", " 44 Civil_Twilight 1515981 non-null object \n", " 45 Nautical_Twilight 1515981 non-null object \n", " 46 Astronomical_Twilight 1515981 non-null object \n", "dtypes: bool(13), datetime64[ns](3), float64(13), int64(1), object(17)\n", "memory usage: 412.1+ MB\n" ] } ], "source": [ "# Print per-column dtypes and non-null counts; columns whose count is below the row total contain missing values.\n", "df.info()" ] }, { "cell_type": "code", "execution_count": null, "id": "e22f9c5b-6035-4168-93a3-24ede90c390c", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: FutureWarning: Treating datetime data as categorical rather than numeric in `.describe` is deprecated and will be removed in a future version of pandas. 
Specify `datetime_is_numeric=True` to silence this warning and adopt the future behavior now.\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>count</th>\n", " <th>unique</th>\n", " <th>top</th>\n", " <th>freq</th>\n", " <th>first</th>\n", " <th>last</th>\n", " <th>mean</th>\n", " <th>std</th>\n", " <th>min</th>\n", " <th>25%</th>\n", " <th>50%</th>\n", " <th>75%</th>\n", " <th>max</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>ID</th>\n", " <td>1516064</td>\n", " <td>1516064</td>\n", " <td>A-2716600</td>\n", " <td>1</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Severity</th>\n", " <td>1516064.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>2.23863</td>\n", " <td>0.608148</td>\n", " <td>1.0</td>\n", " <td>2.0</td>\n", " <td>2.0</td>\n", " <td>2.0</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", " <th>Start_Time</th>\n", " <td>1516064</td>\n", " <td>1014168</td>\n", " <td>2020-12-16 13:53:25</td>\n", " <td>117</td>\n", " <td>2016-02-08 00:37:08</td>\n", " <td>2020-12-31 23:28:56</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>End_Time</th>\n", " <td>1516064</td>\n", " <td>1141423</td>\n", " <td>2017-05-15 15:22:55</td>\n", " <td>73</td>\n", " <td>2016-02-08 06:37:08</td>\n", " 
<td>2021-01-01 00:00:00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Start_Lat</th>\n", " <td>1516064.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>36.900558</td>\n", " <td>5.165653</td>\n", " <td>24.570222</td>\n", " <td>33.854225</td>\n", " <td>37.35113</td>\n", " <td>40.725927</td>\n", " <td>49.00058</td>\n", " </tr>\n", " <tr>\n", " <th>Start_Lng</th>\n", " <td>1516064.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>-98.599194</td>\n", " <td>18.496022</td>\n", " <td>-124.497567</td>\n", " <td>-118.207575</td>\n", " <td>-94.381003</td>\n", " <td>-80.87469</td>\n", " <td>-67.113167</td>\n", " </tr>\n", " <tr>\n", " <th>End_Lat</th>\n", " <td>1516064.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>36.900606</td>\n", " <td>5.165629</td>\n", " <td>24.57011</td>\n", " <td>33.854204</td>\n", " <td>37.351342</td>\n", " <td>40.72593</td>\n", " <td>49.075</td>\n", " </tr>\n", " <tr>\n", " <th>End_Lng</th>\n", " <td>1516064.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>-98.59901</td>\n", " <td>18.495903</td>\n", " <td>-124.497829</td>\n", " <td>-118.207746</td>\n", " <td>-94.379875</td>\n", " <td>-80.87449</td>\n", " <td>-67.109242</td>\n", " </tr>\n", " <tr>\n", " <th>Distance(mi)</th>\n", " <td>1516064.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>0.587262</td>\n", " <td>1.632659</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.178</td>\n", " <td>0.594</td>\n", " <td>155.186</td>\n", " </tr>\n", " <tr>\n", " <th>Description</th>\n", " <td>1516064</td>\n", " <td>527655</td>\n", " <td>A crash has occurred causing no to 
minimum delays. Use caution.</td>\n", " <td>2709</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Number</th>\n", " <td>469969.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>8907.533114</td>\n", " <td>22421.89671</td>\n", " <td>0.0</td>\n", " <td>1212.0</td>\n", " <td>4000.0</td>\n", " <td>10100.0</td>\n", " <td>9999997.0</td>\n", " </tr>\n", " <tr>\n", " <th>Street</th>\n", " <td>1516064</td>\n", " <td>93048</td>\n", " <td>I-5 N</td>\n", " <td>26645</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Side</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>R</td>\n", " <td>1294562</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>City</th>\n", " <td>1515981</td>\n", " <td>10657</td>\n", " <td>Los Angeles</td>\n", " <td>39984</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>County</th>\n", " <td>1516064</td>\n", " <td>1671</td>\n", " <td>Los Angeles</td>\n", " <td>138819</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>State</th>\n", " <td>1516064</td>\n", " <td>49</td>\n", " <td>CA</td>\n", " <td>448833</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Zipcode</th>\n", " <td>1515129</td>\n", " <td>177196</td>\n", " <td>91761</td>\n", " <td>3617</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Country</th>\n", " <td>1516064</td>\n", " <td>1</td>\n", " <td>US</td>\n", " <td>1516064</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Timezone</th>\n", " <td>1513762</td>\n", " <td>4</td>\n", " <td>US/Eastern</td>\n", " <td>587961</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Airport_Code</th>\n", " <td>1511816</td>\n", " <td>1985</td>\n", " <td>KCQT</td>\n", " <td>30149</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Weather_Timestamp</th>\n", " <td>1485800</td>\n", " <td>331748</td>\n", " <td>2020-12-16 13:53:00</td>\n", " <td>468</td>\n", " <td>2016-02-08 00:53:00</td>\n", " <td>2020-12-31 23:35:00</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Temperature(F)</th>\n", " <td>1473031.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>59.584597</td>\n", " <td>18.273164</td>\n", " <td>-89.0</td>\n", " <td>47.0</td>\n", " <td>61.0</td>\n", " <td>73.0</td>\n", " <td>170.6</td>\n", " </tr>\n", " <tr>\n", " 
<th>Wind_Chill(F)</th>\n", " <td>1066748.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>55.10976</td>\n", " <td>21.127345</td>\n", " <td>-89.0</td>\n", " <td>40.8</td>\n", " <td>57.0</td>\n", " <td>71.0</td>\n", " <td>113.0</td>\n", " </tr>\n", " <tr>\n", " <th>Humidity(%)</th>\n", " <td>1470555.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>64.659601</td>\n", " <td>23.259865</td>\n", " <td>1.0</td>\n", " <td>48.0</td>\n", " <td>68.0</td>\n", " <td>84.0</td>\n", " <td>100.0</td>\n", " </tr>\n", " <tr>\n", " <th>Pressure(in)</th>\n", " <td>1479790.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>29.554954</td>\n", " <td>1.016756</td>\n", " <td>0.0</td>\n", " <td>29.44</td>\n", " <td>29.88</td>\n", " <td>30.04</td>\n", " <td>58.04</td>\n", " </tr>\n", " <tr>\n", " <th>Visibility(mi)</th>\n", " <td>1471853.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>9.131755</td>\n", " <td>2.889112</td>\n", " <td>0.0</td>\n", " <td>10.0</td>\n", " <td>10.0</td>\n", " <td>10.0</td>\n", " <td>140.0</td>\n", " </tr>\n", " <tr>\n", " <th>Wind_Direction</th>\n", " <td>1474206</td>\n", " <td>24</td>\n", " <td>CALM</td>\n", " <td>202870</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Wind_Speed(mph)</th>\n", " <td>1387202.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>7.630812</td>\n", " <td>5.637364</td>\n", " <td>0.0</td>\n", " <td>4.6</td>\n", " <td>7.0</td>\n", " <td>10.4</td>\n", " <td>984.0</td>\n", " </tr>\n", " <tr>\n", " <th>Precipitation(in)</th>\n", " <td>1005515.0</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>0.008478</td>\n", " <td>0.129317</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " <td>24.0</td>\n", " </tr>\n", " <tr>\n", " <th>Weather_Condition</th>\n", " <td>1472057</td>\n", " <td>116</td>\n", " <td>Fair</td>\n", " <td>465252</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Amenity</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1503661</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Bump</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1515803</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Crossing</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1429681</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Give_Way</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1512809</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Junction</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1311566</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " 
<td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>No_Exit</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1514335</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Railway</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1503480</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Roundabout</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1516013</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Station</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1487917</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Stop</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1498368</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Traffic_Calming</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1515575</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Traffic_Signal</th>\n", " <td>1516064</td>\n", " <td>2</td>\n", " <td>False</td>\n", " <td>1346095</td>\n", " 
<td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Turning_Loop</th>\n", " <td>1516064</td>\n", " <td>1</td>\n", " <td>False</td>\n", " <td>1516064</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Sunrise_Sunset</th>\n", " <td>1515981</td>\n", " <td>2</td>\n", " <td>Day</td>\n", " <td>909838</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Civil_Twilight</th>\n", " <td>1515981</td>\n", " <td>2</td>\n", " <td>Day</td>\n", " <td>969075</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Nautical_Twilight</th>\n", " <td>1515981</td>\n", " <td>2</td>\n", " <td>Day</td>\n", " <td>1035103</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>Astronomical_Twilight</th>\n", " <td>1515981</td>\n", " <td>2</td>\n", " <td>Day</td>\n", " <td>1090946</td>\n", " <td>NaT</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " count unique ... 75% max\n", "ID 1516064 1516064 ... NaN NaN\n", "Severity 1516064.0 NaN ... 2.0 4.0\n", "Start_Time 1516064 1014168 ... NaN NaN\n", "End_Time 1516064 1141423 ... NaN NaN\n", "Start_Lat 1516064.0 NaN ... 
40.725927 49.00058\n", "Start_Lng 1516064.0 NaN ... -80.87469 -67.113167\n", "End_Lat 1516064.0 NaN ... 40.72593 49.075\n", "End_Lng 1516064.0 NaN ... -80.87449 -67.109242\n", "Distance(mi) 1516064.0 NaN ... 0.594 155.186\n", "Description 1516064 527655 ... NaN NaN\n", "Number 469969.0 NaN ... 10100.0 9999997.0\n", "Street 1516064 93048 ... NaN NaN\n", "Side 1516064 2 ... NaN NaN\n", "City 1515981 10657 ... NaN NaN\n", "County 1516064 1671 ... NaN NaN\n", "State 1516064 49 ... NaN NaN\n", "Zipcode 1515129 177196 ... NaN NaN\n", "Country 1516064 1 ... NaN NaN\n", "Timezone 1513762 4 ... NaN NaN\n", "Airport_Code 1511816 1985 ... NaN NaN\n", "Weather_Timestamp 1485800 331748 ... NaN NaN\n", "Temperature(F) 1473031.0 NaN ... 73.0 170.6\n", "Wind_Chill(F) 1066748.0 NaN ... 71.0 113.0\n", "Humidity(%) 1470555.0 NaN ... 84.0 100.0\n", "Pressure(in) 1479790.0 NaN ... 30.04 58.04\n", "Visibility(mi) 1471853.0 NaN ... 10.0 140.0\n", "Wind_Direction 1474206 24 ... NaN NaN\n", "Wind_Speed(mph) 1387202.0 NaN ... 10.4 984.0\n", "Precipitation(in) 1005515.0 NaN ... 0.0 24.0\n", "Weather_Condition 1472057 116 ... NaN NaN\n", "Amenity 1516064 2 ... NaN NaN\n", "Bump 1516064 2 ... NaN NaN\n", "Crossing 1516064 2 ... NaN NaN\n", "Give_Way 1516064 2 ... NaN NaN\n", "Junction 1516064 2 ... NaN NaN\n", "No_Exit 1516064 2 ... NaN NaN\n", "Railway 1516064 2 ... NaN NaN\n", "Roundabout 1516064 2 ... NaN NaN\n", "Station 1516064 2 ... NaN NaN\n", "Stop 1516064 2 ... NaN NaN\n", "Traffic_Calming 1516064 2 ... NaN NaN\n", "Traffic_Signal 1516064 2 ... NaN NaN\n", "Turning_Loop 1516064 1 ... NaN NaN\n", "Sunrise_Sunset 1515981 2 ... NaN NaN\n", "Civil_Twilight 1515981 2 ... NaN NaN\n", "Nautical_Twilight 1515981 2 ... NaN NaN\n", "Astronomical_Twilight 1515981 2 ... 
NaN NaN\n", "\n", "[47 rows x 13 columns]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Summary statistics for every column (numeric and non-numeric), transposed to one row per column.\n", "# NOTE(review): the saved run emitted a pandas FutureWarning here because datetime columns are summarised as categorical\n", "# (unique/top/freq/first/last). The warning suggests passing `datetime_is_numeric=True`; whether that keyword is accepted\n", "# depends on the installed pandas version -- confirm before changing the call, then re-run to refresh the saved output.\n", "df.describe(include='all').T" ] }, { "cell_type": "code", "execution_count": null, "id": "fc040106-5425-49bc-836d-6877fcf74d99", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 5 }