diff --git a/anova.ipynb b/anova.ipynb deleted file mode 100644 index 3f95c119fe1dc99f17c34589c3a10b85285802d0..0000000000000000000000000000000000000000 --- a/anova.ipynb +++ /dev/null @@ -1,2526 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>accident_index</th>\n", - " <th>accident_year</th>\n", - " <th>accident_reference</th>\n", - " <th>location_easting_osgr</th>\n", - " <th>location_northing_osgr</th>\n", - " <th>longitude</th>\n", - " <th>latitude</th>\n", - " <th>police_force</th>\n", - " <th>accident_severity</th>\n", - " <th>number_of_vehicles</th>\n", - " <th>...</th>\n", - " <th>pedestrian_crossing_physical_facilities</th>\n", - " <th>light_conditions</th>\n", - " <th>weather_conditions</th>\n", - " <th>road_surface_conditions</th>\n", - " <th>special_conditions_at_site</th>\n", - " <th>carriageway_hazards</th>\n", - " <th>urban_or_rural_area</th>\n", - " <th>did_police_officer_attend_scene_of_accident</th>\n", - " <th>trunk_road_flag</th>\n", - " <th>lsoa_of_accident_location</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>197901A11AD14</td>\n", - " <td>1979</td>\n", - " <td>01A11AD14</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>-1</td>\n", - " <td>1</td>\n", - " <td>8</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>0</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>197901A1BAW34</td>\n", - " <td>1979</td>\n", - " <td>01A1BAW34</td>\n", - " <td>198460.0</td>\n", - " <td>894000.0</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>-1</td>\n", - " <td>4</td>\n", - " <td>8</td>\n", - " <td>3</td>\n", - " <td>-1</td>\n", - " <td>0</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>197901A1BFD77</td>\n", - " <td>1979</td>\n", - " <td>01A1BFD77</td>\n", - " <td>406380.0</td>\n", - " <td>307000.0</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>-1</td>\n", - " <td>4</td>\n", - " <td>8</td>\n", - " <td>3</td>\n", - " <td>-1</td>\n", - " <td>0</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>197901A1BGC20</td>\n", - " <td>1979</td>\n", - " <td>01A1BGC20</td>\n", - " <td>281680.0</td>\n", - " <td>440000.0</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>-1</td>\n", - " <td>4</td>\n", - " <td>8</td>\n", - " <td>3</td>\n", - " <td>-1</td>\n", - " <td>0</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>197901A1BGF95</td>\n", - " <td>1979</td>\n", - " <td>01A1BGF95</td>\n", - " <td>153960.0</td>\n", - " <td>795000.0</td>\n", - " <td>NaN</td>\n", - " <td>NaN</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>-1</td>\n", - " <td>4</td>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " <td>-1</td>\n", - " <td>0</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8602819</th>\n", - " <td>2020991027064</td>\n", - " <td>2020</td>\n", - " <td>991027064</td>\n", - " <td>343034.0</td>\n", - " <td>731654.0</td>\n", - " <td>-2.926320</td>\n", - " <td>56.473539</td>\n", - " <td>99</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8602820</th>\n", - " <td>2020991029573</td>\n", - " <td>2020</td>\n", - " <td>991029573</td>\n", - " <td>257963.0</td>\n", - " <td>658891.0</td>\n", - " <td>-4.267565</td>\n", - " <td>55.802353</td>\n", - " <td>99</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8602821</th>\n", - " <td>2020991030297</td>\n", - " <td>2020</td>\n", - " <td>991030297</td>\n", - " <td>383664.0</td>\n", - " <td>810646.0</td>\n", - " <td>-2.271903</td>\n", - " <td>57.186317</td>\n", - " <td>99</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8602822</th>\n", - " <td>2020991030900</td>\n", - " <td>2020</td>\n", - " <td>991030900</td>\n", - " <td>277161.0</td>\n", - " <td>674852.0</td>\n", - " <td>-3.968753</td>\n", - " <td>55.950940</td>\n", - " <td>99</td>\n", - " <td>3</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8602823</th>\n", - " <td>2020991032575</td>\n", - " <td>2020</td>\n", - " <td>991032575</td>\n", - " <td>240402.0</td>\n", - " <td>681950.0</td>\n", - " <td>-4.561040</td>\n", - " <td>56.003843</td>\n", - " <td>99</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>8602824 rows × 36 columns</p>\n", - "</div>" - ], - "text/plain": [ - " accident_index accident_year accident_reference \\\n", - "0 197901A11AD14 1979 01A11AD14 \n", - "1 197901A1BAW34 1979 01A1BAW34 \n", - "2 197901A1BFD77 1979 01A1BFD77 \n", - "3 197901A1BGC20 1979 01A1BGC20 \n", - "4 197901A1BGF95 1979 01A1BGF95 \n", - "... ... ... ... \n", - "8602819 2020991027064 2020 991027064 \n", - "8602820 2020991029573 2020 991029573 \n", - "8602821 2020991030297 2020 991030297 \n", - "8602822 2020991030900 2020 991030900 \n", - "8602823 2020991032575 2020 991032575 \n", - "\n", - " location_easting_osgr location_northing_osgr longitude latitude \\\n", - "0 NaN NaN NaN NaN \n", - "1 198460.0 894000.0 NaN NaN \n", - "2 406380.0 307000.0 NaN NaN \n", - "3 281680.0 440000.0 NaN NaN \n", - "4 153960.0 795000.0 NaN NaN \n", - "... ... ... ... ... \n", - "8602819 343034.0 731654.0 -2.926320 56.473539 \n", - "8602820 257963.0 658891.0 -4.267565 55.802353 \n", - "8602821 383664.0 810646.0 -2.271903 57.186317 \n", - "8602822 277161.0 674852.0 -3.968753 55.950940 \n", - "8602823 240402.0 681950.0 -4.561040 56.003843 \n", - "\n", - " police_force accident_severity number_of_vehicles ... \\\n", - "0 1 3 2 ... \n", - "1 1 3 1 ... \n", - "2 1 3 2 ... \n", - "3 1 3 2 ... \n", - "4 1 2 2 ... \n", - "... ... ... ... ... \n", - "8602819 99 2 2 ... \n", - "8602820 99 3 1 ... \n", - "8602821 99 2 2 ... \n", - "8602822 99 3 2 ... \n", - "8602823 99 3 1 ... \n", - "\n", - " pedestrian_crossing_physical_facilities light_conditions \\\n", - "0 -1 1 \n", - "1 -1 4 \n", - "2 -1 4 \n", - "3 -1 4 \n", - "4 -1 4 \n", - "... ... ... \n", - "8602819 0 1 \n", - "8602820 0 1 \n", - "8602821 0 1 \n", - "8602822 0 1 \n", - "8602823 0 1 \n", - "\n", - " weather_conditions road_surface_conditions \\\n", - "0 8 1 \n", - "1 8 3 \n", - "2 8 3 \n", - "3 8 3 \n", - "4 3 3 \n", - "... ... ... \n", - "8602819 1 1 \n", - "8602820 1 1 \n", - "8602821 1 1 \n", - "8602822 1 1 \n", - "8602823 1 1 \n", - "\n", - " special_conditions_at_site carriageway_hazards urban_or_rural_area \\\n", - "0 -1 0 -1 \n", - "1 -1 0 -1 \n", - "2 -1 0 -1 \n", - "3 -1 0 -1 \n", - "4 -1 0 -1 \n", - "... ... ... ... \n", - "8602819 0 0 1 \n", - "8602820 0 0 1 \n", - "8602821 0 0 2 \n", - "8602822 0 0 1 \n", - "8602823 0 2 1 \n", - "\n", - " did_police_officer_attend_scene_of_accident trunk_road_flag \\\n", - "0 -1 -1 \n", - "1 -1 -1 \n", - "2 -1 -1 \n", - "3 -1 -1 \n", - "4 -1 -1 \n", - "... ... ... \n", - "8602819 1 -1 \n", - "8602820 2 -1 \n", - "8602821 1 -1 \n", - "8602822 2 -1 \n", - "8602823 1 -1 \n", - "\n", - " lsoa_of_accident_location \n", - "0 -1 \n", - "1 -1 \n", - "2 -1 \n", - "3 -1 \n", - "4 -1 \n", - "... ... \n", - "8602819 -1 \n", - "8602820 -1 \n", - "8602821 -1 \n", - "8602822 -1 \n", - "8602823 -1 \n", - "\n", - "[8602824 rows x 36 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "# import the usual suspects ...\n", - "import pandas as pd\n", - "import numpy as np\n", - "import glob\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "# suppress all warnings\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\")\n", - "accidents = pd.read_csv ('/Users/Asfandyar/Downloads/dft-road-casualty-statistics-accident-1979-2020.csv')\n", - "accidents" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 False\n", - "1 False\n", - "2 False\n", - "3 False\n", - "4 False\n", - " ... \n", - "8602819 False\n", - "8602820 False\n", - "8602821 False\n", - "8602822 False\n", - "8602823 False\n", - "Name: accident_year, Length: 8602824, dtype: bool" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "accidents\n", - "mask = (accidents['accident_year'] > 2004) & (accidents['accident_year'] <= 2019)\n", - "mask" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "Acc=accidents.loc[mask]" - ] - }, - { - "cell_type": "code", - "execution_count": 253, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>accident_severity</th>\n", - " <th>first_road_class</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>2</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>3</td>\n", - " <td>4</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>3</td>\n", - " <td>5</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>3</td>\n", - " <td>6</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9995</th>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9996</th>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9997</th>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9998</th>\n", - " <td>2</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>9999</th>\n", - " <td>3</td>\n", - " <td>3</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>10000 rows × 2 columns</p>\n", - "</div>" - ], - "text/plain": [ - " accident_severity first_road_class\n", - "0 2 3\n", - "1 3 4\n", - "2 3 5\n", - "3 3 3\n", - "4 3 6\n", - "... ... ...\n", - "9995 3 3\n", - "9996 3 3\n", - "9997 3 3\n", - "9998 2 3\n", - "9999 3 3\n", - "\n", - "[10000 rows x 2 columns]" - ] - }, - "execution_count": 253, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "AccRT = Acc[['accident_severity', 'first_road_class']][:10000]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "AccRT" - ] - }, - { - "cell_type": "code", - "execution_count": 279, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "severity~first_road_class\n", - "F_onewayResult(statistic=3.291830175326872, pvalue=0.0372058822331273)\n", - " sum_sq df F PR(>F)\n", - "first_road_class 0.667436 1.0 4.602263 0.031942\n", - "Residual 2900.179764 19998.0 NaN NaN\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as stats\n", - "np.random.seed(12)\n", - "AccRT = Acc[['accident_severity', 'first_road_class']][:20000]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "#AccRT\n", - "AccRT['accident_severity']=AccRT['accident_severity'].replace({1: 'Fatal', 2: 'Serious', 3: 'Slight'})\n", - "#AccRT\n", - "\n", - "\n", - "# Group age data by race\n", - "severity_frame = pd.DataFrame({\"severity\":AccRT['accident_severity'],\"first_road_class\":AccRT['first_road_class']})\n", - "groups = severity_frame.groupby(\"severity\").groups\n", - "\n", - "# Etract individual groups\n", - "Fatal = AccRT['first_road_class'].to_numpy()[groups[\"Fatal\"]]\n", - "Serious = AccRT['first_road_class'].to_numpy()[groups[\"Serious\"]]\n", - "Slight = AccRT['first_road_class'].to_numpy()[groups[\"Slight\"]]\n", - "\n", - "\n", - "# Perform the ANOVA\n", - "print('severity~first_road_class')\n", - "print(stats.f_oneway(Fatal,Serious,Slight))\n", - "AccRT = Acc[['accident_severity', 'first_road_class']][:20000]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "from statsmodels.formula.api import ols\n", - "import statsmodels.api as sm\n", - "severity_frame = pd.DataFrame({\"severity\":AccRT['accident_severity'],\"first_road_class\":AccRT['first_road_class']})\n", - "severity_frame\n", - "model = ols('severity~first_road_class', # Model formula\n", - " data = severity_frame).fit()\n", - " \n", - "anova_result = sm.stats.anova_lm(model, typ=2)\n", - "print (anova_result)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 280, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "severity~road_type\n", - "F_onewayResult(statistic=0.5792752078545762, pvalue=0.5603137255090602)\n", - " sum_sq df F PR(>F)\n", - "road_type 0.121945 1.0 0.840706 0.359206\n", - "Residual 2900.725255 19998.0 NaN NaN\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as stats\n", - "np.random.seed(12)\n", - "AccRT = Acc[['accident_severity', 'road_type']][:20000]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "#AccRT\n", - "AccRT['accident_severity']=AccRT['accident_severity'].replace({1: 'Fatal', 2: 'Serious', 3: 'Slight'})\n", - "#AccRT\n", - "\n", - "\n", - "# Group age data by race\n", - "severity_frame = pd.DataFrame({\"severity\":AccRT['accident_severity'],\"road_type\":AccRT['road_type']})\n", - "groups = severity_frame.groupby(\"severity\").groups\n", - "\n", - "# Etract individual groups\n", - "Fatal = AccRT['road_type'].to_numpy()[groups[\"Fatal\"]]\n", - "Serious = AccRT['road_type'].to_numpy()[groups[\"Serious\"]]\n", - "Slight = AccRT['road_type'].to_numpy()[groups[\"Slight\"]]\n", - "\n", - "\n", - "# Perform the ANOVA\n", - "print('severity~road_type')\n", - "print(stats.f_oneway(Fatal,Serious,Slight))\n", - "AccRT = Acc[['accident_severity', 'road_type']][:20000]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "from statsmodels.formula.api import ols\n", - "import statsmodels.api as sm\n", - "severity_frame = pd.DataFrame({\"severity\":AccRT['accident_severity'],\"road_type\":AccRT['road_type']})\n", - "severity_frame\n", - "model = ols('severity~road_type', # Model formula\n", - " data = severity_frame).fit()\n", - " \n", - "anova_result = sm.stats.anova_lm(model, typ=2)\n", - "print (anova_result)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " sum_sq df F PR(>F)\n", - "junction_detail 3.204045 1.0 22.11262 0.000003\n", - "Residual 2897.643155 19998.0 NaN NaN\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as stats\n", - "np.random.seed(12)\n", - "AccRT = Acc[['accident_severity', 'junction_detail']][:20000]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "#AccRT\n", - "AccRT['accident_severity']=AccRT['accident_severity'].replace({1: 'Fatal', 2: 'Serious', 3: 'Slight'})\n", - "\n", - "\n", - "\n", - "AccRT = Acc[['accident_severity', 'junction_detail']][:20000]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "from statsmodels.formula.api import ols\n", - "import statsmodels.api as sm\n", - "severity_frame = pd.DataFrame({\"severity\":AccRT['accident_severity'],\"junction_detail\":AccRT['junction_detail']})\n", - "severity_frame\n", - "model = ols('severity~junction_detail', # Model formula\n", - " data = severity_frame).fit()\n", - " \n", - "anova_result = sm.stats.anova_lm(model, typ=2)\n", - "print (anova_result)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 -1\n", - "1 2\n", - "2 -1\n", - "3 -1\n", - "4 -1\n", - " ..\n", - "2287422 -1\n", - "2287423 -1\n", - "2287424 4\n", - "2287425 4\n", - "2287426 -1\n", - "Name: junction_control, Length: 2287427, dtype: int64" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Acc['junction_control']" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " sum_sq df F PR(>F)\n", - "junction_control 1756.967410 1.0 10523.832139 0.0\n", - "Residual 381888.567304 2287425.0 NaN NaN\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as stats\n", - "np.random.seed(12)\n", - "\n", - "\n", - "\n", - "# Group age data by \n", - "AccRT = Acc[['accident_severity', 'junction_control']]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "from statsmodels.formula.api import ols\n", - "import statsmodels.api as sm\n", - "severity_frame = pd.DataFrame({\"severity\":AccRT['accident_severity'],\"junction_control\":AccRT['junction_control']})\n", - "severity_frame\n", - "model = ols('severity~junction_control', # Model formula\n", - " data = severity_frame).fit()\n", - " \n", - "anova_result = sm.stats.anova_lm(model, typ=2)\n", - "print (anova_result)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['accident_index', 'accident_year', 'accident_reference', 'location_easting_osgr', 'location_northing_osgr', 'longitude', 'latitude', 'police_force', 'accident_severity', 'number_of_vehicles', 'number_of_casualties', 'date', 'day_of_week', 'time', 'local_authority_district', 'local_authority_ons_district', 'local_authority_highway', 'first_road_class', 'first_road_number', 'road_type', 'speed_limit', 'junction_detail', 'junction_control', 'second_road_class', 'second_road_number', 'pedestrian_crossing_human_control', 'pedestrian_crossing_physical_facilities', 'light_conditions', 'weather_conditions', 'road_surface_conditions', 'special_conditions_at_site', 'carriageway_hazards', 'urban_or_rural_area', 'did_police_officer_attend_scene_of_accident', 'trunk_road_flag', 'lsoa_of_accident_location']\n" - ] - } - ], - "source": [ - "print(Acc.columns.tolist())" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " sum_sq df F PR(>F)\n", - "day_of_week 4.208090 1.0 25.090335 5.471043e-07\n", - "Residual 383641.326624 2287425.0 NaN NaN\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as stats\n", - "np.random.seed(12)\n", - "\n", - "\n", - "\n", - "# Group age data by \n", - "AccRT = Acc[['accident_severity', 'day_of_week']]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "from statsmodels.formula.api import ols\n", - "import statsmodels.api as sm\n", - "severity_frame = pd.DataFrame({\"severity\":AccRT['accident_severity'],\"day_of_week\":AccRT['day_of_week']})\n", - "severity_frame\n", - "model = ols('severity~day_of_week', # Model formula\n", - " data = severity_frame).fit()\n", - " \n", - "anova_result = sm.stats.anova_lm(model, typ=2)\n", - "print (anova_result)" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " sum_sq df F PR(>F)\n", - "light_conditions 1403.800907 1.0 8400.676862 0.0\n", - "Residual 382241.733807 2287425.0 NaN NaN\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as stats\n", - "np.random.seed(12)\n", - "\n", - "\n", - "\n", - "# Group age data by \n", - "AccRT = Acc[['accident_severity', 'light_conditions']]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "from statsmodels.formula.api import ols\n", - "import statsmodels.api as sm\n", - "severity_frame = pd.DataFrame({\"severity\":AccRT['accident_severity'],\"light_conditions\":AccRT['light_conditions']})\n", - "severity_frame\n", - "model = ols('severity~light_conditions', # Model formula\n", - " data = severity_frame).fit()\n", - " \n", - "anova_result = sm.stats.anova_lm(model, typ=2)\n", - "print (anova_result)" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " sum_sq df F PR(>F)\n", - "weather_conditions 190.013728 1.0 1133.487794 2.002963e-248\n", - "Residual 383455.520986 2287425.0 NaN NaN\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as stats\n", - "np.random.seed(12)\n", - "\n", - "\n", - "\n", - "# Group age data by \n", - "AccRT = Acc[['accident_severity', 'weather_conditions']]\n", - "AccRT = AccRT.reset_index(drop=True)\n", - "from statsmodels.formula.api import ols\n", - "import statsmodels.api as sm\n", - "severity_frame = pd.DataFrame({\"severity\":AccRT['accident_severity'],\"weather_conditions\":AccRT['weather_conditions']})\n", - "severity_frame\n", - "model = ols('severity~weather_conditions', # Model formula\n", - " data = severity_frame).fit()\n", - " \n", - "anova_result = sm.stats.anova_lm(model, typ=2)\n", - "print (anova_result)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "# import the usual suspects ...\n", - "import pandas as pd\n", - "import numpy as np\n", - "import glob\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "vechilee = pd.read_csv ('/Users/Asfandyar/Downloads/archive/Vehicle_Information.csv',encoding= 'unicode_escape')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>accident_index</th>\n", - " <th>accident_year</th>\n", - " <th>accident_reference</th>\n", - " <th>location_easting_osgr</th>\n", - " <th>location_northing_osgr</th>\n", - " <th>longitude</th>\n", - " <th>latitude</th>\n", - " <th>police_force</th>\n", - " <th>accident_severity</th>\n", - " <th>number_of_vehicles</th>\n", - " <th>...</th>\n", - " <th>pedestrian_crossing_physical_facilities</th>\n", - " <th>light_conditions</th>\n", - " <th>weather_conditions</th>\n", - " <th>road_surface_conditions</th>\n", - " <th>special_conditions_at_site</th>\n", - " <th>carriageway_hazards</th>\n", - " <th>urban_or_rural_area</th>\n", - " <th>did_police_officer_attend_scene_of_accident</th>\n", - " <th>trunk_road_flag</th>\n", - " <th>lsoa_of_accident_location</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>6224198</th>\n", - " <td>200501BS00001</td>\n", - " <td>2005</td>\n", - " <td>01BS00001</td>\n", - " <td>525680.0</td>\n", - " <td>178240.0</td>\n", - " <td>-0.191170</td>\n", - " <td>51.489096</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002849</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6224199</th>\n", - " <td>200501BS00002</td>\n", - " <td>2005</td>\n", - " <td>01BS00002</td>\n", - " <td>524170.0</td>\n", - " <td>181650.0</td>\n", - " <td>-0.211708</td>\n", - " <td>51.520075</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>5</td>\n", - " <td>4</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002909</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6224200</th>\n", - " <td>200501BS00003</td>\n", - " <td>2005</td>\n", - " <td>01BS00003</td>\n", - " <td>524520.0</td>\n", - " <td>182240.0</td>\n", - " <td>-0.206458</td>\n", - " <td>51.525301</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>4</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002857</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6224201</th>\n", - " <td>200501BS00004</td>\n", - " <td>2005</td>\n", - " <td>01BS00004</td>\n", - " <td>526900.0</td>\n", - " <td>177530.0</td>\n", - " <td>-0.173862</td>\n", - " <td>51.482442</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002840</td>\n", - " </tr>\n", - " <tr>\n", - " <th>6224202</th>\n", - " <td>200501BS00005</td>\n", - " <td>2005</td>\n", - " <td>01BS00005</td>\n", - " <td>528060.0</td>\n", - " <td>179040.0</td>\n", - " <td>-0.156618</td>\n", - " <td>51.495752</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>7</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002863</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8511620</th>\n", - " <td>2019984106919</td>\n", - " <td>2019</td>\n", - " <td>984106919</td>\n", - " <td>312635.0</td>\n", - " <td>573392.0</td>\n", - " <td>-3.368899</td>\n", - " <td>55.047323</td>\n", - " <td>98</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8511621</th>\n", - " <td>2019984107019</td>\n", - " <td>2019</td>\n", - " <td>984107019</td>\n", - " <td>337522.0</td>\n", - " <td>591682.0</td>\n", - " <td>-2.983499</td>\n", - " <td>55.215407</td>\n", - " <td>98</td>\n", - " <td>3</td>\n", - " <td>4</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8511622</th>\n", - " <td>2019984107219</td>\n", - " <td>2019</td>\n", - " <td>984107219</td>\n", - " <td>318544.0</td>\n", - " <td>567087.0</td>\n", - " <td>-3.274645</td>\n", - " <td>54.991685</td>\n", - " <td>98</td>\n", - " <td>3</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8511623</th>\n", - " <td>2019984107419</td>\n", - " <td>2019</td>\n", - " <td>984107419</td>\n", - " <td>336525.0</td>\n", - " <td>584226.0</td>\n", - " <td>-2.997491</td>\n", - " <td>55.148292</td>\n", - " <td>98</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>8511624</th>\n", - " <td>201998QC01004</td>\n", - " <td>2019</td>\n", - " <td>98QC01004</td>\n", - " <td>291367.0</td>\n", - " <td>608364.0</td>\n", - " <td>-3.715064</td>\n", - " <td>55.357237</td>\n", - " <td>98</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>2287427 rows × 36 columns</p>\n", - "</div>" - ], - "text/plain": [ - " accident_index accident_year accident_reference \\\n", - "6224198 200501BS00001 2005 01BS00001 \n", - "6224199 200501BS00002 2005 01BS00002 \n", - "6224200 200501BS00003 2005 01BS00003 \n", - "6224201 200501BS00004 2005 01BS00004 \n", - "6224202 200501BS00005 2005 01BS00005 \n", - "... ... ... ... \n", - "8511620 2019984106919 2019 984106919 \n", - "8511621 2019984107019 2019 984107019 \n", - "8511622 2019984107219 2019 984107219 \n", - "8511623 2019984107419 2019 984107419 \n", - "8511624 201998QC01004 2019 98QC01004 \n", - "\n", - " location_easting_osgr location_northing_osgr longitude latitude \\\n", - "6224198 525680.0 178240.0 -0.191170 51.489096 \n", - "6224199 524170.0 181650.0 -0.211708 51.520075 \n", - "6224200 524520.0 182240.0 -0.206458 51.525301 \n", - "6224201 526900.0 177530.0 -0.173862 51.482442 \n", - "6224202 528060.0 179040.0 -0.156618 51.495752 \n", - "... ... ... ... ... \n", - "8511620 312635.0 573392.0 -3.368899 55.047323 \n", - "8511621 337522.0 591682.0 -2.983499 55.215407 \n", - "8511622 318544.0 567087.0 -3.274645 54.991685 \n", - "8511623 336525.0 584226.0 -2.997491 55.148292 \n", - "8511624 291367.0 608364.0 -3.715064 55.357237 \n", - "\n", - " police_force accident_severity number_of_vehicles ... \\\n", - "6224198 1 2 1 ... \n", - "6224199 1 3 1 ... \n", - "6224200 1 3 2 ... \n", - "6224201 1 3 1 ... \n", - "6224202 1 3 1 ... \n", - "... ... ... ... ... \n", - "8511620 98 3 1 ... \n", - "8511621 98 3 4 ... \n", - "8511622 98 3 2 ... \n", - "8511623 98 3 1 ... \n", - "8511624 98 2 1 ... \n", - "\n", - " pedestrian_crossing_physical_facilities light_conditions \\\n", - "6224198 1 1 \n", - "6224199 5 4 \n", - "6224200 0 4 \n", - "6224201 0 1 \n", - "6224202 0 7 \n", - "... ... ... \n", - "8511620 0 1 \n", - "8511621 0 1 \n", - "8511622 0 1 \n", - "8511623 0 1 \n", - "8511624 0 1 \n", - "\n", - " weather_conditions road_surface_conditions \\\n", - "6224198 2 2 \n", - "6224199 1 1 \n", - "6224200 1 1 \n", - "6224201 1 1 \n", - "6224202 1 2 \n", - "... ... ... \n", - "8511620 1 2 \n", - "8511621 1 2 \n", - "8511622 1 1 \n", - "8511623 1 1 \n", - "8511624 1 1 \n", - "\n", - " special_conditions_at_site carriageway_hazards urban_or_rural_area \\\n", - "6224198 0 0 1 \n", - "6224199 0 0 1 \n", - "6224200 0 0 1 \n", - "6224201 0 0 1 \n", - "6224202 0 0 1 \n", - "... ... ... ... \n", - "8511620 0 0 2 \n", - "8511621 0 0 2 \n", - "8511622 0 0 2 \n", - "8511623 0 0 2 \n", - "8511624 0 0 2 \n", - "\n", - " did_police_officer_attend_scene_of_accident trunk_road_flag \\\n", - "6224198 1 2 \n", - "6224199 1 2 \n", - "6224200 1 2 \n", - "6224201 1 2 \n", - "6224202 1 2 \n", - "... ... ... \n", - "8511620 1 -1 \n", - "8511621 1 -1 \n", - "8511622 2 -1 \n", - "8511623 2 -1 \n", - "8511624 1 -1 \n", - "\n", - " lsoa_of_accident_location \n", - "6224198 E01002849 \n", - "6224199 E01002909 \n", - "6224200 E01002857 \n", - "6224201 E01002840 \n", - "6224202 E01002863 \n", - "... ... \n", - "8511620 -1 \n", - "8511621 -1 \n", - "8511622 -1 \n", - "8511623 -1 \n", - "8511624 -1 \n", - "\n", - "[2287427 rows x 36 columns]" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Acc" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "vechilee = vechilee.rename({'Accident_Index': 'accident_index'}, axis=1)\n", - "Acc = Acc.rename({'accident_index': 'accident_index'}, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "mask = (vechilee['Year'] > 2004)\n", - "vechilee=vechilee.loc[mask]" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 3\n", - "1 4\n", - "2 5\n", - "3 6\n", - "4 2\n", - " ..\n", - "2287422 7\n", - "2287423 5\n", - "2287424 6\n", - "2287425 7\n", - "2287426 1\n", - "Name: day_of_week, Length: 2287427, dtype: int64" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Acc['day_of_week']" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>accident_index</th>\n", - " <th>accident_year</th>\n", - " <th>accident_reference</th>\n", - " <th>location_easting_osgr</th>\n", - " <th>location_northing_osgr</th>\n", - " <th>longitude</th>\n", - " <th>latitude</th>\n", - " <th>police_force</th>\n", - " <th>accident_severity</th>\n", - " <th>number_of_vehicles</th>\n", - " <th>...</th>\n", - " <th>pedestrian_crossing_physical_facilities</th>\n", - " <th>light_conditions</th>\n", - " <th>weather_conditions</th>\n", - " <th>road_surface_conditions</th>\n", - " <th>special_conditions_at_site</th>\n", - " <th>carriageway_hazards</th>\n", - " <th>urban_or_rural_area</th>\n", - " <th>did_police_officer_attend_scene_of_accident</th>\n", - " <th>trunk_road_flag</th>\n", - " <th>lsoa_of_accident_location</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>200501BS00001</td>\n", - " <td>2005</td>\n", - " <td>01BS00001</td>\n", - " <td>525680.0</td>\n", - " <td>178240.0</td>\n", - " <td>-0.191170</td>\n", - " <td>51.489096</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002849</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>200501BS00002</td>\n", - " <td>2005</td>\n", - " <td>01BS00002</td>\n", - " <td>524170.0</td>\n", - " <td>181650.0</td>\n", - " <td>-0.211708</td>\n", - " <td>51.520075</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>5</td>\n", - " <td>4</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002909</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>200501BS00003</td>\n", - " <td>2005</td>\n", - " <td>01BS00003</td>\n", - " <td>524520.0</td>\n", - " <td>182240.0</td>\n", - " <td>-0.206458</td>\n", - " <td>51.525301</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>4</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002857</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>200501BS00004</td>\n", - " <td>2005</td>\n", - " <td>01BS00004</td>\n", - " <td>526900.0</td>\n", - " <td>177530.0</td>\n", - " <td>-0.173862</td>\n", - " <td>51.482442</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002840</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>200501BS00005</td>\n", - " <td>2005</td>\n", - " <td>01BS00005</td>\n", - " <td>528060.0</td>\n", - " <td>179040.0</td>\n", - " <td>-0.156618</td>\n", - " <td>51.495752</td>\n", - " <td>1</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>7</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>E01002863</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2287422</th>\n", - " <td>2019984106919</td>\n", - " <td>2019</td>\n", - " <td>984106919</td>\n", - " <td>312635.0</td>\n", - " <td>573392.0</td>\n", - " <td>-3.368899</td>\n", - " <td>55.047323</td>\n", - " <td>98</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2287423</th>\n", - " <td>2019984107019</td>\n", - " <td>2019</td>\n", - " <td>984107019</td>\n", - " <td>337522.0</td>\n", - " <td>591682.0</td>\n", - " <td>-2.983499</td>\n", - " <td>55.215407</td>\n", - " <td>98</td>\n", - " <td>3</td>\n", - " <td>4</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>2</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2287424</th>\n", - " <td>2019984107219</td>\n", - " <td>2019</td>\n", - " <td>984107219</td>\n", - " <td>318544.0</td>\n", - " <td>567087.0</td>\n", - " <td>-3.274645</td>\n", - " <td>54.991685</td>\n", - " <td>98</td>\n", - " <td>3</td>\n", - " <td>2</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2287425</th>\n", - " <td>2019984107419</td>\n", - " <td>2019</td>\n", - " <td>984107419</td>\n", - " <td>336525.0</td>\n", - " <td>584226.0</td>\n", - " <td>-2.997491</td>\n", - " <td>55.148292</td>\n", - " <td>98</td>\n", - " <td>3</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>2</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2287426</th>\n", - " <td>201998QC01004</td>\n", - " <td>2019</td>\n", - " <td>98QC01004</td>\n", - " <td>291367.0</td>\n", - " <td>608364.0</td>\n", - " <td>-3.715064</td>\n", - " <td>55.357237</td>\n", - " <td>98</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>...</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>2</td>\n", - " <td>1</td>\n", - " <td>-1</td>\n", - " <td>-1</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>2287427 rows × 36 columns</p>\n", - "</div>" - ], - "text/plain": [ - " accident_index accident_year accident_reference \\\n", - "0 200501BS00001 2005 01BS00001 \n", - "1 200501BS00002 2005 01BS00002 \n", - "2 200501BS00003 2005 01BS00003 \n", - "3 200501BS00004 2005 01BS00004 \n", - "4 200501BS00005 2005 01BS00005 \n", - "... ... ... ... \n", - "2287422 2019984106919 2019 984106919 \n", - "2287423 2019984107019 2019 984107019 \n", - "2287424 2019984107219 2019 984107219 \n", - "2287425 2019984107419 2019 984107419 \n", - "2287426 201998QC01004 2019 98QC01004 \n", - "\n", - " location_easting_osgr location_northing_osgr longitude latitude \\\n", - "0 525680.0 178240.0 -0.191170 51.489096 \n", - "1 524170.0 181650.0 -0.211708 51.520075 \n", - "2 524520.0 182240.0 -0.206458 51.525301 \n", - "3 526900.0 177530.0 -0.173862 51.482442 \n", - "4 528060.0 179040.0 -0.156618 51.495752 \n", - "... ... ... ... ... \n", - "2287422 312635.0 573392.0 -3.368899 55.047323 \n", - "2287423 337522.0 591682.0 -2.983499 55.215407 \n", - "2287424 318544.0 567087.0 -3.274645 54.991685 \n", - "2287425 336525.0 584226.0 -2.997491 55.148292 \n", - "2287426 291367.0 608364.0 -3.715064 55.357237 \n", - "\n", - " police_force accident_severity number_of_vehicles ... \\\n", - "0 1 2 1 ... \n", - "1 1 3 1 ... \n", - "2 1 3 2 ... \n", - "3 1 3 1 ... \n", - "4 1 3 1 ... \n", - "... ... ... ... ... \n", - "2287422 98 3 1 ... \n", - "2287423 98 3 4 ... \n", - "2287424 98 3 2 ... \n", - "2287425 98 3 1 ... \n", - "2287426 98 2 1 ... \n", - "\n", - " pedestrian_crossing_physical_facilities light_conditions \\\n", - "0 1 1 \n", - "1 5 4 \n", - "2 0 4 \n", - "3 0 1 \n", - "4 0 7 \n", - "... ... ... \n", - "2287422 0 1 \n", - "2287423 0 1 \n", - "2287424 0 1 \n", - "2287425 0 1 \n", - "2287426 0 1 \n", - "\n", - " weather_conditions road_surface_conditions \\\n", - "0 2 2 \n", - "1 1 1 \n", - "2 1 1 \n", - "3 1 1 \n", - "4 1 2 \n", - "... ... ... \n", - "2287422 1 2 \n", - "2287423 1 2 \n", - "2287424 1 1 \n", - "2287425 1 1 \n", - "2287426 1 1 \n", - "\n", - " special_conditions_at_site carriageway_hazards urban_or_rural_area \\\n", - "0 0 0 1 \n", - "1 0 0 1 \n", - "2 0 0 1 \n", - "3 0 0 1 \n", - "4 0 0 1 \n", - "... ... ... ... \n", - "2287422 0 0 2 \n", - "2287423 0 0 2 \n", - "2287424 0 0 2 \n", - "2287425 0 0 2 \n", - "2287426 0 0 2 \n", - "\n", - " did_police_officer_attend_scene_of_accident trunk_road_flag \\\n", - "0 1 2 \n", - "1 1 2 \n", - "2 1 2 \n", - "3 1 2 \n", - "4 1 2 \n", - "... ... ... \n", - "2287422 1 -1 \n", - "2287423 1 -1 \n", - "2287424 2 -1 \n", - "2287425 2 -1 \n", - "2287426 1 -1 \n", - "\n", - " lsoa_of_accident_location \n", - "0 E01002849 \n", - "1 E01002909 \n", - "2 E01002857 \n", - "3 E01002840 \n", - "4 E01002863 \n", - "... ... \n", - "2287422 -1 \n", - "2287423 -1 \n", - "2287424 -1 \n", - "2287425 -1 \n", - "2287426 -1 \n", - "\n", - "[2287427 rows x 36 columns]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "Acc = Acc.reset_index(drop=True)\n", - "Acc\n", - "Acc" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style scoped>\n", - " .dataframe tbody tr th:only-of-type {\n", - " vertical-align: middle;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: right;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>accident_index</th>\n", - " <th>Age_Band_of_Driver</th>\n", - " <th>Age_of_Vehicle</th>\n", - " <th>Driver_Home_Area_Type</th>\n", - " <th>Driver_IMD_Decile</th>\n", - " <th>Engine_Capacity_.CC.</th>\n", - " <th>Hit_Object_in_Carriageway</th>\n", - " <th>Hit_Object_off_Carriageway</th>\n", - " <th>Journey_Purpose_of_Driver</th>\n", - " <th>Junction_Location</th>\n", - " <th>...</th>\n", - " <th>Towing_and_Articulation</th>\n", - " <th>Vehicle_Leaving_Carriageway</th>\n", - " <th>Vehicle_Location.Restricted_Lane</th>\n", - " <th>Vehicle_Manoeuvre</th>\n", - " <th>Vehicle_Reference</th>\n", - " <th>Vehicle_Type</th>\n", - " <th>Was_Vehicle_Left_Hand_Drive</th>\n", - " <th>X1st_Point_of_Impact</th>\n", - " <th>Year</th>\n", - " <th>accident_severity</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>200501BS00002</td>\n", - " <td>36 - 45</td>\n", - " <td>3.0</td>\n", - " <td>Data missing or out of range</td>\n", - " <td>NaN</td>\n", - " <td>8268.0</td>\n", - " <td>None</td>\n", - " <td>None</td>\n", - " <td>Journey as part of work</td>\n", - " <td>Leaving roundabout</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Did not leave carriageway</td>\n", - " <td>0.0</td>\n", - " <td>Slowing or stopping</td>\n", - " <td>1</td>\n", - " <td>Bus or coach (17 or more pass seats)</td>\n", - " <td>No</td>\n", - " <td>Nearside</td>\n", - " <td>2005</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1</th>\n", - " <td>200501BS00003</td>\n", - " <td>26 - 35</td>\n", - " <td>5.0</td>\n", - " <td>Urban area</td>\n", - " <td>3.0</td>\n", - " <td>8300.0</td>\n", - " <td>Parked vehicle</td>\n", - " <td>None</td>\n", - " <td>Journey as part of work</td>\n", - " <td>Not at or within 20 metres of junction</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Did not leave carriageway</td>\n", - " <td>0.0</td>\n", - " <td>Going ahead right-hand bend</td>\n", - " <td>1</td>\n", - " <td>Bus or coach (17 or more pass seats)</td>\n", - " <td>No</td>\n", - " <td>Nearside</td>\n", - " <td>2005</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2</th>\n", - " <td>200501BS00004</td>\n", - " <td>46 - 55</td>\n", - " <td>4.0</td>\n", - " <td>Urban area</td>\n", - " <td>1.0</td>\n", - " <td>1769.0</td>\n", - " <td>None</td>\n", - " <td>None</td>\n", - " <td>Other/Not known (2005-10)</td>\n", - " <td>Not at or within 20 metres of junction</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Did not leave carriageway</td>\n", - " <td>0.0</td>\n", - " <td>Going ahead other</td>\n", - " <td>1</td>\n", - " <td>Car</td>\n", - " <td>No</td>\n", - " <td>Front</td>\n", - " <td>2005</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>3</th>\n", - " <td>200501BS00005</td>\n", - " <td>46 - 55</td>\n", - " <td>10.0</td>\n", - " <td>Data missing or out of range</td>\n", - " <td>NaN</td>\n", - " <td>85.0</td>\n", - " <td>Kerb</td>\n", - " <td>None</td>\n", - " <td>Other/Not known (2005-10)</td>\n", - " <td>Not at or within 20 metres of junction</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Did not leave carriageway</td>\n", - " <td>0.0</td>\n", - " <td>Going ahead other</td>\n", - " <td>1</td>\n", - " <td>Motorcycle 125cc and under</td>\n", - " <td>No</td>\n", - " <td>Front</td>\n", - " <td>2005</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>4</th>\n", - " <td>200501BS00006</td>\n", - " <td>46 - 55</td>\n", - " <td>1.0</td>\n", - " <td>Urban area</td>\n", - " <td>4.0</td>\n", - " <td>2976.0</td>\n", - " <td>None</td>\n", - " <td>None</td>\n", - " <td>Other/Not known (2005-10)</td>\n", - " <td>Not at or within 20 metres of junction</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Did not leave carriageway</td>\n", - " <td>0.0</td>\n", - " <td>Moving off</td>\n", - " <td>1</td>\n", - " <td>Car</td>\n", - " <td>No</td>\n", - " <td>Did not impact</td>\n", - " <td>2005</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2027639</th>\n", - " <td>2016984131116</td>\n", - " <td>21 - 25</td>\n", - " <td>14.0</td>\n", - " <td>Urban area</td>\n", - " <td>NaN</td>\n", - " <td>1598.0</td>\n", - " <td>None</td>\n", - " <td>None</td>\n", - " <td>Not known</td>\n", - " <td>Mid Junction - on roundabout or on main road</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Did not leave carriageway</td>\n", - " <td>0.0</td>\n", - " <td>Going ahead other</td>\n", - " <td>1</td>\n", - " <td>Car</td>\n", - " <td>No</td>\n", - " <td>Front</td>\n", - " <td>2016</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2027640</th>\n", - " <td>2016984131116</td>\n", - " <td>56 - 65</td>\n", - " <td>NaN</td>\n", - " <td>Small town</td>\n", - " <td>NaN</td>\n", - " <td>1598.0</td>\n", - " <td>None</td>\n", - " <td>None</td>\n", - " <td>Commuting to/from work</td>\n", - " <td>Mid Junction - on roundabout or on main road</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Offside</td>\n", - " <td>0.0</td>\n", - " <td>Going ahead other</td>\n", - " <td>2</td>\n", - " <td>Car</td>\n", - " <td>No</td>\n", - " <td>Front</td>\n", - " <td>2016</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2027641</th>\n", - " <td>2016984131216</td>\n", - " <td>56 - 65</td>\n", - " <td>NaN</td>\n", - " <td>Urban area</td>\n", - " <td>3.0</td>\n", - " <td>NaN</td>\n", - " <td>None</td>\n", - " <td>Central crash barrier</td>\n", - " <td>Journey as part of work</td>\n", - " <td>Not at or within 20 metres of junction</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Offside on to central reservation</td>\n", - " <td>0.0</td>\n", - " <td>Going ahead other</td>\n", - " <td>1</td>\n", - " <td>Goods 7.5 tonnes mgw and over</td>\n", - " <td>No</td>\n", - " <td>Offside</td>\n", - " <td>2016</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2027642</th>\n", - " <td>2016984131316</td>\n", - " <td>16 - 20</td>\n", - " <td>13.0</td>\n", - " <td>Urban area</td>\n", - " <td>6.0</td>\n", - " <td>1796.0</td>\n", - " <td>None</td>\n", - " <td>Tree</td>\n", - " <td>Not known</td>\n", - " <td>Not at or within 20 metres of junction</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Offside</td>\n", - " <td>0.0</td>\n", - " <td>Going ahead other</td>\n", - " <td>1</td>\n", - " <td>Car</td>\n", - " <td>No</td>\n", - " <td>Front</td>\n", - " <td>2016</td>\n", - " <td>3</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2027643</th>\n", - " <td>2016984133416</td>\n", - " <td>46 - 55</td>\n", - " <td>5.0</td>\n", - " <td>Urban area</td>\n", - " <td>NaN</td>\n", - " <td>2184.0</td>\n", - " <td>None</td>\n", - " <td>Wall or fence</td>\n", - " <td>Not known</td>\n", - " <td>Not at or within 20 metres of junction</td>\n", - " <td>...</td>\n", - " <td>No tow/articulation</td>\n", - " <td>Nearside</td>\n", - " <td>0.0</td>\n", - " <td>Going ahead other</td>\n", - " <td>1</td>\n", - " <td>Car</td>\n", - " <td>No</td>\n", - " <td>Nearside</td>\n", - " <td>2016</td>\n", - " <td>3</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>2027644 rows × 25 columns</p>\n", - "</div>" - ], - "text/plain": [ - " accident_index Age_Band_of_Driver Age_of_Vehicle \\\n", - "0 200501BS00002 36 - 45 3.0 \n", - "1 200501BS00003 26 - 35 5.0 \n", - "2 200501BS00004 46 - 55 4.0 \n", - "3 200501BS00005 46 - 55 10.0 \n", - "4 200501BS00006 46 - 55 1.0 \n", - "... ... ... ... \n", - "2027639 2016984131116 21 - 25 14.0 \n", - "2027640 2016984131116 56 - 65 NaN \n", - "2027641 2016984131216 56 - 65 NaN \n", - "2027642 2016984131316 16 - 20 13.0 \n", - "2027643 2016984133416 46 - 55 5.0 \n", - "\n", - " Driver_Home_Area_Type Driver_IMD_Decile \\\n", - "0 Data missing or out of range NaN \n", - "1 Urban area 3.0 \n", - "2 Urban area 1.0 \n", - "3 Data missing or out of range NaN \n", - "4 Urban area 4.0 \n", - "... ... ... \n", - "2027639 Urban area NaN \n", - "2027640 Small town NaN \n", - "2027641 Urban area 3.0 \n", - "2027642 Urban area 6.0 \n", - "2027643 Urban area NaN \n", - "\n", - " Engine_Capacity_.CC. Hit_Object_in_Carriageway \\\n", - "0 8268.0 None \n", - "1 8300.0 Parked vehicle \n", - "2 1769.0 None \n", - "3 85.0 Kerb \n", - "4 2976.0 None \n", - "... ... ... \n", - "2027639 1598.0 None \n", - "2027640 1598.0 None \n", - "2027641 NaN None \n", - "2027642 1796.0 None \n", - "2027643 2184.0 None \n", - "\n", - " Hit_Object_off_Carriageway Journey_Purpose_of_Driver \\\n", - "0 None Journey as part of work \n", - "1 None Journey as part of work \n", - "2 None Other/Not known (2005-10) \n", - "3 None Other/Not known (2005-10) \n", - "4 None Other/Not known (2005-10) \n", - "... ... ... \n", - "2027639 None Not known \n", - "2027640 None Commuting to/from work \n", - "2027641 Central crash barrier Journey as part of work \n", - "2027642 Tree Not known \n", - "2027643 Wall or fence Not known \n", - "\n", - " Junction_Location ... \\\n", - "0 Leaving roundabout ... \n", - "1 Not at or within 20 metres of junction ... \n", - "2 Not at or within 20 metres of junction ... \n", - "3 Not at or within 20 metres of junction ... \n", - "4 Not at or within 20 metres of junction ... \n", - "... ... ... \n", - "2027639 Mid Junction - on roundabout or on main road ... \n", - "2027640 Mid Junction - on roundabout or on main road ... \n", - "2027641 Not at or within 20 metres of junction ... \n", - "2027642 Not at or within 20 metres of junction ... \n", - "2027643 Not at or within 20 metres of junction ... \n", - "\n", - " Towing_and_Articulation Vehicle_Leaving_Carriageway \\\n", - "0 No tow/articulation Did not leave carriageway \n", - "1 No tow/articulation Did not leave carriageway \n", - "2 No tow/articulation Did not leave carriageway \n", - "3 No tow/articulation Did not leave carriageway \n", - "4 No tow/articulation Did not leave carriageway \n", - "... ... ... \n", - "2027639 No tow/articulation Did not leave carriageway \n", - "2027640 No tow/articulation Offside \n", - "2027641 No tow/articulation Offside on to central reservation \n", - "2027642 No tow/articulation Offside \n", - "2027643 No tow/articulation Nearside \n", - "\n", - " Vehicle_Location.Restricted_Lane Vehicle_Manoeuvre \\\n", - "0 0.0 Slowing or stopping \n", - "1 0.0 Going ahead right-hand bend \n", - "2 0.0 Going ahead other \n", - "3 0.0 Going ahead other \n", - "4 0.0 Moving off \n", - "... ... ... \n", - "2027639 0.0 Going ahead other \n", - "2027640 0.0 Going ahead other \n", - "2027641 0.0 Going ahead other \n", - "2027642 0.0 Going ahead other \n", - "2027643 0.0 Going ahead other \n", - "\n", - " Vehicle_Reference Vehicle_Type \\\n", - "0 1 Bus or coach (17 or more pass seats) \n", - "1 1 Bus or coach (17 or more pass seats) \n", - "2 1 Car \n", - "3 1 Motorcycle 125cc and under \n", - "4 1 Car \n", - "... ... ... \n", - "2027639 1 Car \n", - "2027640 2 Car \n", - "2027641 1 Goods 7.5 tonnes mgw and over \n", - "2027642 1 Car \n", - "2027643 1 Car \n", - "\n", - " Was_Vehicle_Left_Hand_Drive X1st_Point_of_Impact Year \\\n", - "0 No Nearside 2005 \n", - "1 No Nearside 2005 \n", - "2 No Front 2005 \n", - "3 No Front 2005 \n", - "4 No Did not impact 2005 \n", - "... ... ... ... \n", - "2027639 No Front 2016 \n", - "2027640 No Front 2016 \n", - "2027641 No Offside 2016 \n", - "2027642 No Front 2016 \n", - "2027643 No Nearside 2016 \n", - "\n", - " accident_severity \n", - "0 3 \n", - "1 3 \n", - "2 3 \n", - "3 3 \n", - "4 3 \n", - "... ... \n", - "2027639 3 \n", - "2027640 3 \n", - "2027641 3 \n", - "2027642 3 \n", - "2027643 3 \n", - "\n", - "[2027644 rows x 25 columns]" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vech23= pd.merge(vechilee,Acc[['accident_index','accident_severity']],on='accident_index')\n", - "\n", - "vech23" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " sum_sq df F PR(>F)\n", - "Vehicle_Type 7.737798 1.0 56.955431 4.648715e-14\n", - "Residual 2716.869752 19998.0 NaN NaN\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as stats\n", - "np.random.seed(12)\n", - "vech2= vech23[['accident_severity', 'Vehicle_Type']]\n", - "vech2 = vech2.reset_index(drop=True)\n", - "vech2[\"Vehicle_Type\"] = vech2[\"Vehicle_Type\"].astype('category')\n", - "vech2[\"Vehicle_Type\"] = vech2[\"Vehicle_Type\"].cat.codes\n", - "vech2 = vech2.dropna()\n", - "vech2\n", - "vech2 = vech2[['accident_severity', 'Vehicle_Type']]\n", - "vech2 = vech2.reset_index(drop=True)\n", - "#AccRT\n", - "#vech2['accident_severity']=vech2['accident_severity'].replace({1: 'Fatal', 2: 'Serious', 3: 'Slight'})\n", - "#AccRT\n", - "\n", - "\n", - "# Group age data by ra\n", - "\n", - "\n", - "vech2 = vech2[['accident_severity', 'Vehicle_Type']][:20000]\n", - "vech2 = vech2.reset_index(drop=True)\n", - "from statsmodels.formula.api import ols\n", - "import statsmodels.api as sm\n", - "severity_frame = pd.DataFrame({\"severity\":vech2['accident_severity'],\"Vehicle_Type\":vech2['Vehicle_Type']})\n", - "severity_frame\n", - "model = ols('severity~Vehicle_Type', # Model formula\n", - " data = severity_frame).fit()\n", - " \n", - "anova_result = sm.stats.anova_lm(model, typ=2)\n", - "print (anova_result)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " sum_sq df F PR(>F)\n", - "Sex_of_Driver 3.528091 1.0 25.928964 3.574295e-07\n", - "Residual 2721.079459 19998.0 NaN NaN\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import scipy.stats as stats\n", - "np.random.seed(12)\n", - "vech2= vech23[['accident_severity', 'Sex_of_Driver']]\n", - "vech2 = vech2.reset_index(drop=True)\n", - "vech2[\"Sex_of_Driver\"] = vech2[\"Sex_of_Driver\"].astype('category')\n", - "vech2[\"Sex_of_Driver\"] = vech2[\"Sex_of_Driver\"].cat.codes\n", - "vech2 = vech2.dropna()\n", - "vech2\n", - "vech2 = vech2[['accident_severity', 'Sex_of_Driver']]\n", - "vech2 = vech2.reset_index(drop=True)\n", - "#AccRT\n", - "#vech2['accident_severity']=vech2['accident_severity'].replace({1: 'Fatal', 2: 'Serious', 3: 'Slight'})\n", - "#AccRT\n", - "\n", - "\n", - "# Group age data by ra\n", - "\n", - "\n", - "vech2 = vech2[['accident_severity', 'Sex_of_Driver']][:20000]\n", - "vech2 = vech2.reset_index(drop=True)\n", - "from statsmodels.formula.api import ols\n", - "import statsmodels.api as sm\n", - "severity_frame = pd.DataFrame({\"severity\":vech2['accident_severity'],\"Sex_of_Driver\":vech2['Sex_of_Driver']})\n", - "severity_frame\n", - "model = ols('severity~Sex_of_Driver', # Model formula\n", - " data = severity_frame).fit()\n", - " \n", - "anova_result = sm.stats.anova_lm(model, typ=2)\n", - "print (anova_result)\n", - "\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.6.7 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.7" - }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49" - } - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}