diff --git a/anova.ipynb b/anova.ipynb
deleted file mode 100644
index 3f95c119fe1dc99f17c34589c3a10b85285802d0..0000000000000000000000000000000000000000
--- a/anova.ipynb
+++ /dev/null
@@ -1,2526 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>accident_index</th>\n",
-       "      <th>accident_year</th>\n",
-       "      <th>accident_reference</th>\n",
-       "      <th>location_easting_osgr</th>\n",
-       "      <th>location_northing_osgr</th>\n",
-       "      <th>longitude</th>\n",
-       "      <th>latitude</th>\n",
-       "      <th>police_force</th>\n",
-       "      <th>accident_severity</th>\n",
-       "      <th>number_of_vehicles</th>\n",
-       "      <th>...</th>\n",
-       "      <th>pedestrian_crossing_physical_facilities</th>\n",
-       "      <th>light_conditions</th>\n",
-       "      <th>weather_conditions</th>\n",
-       "      <th>road_surface_conditions</th>\n",
-       "      <th>special_conditions_at_site</th>\n",
-       "      <th>carriageway_hazards</th>\n",
-       "      <th>urban_or_rural_area</th>\n",
-       "      <th>did_police_officer_attend_scene_of_accident</th>\n",
-       "      <th>trunk_road_flag</th>\n",
-       "      <th>lsoa_of_accident_location</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>197901A11AD14</td>\n",
-       "      <td>1979</td>\n",
-       "      <td>01A11AD14</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>8</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>197901A1BAW34</td>\n",
-       "      <td>1979</td>\n",
-       "      <td>01A1BAW34</td>\n",
-       "      <td>198460.0</td>\n",
-       "      <td>894000.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>4</td>\n",
-       "      <td>8</td>\n",
-       "      <td>3</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>197901A1BFD77</td>\n",
-       "      <td>1979</td>\n",
-       "      <td>01A1BFD77</td>\n",
-       "      <td>406380.0</td>\n",
-       "      <td>307000.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>4</td>\n",
-       "      <td>8</td>\n",
-       "      <td>3</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>197901A1BGC20</td>\n",
-       "      <td>1979</td>\n",
-       "      <td>01A1BGC20</td>\n",
-       "      <td>281680.0</td>\n",
-       "      <td>440000.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>4</td>\n",
-       "      <td>8</td>\n",
-       "      <td>3</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>197901A1BGF95</td>\n",
-       "      <td>1979</td>\n",
-       "      <td>01A1BGF95</td>\n",
-       "      <td>153960.0</td>\n",
-       "      <td>795000.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>4</td>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8602819</th>\n",
-       "      <td>2020991027064</td>\n",
-       "      <td>2020</td>\n",
-       "      <td>991027064</td>\n",
-       "      <td>343034.0</td>\n",
-       "      <td>731654.0</td>\n",
-       "      <td>-2.926320</td>\n",
-       "      <td>56.473539</td>\n",
-       "      <td>99</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8602820</th>\n",
-       "      <td>2020991029573</td>\n",
-       "      <td>2020</td>\n",
-       "      <td>991029573</td>\n",
-       "      <td>257963.0</td>\n",
-       "      <td>658891.0</td>\n",
-       "      <td>-4.267565</td>\n",
-       "      <td>55.802353</td>\n",
-       "      <td>99</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8602821</th>\n",
-       "      <td>2020991030297</td>\n",
-       "      <td>2020</td>\n",
-       "      <td>991030297</td>\n",
-       "      <td>383664.0</td>\n",
-       "      <td>810646.0</td>\n",
-       "      <td>-2.271903</td>\n",
-       "      <td>57.186317</td>\n",
-       "      <td>99</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8602822</th>\n",
-       "      <td>2020991030900</td>\n",
-       "      <td>2020</td>\n",
-       "      <td>991030900</td>\n",
-       "      <td>277161.0</td>\n",
-       "      <td>674852.0</td>\n",
-       "      <td>-3.968753</td>\n",
-       "      <td>55.950940</td>\n",
-       "      <td>99</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8602823</th>\n",
-       "      <td>2020991032575</td>\n",
-       "      <td>2020</td>\n",
-       "      <td>991032575</td>\n",
-       "      <td>240402.0</td>\n",
-       "      <td>681950.0</td>\n",
-       "      <td>-4.561040</td>\n",
-       "      <td>56.003843</td>\n",
-       "      <td>99</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>8602824 rows × 36 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        accident_index  accident_year accident_reference  \\\n",
-       "0        197901A11AD14           1979          01A11AD14   \n",
-       "1        197901A1BAW34           1979          01A1BAW34   \n",
-       "2        197901A1BFD77           1979          01A1BFD77   \n",
-       "3        197901A1BGC20           1979          01A1BGC20   \n",
-       "4        197901A1BGF95           1979          01A1BGF95   \n",
-       "...                ...            ...                ...   \n",
-       "8602819  2020991027064           2020          991027064   \n",
-       "8602820  2020991029573           2020          991029573   \n",
-       "8602821  2020991030297           2020          991030297   \n",
-       "8602822  2020991030900           2020          991030900   \n",
-       "8602823  2020991032575           2020          991032575   \n",
-       "\n",
-       "         location_easting_osgr  location_northing_osgr  longitude   latitude  \\\n",
-       "0                          NaN                     NaN        NaN        NaN   \n",
-       "1                     198460.0                894000.0        NaN        NaN   \n",
-       "2                     406380.0                307000.0        NaN        NaN   \n",
-       "3                     281680.0                440000.0        NaN        NaN   \n",
-       "4                     153960.0                795000.0        NaN        NaN   \n",
-       "...                        ...                     ...        ...        ...   \n",
-       "8602819               343034.0                731654.0  -2.926320  56.473539   \n",
-       "8602820               257963.0                658891.0  -4.267565  55.802353   \n",
-       "8602821               383664.0                810646.0  -2.271903  57.186317   \n",
-       "8602822               277161.0                674852.0  -3.968753  55.950940   \n",
-       "8602823               240402.0                681950.0  -4.561040  56.003843   \n",
-       "\n",
-       "         police_force  accident_severity  number_of_vehicles  ...  \\\n",
-       "0                   1                  3                   2  ...   \n",
-       "1                   1                  3                   1  ...   \n",
-       "2                   1                  3                   2  ...   \n",
-       "3                   1                  3                   2  ...   \n",
-       "4                   1                  2                   2  ...   \n",
-       "...               ...                ...                 ...  ...   \n",
-       "8602819            99                  2                   2  ...   \n",
-       "8602820            99                  3                   1  ...   \n",
-       "8602821            99                  2                   2  ...   \n",
-       "8602822            99                  3                   2  ...   \n",
-       "8602823            99                  3                   1  ...   \n",
-       "\n",
-       "         pedestrian_crossing_physical_facilities light_conditions  \\\n",
-       "0                                             -1                1   \n",
-       "1                                             -1                4   \n",
-       "2                                             -1                4   \n",
-       "3                                             -1                4   \n",
-       "4                                             -1                4   \n",
-       "...                                          ...              ...   \n",
-       "8602819                                        0                1   \n",
-       "8602820                                        0                1   \n",
-       "8602821                                        0                1   \n",
-       "8602822                                        0                1   \n",
-       "8602823                                        0                1   \n",
-       "\n",
-       "         weather_conditions road_surface_conditions  \\\n",
-       "0                         8                       1   \n",
-       "1                         8                       3   \n",
-       "2                         8                       3   \n",
-       "3                         8                       3   \n",
-       "4                         3                       3   \n",
-       "...                     ...                     ...   \n",
-       "8602819                   1                       1   \n",
-       "8602820                   1                       1   \n",
-       "8602821                   1                       1   \n",
-       "8602822                   1                       1   \n",
-       "8602823                   1                       1   \n",
-       "\n",
-       "         special_conditions_at_site carriageway_hazards urban_or_rural_area  \\\n",
-       "0                                -1                   0                  -1   \n",
-       "1                                -1                   0                  -1   \n",
-       "2                                -1                   0                  -1   \n",
-       "3                                -1                   0                  -1   \n",
-       "4                                -1                   0                  -1   \n",
-       "...                             ...                 ...                 ...   \n",
-       "8602819                           0                   0                   1   \n",
-       "8602820                           0                   0                   1   \n",
-       "8602821                           0                   0                   2   \n",
-       "8602822                           0                   0                   1   \n",
-       "8602823                           0                   2                   1   \n",
-       "\n",
-       "         did_police_officer_attend_scene_of_accident  trunk_road_flag  \\\n",
-       "0                                                 -1               -1   \n",
-       "1                                                 -1               -1   \n",
-       "2                                                 -1               -1   \n",
-       "3                                                 -1               -1   \n",
-       "4                                                 -1               -1   \n",
-       "...                                              ...              ...   \n",
-       "8602819                                            1               -1   \n",
-       "8602820                                            2               -1   \n",
-       "8602821                                            1               -1   \n",
-       "8602822                                            2               -1   \n",
-       "8602823                                            1               -1   \n",
-       "\n",
-       "         lsoa_of_accident_location  \n",
-       "0                               -1  \n",
-       "1                               -1  \n",
-       "2                               -1  \n",
-       "3                               -1  \n",
-       "4                               -1  \n",
-       "...                            ...  \n",
-       "8602819                         -1  \n",
-       "8602820                         -1  \n",
-       "8602821                         -1  \n",
-       "8602822                         -1  \n",
-       "8602823                         -1  \n",
-       "\n",
-       "[8602824 rows x 36 columns]"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Standard library\n",
-    "import glob\n",
-    "import os\n",
-    "import warnings\n",
-    "\n",
-    "# Third-party\n",
-    "import matplotlib.pyplot as plt\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import seaborn as sns\n",
-    "\n",
-    "# Suppress every warning category for the rest of the kernel session.\n",
-    "warnings.filterwarnings(\"ignore\")\n",
-    "\n",
-    "# Accident CSV location. The ACCIDENTS_CSV environment variable overrides the\n",
-    "# original machine-specific default, so the notebook can run elsewhere unedited.\n",
-    "ACCIDENTS_CSV = os.environ.get(\n",
-    "    'ACCIDENTS_CSV',\n",
-    "    '/Users/Asfandyar/Downloads/dft-road-casualty-statistics-accident-1979-2020.csv',\n",
-    ")\n",
-    "accidents = pd.read_csv(ACCIDENTS_CSV)\n",
-    "accidents"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0          False\n",
-       "1          False\n",
-       "2          False\n",
-       "3          False\n",
-       "4          False\n",
-       "           ...  \n",
-       "8602819    False\n",
-       "8602820    False\n",
-       "8602821    False\n",
-       "8602822    False\n",
-       "8602823    False\n",
-       "Name: accident_year, Length: 8602824, dtype: bool"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Boolean row filter: accident_year strictly above 2004 and at most 2019.\n",
-    "accident_year = accidents['accident_year']\n",
-    "mask = (accident_year > 2004) & (accident_year <= 2019)\n",
-    "mask"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Keep only the rows of accidents for which mask is True (all columns).\n",
-    "Acc = accidents.loc[mask, :]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 253,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>accident_severity</th>\n",
-       "      <th>first_road_class</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>3</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9995</th>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9996</th>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9997</th>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9998</th>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9999</th>\n",
-       "      <td>3</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>10000 rows × 2 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      accident_severity  first_road_class\n",
-       "0                     2                 3\n",
-       "1                     3                 4\n",
-       "2                     3                 5\n",
-       "3                     3                 3\n",
-       "4                     3                 6\n",
-       "...                 ...               ...\n",
-       "9995                  3                 3\n",
-       "9996                  3                 3\n",
-       "9997                  3                 3\n",
-       "9998                  2                 3\n",
-       "9999                  3                 3\n",
-       "\n",
-       "[10000 rows x 2 columns]"
-      ]
-     },
-     "execution_count": 253,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# First 10,000 rows of Acc, two columns only, renumbered from 0.\n",
-    "AccRT = (\n",
-    "    Acc.loc[:, ['accident_severity', 'first_road_class']]\n",
-    "    .iloc[:10000]\n",
-    "    .reset_index(drop=True)\n",
-    ")\n",
-    "AccRT"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 279,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "severity~first_road_class\n",
-      "F_onewayResult(statistic=3.291830175326872, pvalue=0.0372058822331273)\n",
-      "                       sum_sq       df         F    PR(>F)\n",
-      "first_road_class     0.667436      1.0  4.602263  0.031942\n",
-      "Residual          2900.179764  19998.0       NaN       NaN\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as stats\n",
-    "import statsmodels.api as sm\n",
-    "from statsmodels.formula.api import ols\n",
-    "\n",
-    "np.random.seed(12)\n",
-    "\n",
-    "# First 20,000 rows of Acc with a fresh 0..n-1 index. Severity keeps its\n",
-    "# numeric code because it is the response of the OLS fit further down.\n",
-    "AccRT = Acc[['accident_severity', 'first_road_class']][:20000].reset_index(drop=True)\n",
-    "severity_frame = pd.DataFrame({\n",
-    "    'severity': AccRT['accident_severity'],\n",
-    "    'first_road_class': AccRT['first_road_class'],\n",
-    "})\n",
-    "\n",
-    "# Row positions of each severity level. The text labels serve only as group\n",
-    "# keys, so the numeric severity column in severity_frame stays untouched.\n",
-    "severity_labels = severity_frame['severity'].replace({1: 'Fatal', 2: 'Serious', 3: 'Slight'})\n",
-    "groups = severity_frame.groupby(severity_labels).groups\n",
-    "\n",
-    "# Extract the first_road_class values of each severity group.\n",
-    "road_class = severity_frame['first_road_class'].to_numpy()\n",
-    "Fatal = road_class[groups['Fatal']]\n",
-    "Serious = road_class[groups['Serious']]\n",
-    "Slight = road_class[groups['Slight']]\n",
-    "\n",
-    "# One-way ANOVA across the three severity groups.\n",
-    "# NOTE(review): the samples are first_road_class values split by severity,\n",
-    "# i.e. the reverse direction of the label printed below -- confirm intent.\n",
-    "print('severity~first_road_class')\n",
-    "print(stats.f_oneway(Fatal, Serious, Slight))\n",
-    "\n",
-    "# OLS of numeric severity on first_road_class, then a type-2 ANOVA table.\n",
-    "# NOTE(review): first_road_class enters as a single numeric term; if it is a\n",
-    "# categorical code it probably belongs in C(...) -- confirm.\n",
-    "model = ols('severity~first_road_class', data=severity_frame).fit()\n",
-    "anova_result = sm.stats.anova_lm(model, typ=2)\n",
-    "print(anova_result)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 280,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "severity~road_type\n",
-      "F_onewayResult(statistic=0.5792752078545762, pvalue=0.5603137255090602)\n",
-      "                sum_sq       df         F    PR(>F)\n",
-      "road_type     0.121945      1.0  0.840706  0.359206\n",
-      "Residual   2900.725255  19998.0       NaN       NaN\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as stats\n",
-    "import statsmodels.api as sm\n",
-    "from statsmodels.formula.api import ols\n",
-    "\n",
-    "np.random.seed(12)\n",
-    "\n",
-    "# First 20,000 rows of Acc with a fresh 0..n-1 index. Severity keeps its\n",
-    "# numeric code because it is the response of the OLS fit further down.\n",
-    "AccRT = Acc[['accident_severity', 'road_type']][:20000].reset_index(drop=True)\n",
-    "severity_frame = pd.DataFrame({\n",
-    "    'severity': AccRT['accident_severity'],\n",
-    "    'road_type': AccRT['road_type'],\n",
-    "})\n",
-    "\n",
-    "# Row positions of each severity level. The text labels serve only as group\n",
-    "# keys, so the numeric severity column in severity_frame stays untouched.\n",
-    "severity_labels = severity_frame['severity'].replace({1: 'Fatal', 2: 'Serious', 3: 'Slight'})\n",
-    "groups = severity_frame.groupby(severity_labels).groups\n",
-    "\n",
-    "# Extract the road_type values of each severity group.\n",
-    "road_type_values = severity_frame['road_type'].to_numpy()\n",
-    "Fatal = road_type_values[groups['Fatal']]\n",
-    "Serious = road_type_values[groups['Serious']]\n",
-    "Slight = road_type_values[groups['Slight']]\n",
-    "\n",
-    "# One-way ANOVA across the three severity groups.\n",
-    "# NOTE(review): the samples are road_type values split by severity, i.e. the\n",
-    "# reverse direction of the label printed below -- confirm intent.\n",
-    "print('severity~road_type')\n",
-    "print(stats.f_oneway(Fatal, Serious, Slight))\n",
-    "\n",
-    "# OLS of numeric severity on road_type, then a type-2 ANOVA table.\n",
-    "# NOTE(review): road_type enters as a single numeric term; if it is a\n",
-    "# categorical code it probably belongs in C(...) -- confirm.\n",
-    "model = ols('severity~road_type', data=severity_frame).fit()\n",
-    "anova_result = sm.stats.anova_lm(model, typ=2)\n",
-    "print(anova_result)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 60,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                      sum_sq       df         F    PR(>F)\n",
-      "junction_detail     3.204045      1.0  22.11262  0.000003\n",
-      "Residual         2897.643155  19998.0       NaN       NaN\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as stats\n",
-    "import statsmodels.api as sm\n",
-    "from statsmodels.formula.api import ols\n",
-    "\n",
-    "np.random.seed(12)\n",
-    "\n",
-    "# First 20,000 rows of Acc with a fresh 0..n-1 index. Severity keeps its\n",
-    "# numeric code because it is the response of the OLS fit below.\n",
-    "AccRT = Acc[['accident_severity', 'junction_detail']][:20000].reset_index(drop=True)\n",
-    "severity_frame = pd.DataFrame({\n",
-    "    'severity': AccRT['accident_severity'],\n",
-    "    'junction_detail': AccRT['junction_detail'],\n",
-    "})\n",
-    "\n",
-    "# OLS of numeric severity on junction_detail, then a type-2 ANOVA table.\n",
-    "# NOTE(review): junction_detail enters as a single numeric term; if it is a\n",
-    "# categorical code it probably belongs in C(...) -- confirm.\n",
-    "model = ols('severity~junction_detail', data=severity_frame).fit()\n",
-    "anova_result = sm.stats.anova_lm(model, typ=2)\n",
-    "print(anova_result)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 63,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0         -1\n",
-       "1          2\n",
-       "2         -1\n",
-       "3         -1\n",
-       "4         -1\n",
-       "          ..\n",
-       "2287422   -1\n",
-       "2287423   -1\n",
-       "2287424    4\n",
-       "2287425    4\n",
-       "2287426   -1\n",
-       "Name: junction_control, Length: 2287427, dtype: int64"
-      ]
-     },
-     "execution_count": 63,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Show the junction_control column of Acc.\n",
-    "Acc.loc[:, 'junction_control']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 68,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                         sum_sq         df             F  PR(>F)\n",
-      "junction_control    1756.967410        1.0  10523.832139     0.0\n",
-      "Residual          381888.567304  2287425.0           NaN     NaN\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as stats\n",
-    "import statsmodels.api as sm\n",
-    "from statsmodels.formula.api import ols\n",
-    "\n",
-    "np.random.seed(12)\n",
-    "\n",
-    "# Every row of Acc, two columns only, with a fresh 0..n-1 index.\n",
-    "AccRT = Acc[['accident_severity', 'junction_control']].reset_index(drop=True)\n",
-    "severity_frame = pd.DataFrame({\n",
-    "    'severity': AccRT['accident_severity'],\n",
-    "    'junction_control': AccRT['junction_control'],\n",
-    "})\n",
-    "\n",
-    "# OLS of numeric severity on junction_control, then a type-2 ANOVA table.\n",
-    "# NOTE(review): junction_control enters as a single numeric term; if it is a\n",
-    "# categorical code it probably belongs in C(...) -- confirm.\n",
-    "model = ols('severity~junction_control', data=severity_frame).fit()\n",
-    "anova_result = sm.stats.anova_lm(model, typ=2)\n",
-    "print(anova_result)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 71,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['accident_index', 'accident_year', 'accident_reference', 'location_easting_osgr', 'location_northing_osgr', 'longitude', 'latitude', 'police_force', 'accident_severity', 'number_of_vehicles', 'number_of_casualties', 'date', 'day_of_week', 'time', 'local_authority_district', 'local_authority_ons_district', 'local_authority_highway', 'first_road_class', 'first_road_number', 'road_type', 'speed_limit', 'junction_detail', 'junction_control', 'second_road_class', 'second_road_number', 'pedestrian_crossing_human_control', 'pedestrian_crossing_physical_facilities', 'light_conditions', 'weather_conditions', 'road_surface_conditions', 'special_conditions_at_site', 'carriageway_hazards', 'urban_or_rural_area', 'did_police_officer_attend_scene_of_accident', 'trunk_road_flag', 'lsoa_of_accident_location']\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(list(Acc.columns))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 70,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                    sum_sq         df          F        PR(>F)\n",
-      "day_of_week       4.208090        1.0  25.090335  5.471043e-07\n",
-      "Residual     383641.326624  2287425.0        NaN           NaN\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as stats\n",
-    "import statsmodels.api as sm\n",
-    "from statsmodels.formula.api import ols\n",
-    "\n",
-    "np.random.seed(12)\n",
-    "\n",
-    "# One-way ANOVA of accident severity across day_of_week groups.\n",
-    "AccRT = Acc[['accident_severity', 'day_of_week']].reset_index(drop=True)\n",
-    "severity_frame = pd.DataFrame({'severity': AccRT['accident_severity'],\n",
-    "                               'day_of_week': AccRT['day_of_week']})\n",
-    "\n",
-    "# day_of_week is stored as an integer day code, so it is wrapped in C(...) to be fitted\n",
-    "# as a categorical factor with one level per day. Left bare, the codes are fitted as a\n",
-    "# single continuous regressor (df = 1 in the ANOVA table), which tests a linear trend\n",
-    "# in the code numbers instead of a difference between days.\n",
-    "model = ols('severity ~ C(day_of_week)', data=severity_frame).fit()\n",
-    "\n",
-    "# Type II sums of squares, as before.\n",
-    "anova_result = sm.stats.anova_lm(model, typ=2)\n",
-    "print(anova_result)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 75,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                         sum_sq         df            F  PR(>F)\n",
-      "light_conditions    1403.800907        1.0  8400.676862     0.0\n",
-      "Residual          382241.733807  2287425.0          NaN     NaN\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as stats\n",
-    "import statsmodels.api as sm\n",
-    "from statsmodels.formula.api import ols\n",
-    "\n",
-    "np.random.seed(12)\n",
-    "\n",
-    "# One-way ANOVA of accident severity across light_conditions groups.\n",
-    "AccRT = Acc[['accident_severity', 'light_conditions']].reset_index(drop=True)\n",
-    "severity_frame = pd.DataFrame({'severity': AccRT['accident_severity'],\n",
-    "                               'light_conditions': AccRT['light_conditions']})\n",
-    "\n",
-    "# light_conditions is stored as an integer code, so it is wrapped in C(...) to be fitted\n",
-    "# as a categorical factor with one level per code. Left bare, the codes are fitted as a\n",
-    "# single continuous regressor (df = 1 in the ANOVA table), which tests a linear trend\n",
-    "# in the code numbers instead of a difference between lighting groups.\n",
-    "model = ols('severity ~ C(light_conditions)', data=severity_frame).fit()\n",
-    "\n",
-    "# Type II sums of squares, as before.\n",
-    "anova_result = sm.stats.anova_lm(model, typ=2)\n",
-    "print(anova_result)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 76,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "                           sum_sq         df            F         PR(>F)\n",
-      "weather_conditions     190.013728        1.0  1133.487794  2.002963e-248\n",
-      "Residual            383455.520986  2287425.0          NaN            NaN\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as stats\n",
-    "import statsmodels.api as sm\n",
-    "from statsmodels.formula.api import ols\n",
-    "\n",
-    "np.random.seed(12)\n",
-    "\n",
-    "# One-way ANOVA of accident severity across weather_conditions groups.\n",
-    "AccRT = Acc[['accident_severity', 'weather_conditions']].reset_index(drop=True)\n",
-    "severity_frame = pd.DataFrame({'severity': AccRT['accident_severity'],\n",
-    "                               'weather_conditions': AccRT['weather_conditions']})\n",
-    "\n",
-    "# weather_conditions is stored as an integer code, so it is wrapped in C(...) to be fitted\n",
-    "# as a categorical factor with one level per code. Left bare, the codes are fitted as a\n",
-    "# single continuous regressor (df = 1 in the ANOVA table), which tests a linear trend\n",
-    "# in the code numbers instead of a difference between weather groups.\n",
-    "model = ols('severity ~ C(weather_conditions)', data=severity_frame).fit()\n",
-    "\n",
-    "# Type II sums of squares, as before.\n",
-    "anova_result = sm.stats.anova_lm(model, typ=2)\n",
-    "print(anova_result)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# import the usual suspects ...\n",
-    "import glob\n",
-    "import os\n",
-    "\n",
-    "import matplotlib.pyplot as plt\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import seaborn as sns\n",
-    "\n",
-    "# Location of the vehicle-level CSV. The default is the path this notebook was written\n",
-    "# with; set the VEHICLE_CSV environment variable to load a copy stored elsewhere.\n",
-    "VEHICLE_CSV = os.environ.get('VEHICLE_CSV', '/Users/Asfandyar/Downloads/archive/Vehicle_Information.csv')\n",
-    "\n",
-    "# NOTE(review): the 'unicode_escape' codec is kept from the original call. It also\n",
-    "# interprets backslash sequences found in the data; if the file is plain single-byte\n",
-    "# text, a Latin-1 style encoding may be what was intended - confirm against the file.\n",
-    "vechilee = pd.read_csv(VEHICLE_CSV, encoding='unicode_escape')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>accident_index</th>\n",
-       "      <th>accident_year</th>\n",
-       "      <th>accident_reference</th>\n",
-       "      <th>location_easting_osgr</th>\n",
-       "      <th>location_northing_osgr</th>\n",
-       "      <th>longitude</th>\n",
-       "      <th>latitude</th>\n",
-       "      <th>police_force</th>\n",
-       "      <th>accident_severity</th>\n",
-       "      <th>number_of_vehicles</th>\n",
-       "      <th>...</th>\n",
-       "      <th>pedestrian_crossing_physical_facilities</th>\n",
-       "      <th>light_conditions</th>\n",
-       "      <th>weather_conditions</th>\n",
-       "      <th>road_surface_conditions</th>\n",
-       "      <th>special_conditions_at_site</th>\n",
-       "      <th>carriageway_hazards</th>\n",
-       "      <th>urban_or_rural_area</th>\n",
-       "      <th>did_police_officer_attend_scene_of_accident</th>\n",
-       "      <th>trunk_road_flag</th>\n",
-       "      <th>lsoa_of_accident_location</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>6224198</th>\n",
-       "      <td>200501BS00001</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00001</td>\n",
-       "      <td>525680.0</td>\n",
-       "      <td>178240.0</td>\n",
-       "      <td>-0.191170</td>\n",
-       "      <td>51.489096</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002849</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6224199</th>\n",
-       "      <td>200501BS00002</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00002</td>\n",
-       "      <td>524170.0</td>\n",
-       "      <td>181650.0</td>\n",
-       "      <td>-0.211708</td>\n",
-       "      <td>51.520075</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002909</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6224200</th>\n",
-       "      <td>200501BS00003</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00003</td>\n",
-       "      <td>524520.0</td>\n",
-       "      <td>182240.0</td>\n",
-       "      <td>-0.206458</td>\n",
-       "      <td>51.525301</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002857</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6224201</th>\n",
-       "      <td>200501BS00004</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00004</td>\n",
-       "      <td>526900.0</td>\n",
-       "      <td>177530.0</td>\n",
-       "      <td>-0.173862</td>\n",
-       "      <td>51.482442</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002840</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6224202</th>\n",
-       "      <td>200501BS00005</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00005</td>\n",
-       "      <td>528060.0</td>\n",
-       "      <td>179040.0</td>\n",
-       "      <td>-0.156618</td>\n",
-       "      <td>51.495752</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>7</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002863</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8511620</th>\n",
-       "      <td>2019984106919</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>984106919</td>\n",
-       "      <td>312635.0</td>\n",
-       "      <td>573392.0</td>\n",
-       "      <td>-3.368899</td>\n",
-       "      <td>55.047323</td>\n",
-       "      <td>98</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8511621</th>\n",
-       "      <td>2019984107019</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>984107019</td>\n",
-       "      <td>337522.0</td>\n",
-       "      <td>591682.0</td>\n",
-       "      <td>-2.983499</td>\n",
-       "      <td>55.215407</td>\n",
-       "      <td>98</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8511622</th>\n",
-       "      <td>2019984107219</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>984107219</td>\n",
-       "      <td>318544.0</td>\n",
-       "      <td>567087.0</td>\n",
-       "      <td>-3.274645</td>\n",
-       "      <td>54.991685</td>\n",
-       "      <td>98</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8511623</th>\n",
-       "      <td>2019984107419</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>984107419</td>\n",
-       "      <td>336525.0</td>\n",
-       "      <td>584226.0</td>\n",
-       "      <td>-2.997491</td>\n",
-       "      <td>55.148292</td>\n",
-       "      <td>98</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8511624</th>\n",
-       "      <td>201998QC01004</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>98QC01004</td>\n",
-       "      <td>291367.0</td>\n",
-       "      <td>608364.0</td>\n",
-       "      <td>-3.715064</td>\n",
-       "      <td>55.357237</td>\n",
-       "      <td>98</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>2287427 rows × 36 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        accident_index  accident_year accident_reference  \\\n",
-       "6224198  200501BS00001           2005          01BS00001   \n",
-       "6224199  200501BS00002           2005          01BS00002   \n",
-       "6224200  200501BS00003           2005          01BS00003   \n",
-       "6224201  200501BS00004           2005          01BS00004   \n",
-       "6224202  200501BS00005           2005          01BS00005   \n",
-       "...                ...            ...                ...   \n",
-       "8511620  2019984106919           2019          984106919   \n",
-       "8511621  2019984107019           2019          984107019   \n",
-       "8511622  2019984107219           2019          984107219   \n",
-       "8511623  2019984107419           2019          984107419   \n",
-       "8511624  201998QC01004           2019          98QC01004   \n",
-       "\n",
-       "         location_easting_osgr  location_northing_osgr  longitude   latitude  \\\n",
-       "6224198               525680.0                178240.0  -0.191170  51.489096   \n",
-       "6224199               524170.0                181650.0  -0.211708  51.520075   \n",
-       "6224200               524520.0                182240.0  -0.206458  51.525301   \n",
-       "6224201               526900.0                177530.0  -0.173862  51.482442   \n",
-       "6224202               528060.0                179040.0  -0.156618  51.495752   \n",
-       "...                        ...                     ...        ...        ...   \n",
-       "8511620               312635.0                573392.0  -3.368899  55.047323   \n",
-       "8511621               337522.0                591682.0  -2.983499  55.215407   \n",
-       "8511622               318544.0                567087.0  -3.274645  54.991685   \n",
-       "8511623               336525.0                584226.0  -2.997491  55.148292   \n",
-       "8511624               291367.0                608364.0  -3.715064  55.357237   \n",
-       "\n",
-       "         police_force  accident_severity  number_of_vehicles  ...  \\\n",
-       "6224198             1                  2                   1  ...   \n",
-       "6224199             1                  3                   1  ...   \n",
-       "6224200             1                  3                   2  ...   \n",
-       "6224201             1                  3                   1  ...   \n",
-       "6224202             1                  3                   1  ...   \n",
-       "...               ...                ...                 ...  ...   \n",
-       "8511620            98                  3                   1  ...   \n",
-       "8511621            98                  3                   4  ...   \n",
-       "8511622            98                  3                   2  ...   \n",
-       "8511623            98                  3                   1  ...   \n",
-       "8511624            98                  2                   1  ...   \n",
-       "\n",
-       "         pedestrian_crossing_physical_facilities light_conditions  \\\n",
-       "6224198                                        1                1   \n",
-       "6224199                                        5                4   \n",
-       "6224200                                        0                4   \n",
-       "6224201                                        0                1   \n",
-       "6224202                                        0                7   \n",
-       "...                                          ...              ...   \n",
-       "8511620                                        0                1   \n",
-       "8511621                                        0                1   \n",
-       "8511622                                        0                1   \n",
-       "8511623                                        0                1   \n",
-       "8511624                                        0                1   \n",
-       "\n",
-       "         weather_conditions road_surface_conditions  \\\n",
-       "6224198                   2                       2   \n",
-       "6224199                   1                       1   \n",
-       "6224200                   1                       1   \n",
-       "6224201                   1                       1   \n",
-       "6224202                   1                       2   \n",
-       "...                     ...                     ...   \n",
-       "8511620                   1                       2   \n",
-       "8511621                   1                       2   \n",
-       "8511622                   1                       1   \n",
-       "8511623                   1                       1   \n",
-       "8511624                   1                       1   \n",
-       "\n",
-       "         special_conditions_at_site carriageway_hazards urban_or_rural_area  \\\n",
-       "6224198                           0                   0                   1   \n",
-       "6224199                           0                   0                   1   \n",
-       "6224200                           0                   0                   1   \n",
-       "6224201                           0                   0                   1   \n",
-       "6224202                           0                   0                   1   \n",
-       "...                             ...                 ...                 ...   \n",
-       "8511620                           0                   0                   2   \n",
-       "8511621                           0                   0                   2   \n",
-       "8511622                           0                   0                   2   \n",
-       "8511623                           0                   0                   2   \n",
-       "8511624                           0                   0                   2   \n",
-       "\n",
-       "         did_police_officer_attend_scene_of_accident  trunk_road_flag  \\\n",
-       "6224198                                            1                2   \n",
-       "6224199                                            1                2   \n",
-       "6224200                                            1                2   \n",
-       "6224201                                            1                2   \n",
-       "6224202                                            1                2   \n",
-       "...                                              ...              ...   \n",
-       "8511620                                            1               -1   \n",
-       "8511621                                            1               -1   \n",
-       "8511622                                            2               -1   \n",
-       "8511623                                            2               -1   \n",
-       "8511624                                            1               -1   \n",
-       "\n",
-       "         lsoa_of_accident_location  \n",
-       "6224198                  E01002849  \n",
-       "6224199                  E01002909  \n",
-       "6224200                  E01002857  \n",
-       "6224201                  E01002840  \n",
-       "6224202                  E01002863  \n",
-       "...                            ...  \n",
-       "8511620                         -1  \n",
-       "8511621                         -1  \n",
-       "8511622                         -1  \n",
-       "8511623                         -1  \n",
-       "8511624                         -1  \n",
-       "\n",
-       "[2287427 rows x 36 columns]"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "Acc"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Give the vehicle table the same lowercase key column name that Acc uses\n",
-    "# ('accident_index'), presumably so the two tables can later be matched on it.\n",
-    "# Acc needs no rename: its column is already called 'accident_index'.\n",
-    "vechilee = vechilee.rename(columns={'Accident_Index': 'accident_index'})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "# Keep only the vehicle rows whose Year value is greater than 2004.\n",
-    "mask = 2004 < vechilee['Year']\n",
-    "vechilee = vechilee[mask]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0          3\n",
-       "1          4\n",
-       "2          5\n",
-       "3          6\n",
-       "4          2\n",
-       "          ..\n",
-       "2287422    7\n",
-       "2287423    5\n",
-       "2287424    6\n",
-       "2287425    7\n",
-       "2287426    1\n",
-       "Name: day_of_week, Length: 2287427, dtype: int64"
-      ]
-     },
-     "execution_count": 57,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "Acc['day_of_week']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>accident_index</th>\n",
-       "      <th>accident_year</th>\n",
-       "      <th>accident_reference</th>\n",
-       "      <th>location_easting_osgr</th>\n",
-       "      <th>location_northing_osgr</th>\n",
-       "      <th>longitude</th>\n",
-       "      <th>latitude</th>\n",
-       "      <th>police_force</th>\n",
-       "      <th>accident_severity</th>\n",
-       "      <th>number_of_vehicles</th>\n",
-       "      <th>...</th>\n",
-       "      <th>pedestrian_crossing_physical_facilities</th>\n",
-       "      <th>light_conditions</th>\n",
-       "      <th>weather_conditions</th>\n",
-       "      <th>road_surface_conditions</th>\n",
-       "      <th>special_conditions_at_site</th>\n",
-       "      <th>carriageway_hazards</th>\n",
-       "      <th>urban_or_rural_area</th>\n",
-       "      <th>did_police_officer_attend_scene_of_accident</th>\n",
-       "      <th>trunk_road_flag</th>\n",
-       "      <th>lsoa_of_accident_location</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>200501BS00001</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00001</td>\n",
-       "      <td>525680.0</td>\n",
-       "      <td>178240.0</td>\n",
-       "      <td>-0.191170</td>\n",
-       "      <td>51.489096</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002849</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>200501BS00002</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00002</td>\n",
-       "      <td>524170.0</td>\n",
-       "      <td>181650.0</td>\n",
-       "      <td>-0.211708</td>\n",
-       "      <td>51.520075</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002909</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>200501BS00003</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00003</td>\n",
-       "      <td>524520.0</td>\n",
-       "      <td>182240.0</td>\n",
-       "      <td>-0.206458</td>\n",
-       "      <td>51.525301</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002857</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>200501BS00004</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00004</td>\n",
-       "      <td>526900.0</td>\n",
-       "      <td>177530.0</td>\n",
-       "      <td>-0.173862</td>\n",
-       "      <td>51.482442</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002840</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>200501BS00005</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>01BS00005</td>\n",
-       "      <td>528060.0</td>\n",
-       "      <td>179040.0</td>\n",
-       "      <td>-0.156618</td>\n",
-       "      <td>51.495752</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>7</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>E01002863</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2287422</th>\n",
-       "      <td>2019984106919</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>984106919</td>\n",
-       "      <td>312635.0</td>\n",
-       "      <td>573392.0</td>\n",
-       "      <td>-3.368899</td>\n",
-       "      <td>55.047323</td>\n",
-       "      <td>98</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2287423</th>\n",
-       "      <td>2019984107019</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>984107019</td>\n",
-       "      <td>337522.0</td>\n",
-       "      <td>591682.0</td>\n",
-       "      <td>-2.983499</td>\n",
-       "      <td>55.215407</td>\n",
-       "      <td>98</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2287424</th>\n",
-       "      <td>2019984107219</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>984107219</td>\n",
-       "      <td>318544.0</td>\n",
-       "      <td>567087.0</td>\n",
-       "      <td>-3.274645</td>\n",
-       "      <td>54.991685</td>\n",
-       "      <td>98</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2287425</th>\n",
-       "      <td>2019984107419</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>984107419</td>\n",
-       "      <td>336525.0</td>\n",
-       "      <td>584226.0</td>\n",
-       "      <td>-2.997491</td>\n",
-       "      <td>55.148292</td>\n",
-       "      <td>98</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2287426</th>\n",
-       "      <td>201998QC01004</td>\n",
-       "      <td>2019</td>\n",
-       "      <td>98QC01004</td>\n",
-       "      <td>291367.0</td>\n",
-       "      <td>608364.0</td>\n",
-       "      <td>-3.715064</td>\n",
-       "      <td>55.357237</td>\n",
-       "      <td>98</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>2287427 rows × 36 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        accident_index  accident_year accident_reference  \\\n",
-       "0        200501BS00001           2005          01BS00001   \n",
-       "1        200501BS00002           2005          01BS00002   \n",
-       "2        200501BS00003           2005          01BS00003   \n",
-       "3        200501BS00004           2005          01BS00004   \n",
-       "4        200501BS00005           2005          01BS00005   \n",
-       "...                ...            ...                ...   \n",
-       "2287422  2019984106919           2019          984106919   \n",
-       "2287423  2019984107019           2019          984107019   \n",
-       "2287424  2019984107219           2019          984107219   \n",
-       "2287425  2019984107419           2019          984107419   \n",
-       "2287426  201998QC01004           2019          98QC01004   \n",
-       "\n",
-       "         location_easting_osgr  location_northing_osgr  longitude   latitude  \\\n",
-       "0                     525680.0                178240.0  -0.191170  51.489096   \n",
-       "1                     524170.0                181650.0  -0.211708  51.520075   \n",
-       "2                     524520.0                182240.0  -0.206458  51.525301   \n",
-       "3                     526900.0                177530.0  -0.173862  51.482442   \n",
-       "4                     528060.0                179040.0  -0.156618  51.495752   \n",
-       "...                        ...                     ...        ...        ...   \n",
-       "2287422               312635.0                573392.0  -3.368899  55.047323   \n",
-       "2287423               337522.0                591682.0  -2.983499  55.215407   \n",
-       "2287424               318544.0                567087.0  -3.274645  54.991685   \n",
-       "2287425               336525.0                584226.0  -2.997491  55.148292   \n",
-       "2287426               291367.0                608364.0  -3.715064  55.357237   \n",
-       "\n",
-       "         police_force  accident_severity  number_of_vehicles  ...  \\\n",
-       "0                   1                  2                   1  ...   \n",
-       "1                   1                  3                   1  ...   \n",
-       "2                   1                  3                   2  ...   \n",
-       "3                   1                  3                   1  ...   \n",
-       "4                   1                  3                   1  ...   \n",
-       "...               ...                ...                 ...  ...   \n",
-       "2287422            98                  3                   1  ...   \n",
-       "2287423            98                  3                   4  ...   \n",
-       "2287424            98                  3                   2  ...   \n",
-       "2287425            98                  3                   1  ...   \n",
-       "2287426            98                  2                   1  ...   \n",
-       "\n",
-       "         pedestrian_crossing_physical_facilities light_conditions  \\\n",
-       "0                                              1                1   \n",
-       "1                                              5                4   \n",
-       "2                                              0                4   \n",
-       "3                                              0                1   \n",
-       "4                                              0                7   \n",
-       "...                                          ...              ...   \n",
-       "2287422                                        0                1   \n",
-       "2287423                                        0                1   \n",
-       "2287424                                        0                1   \n",
-       "2287425                                        0                1   \n",
-       "2287426                                        0                1   \n",
-       "\n",
-       "         weather_conditions road_surface_conditions  \\\n",
-       "0                         2                       2   \n",
-       "1                         1                       1   \n",
-       "2                         1                       1   \n",
-       "3                         1                       1   \n",
-       "4                         1                       2   \n",
-       "...                     ...                     ...   \n",
-       "2287422                   1                       2   \n",
-       "2287423                   1                       2   \n",
-       "2287424                   1                       1   \n",
-       "2287425                   1                       1   \n",
-       "2287426                   1                       1   \n",
-       "\n",
-       "         special_conditions_at_site carriageway_hazards urban_or_rural_area  \\\n",
-       "0                                 0                   0                   1   \n",
-       "1                                 0                   0                   1   \n",
-       "2                                 0                   0                   1   \n",
-       "3                                 0                   0                   1   \n",
-       "4                                 0                   0                   1   \n",
-       "...                             ...                 ...                 ...   \n",
-       "2287422                           0                   0                   2   \n",
-       "2287423                           0                   0                   2   \n",
-       "2287424                           0                   0                   2   \n",
-       "2287425                           0                   0                   2   \n",
-       "2287426                           0                   0                   2   \n",
-       "\n",
-       "         did_police_officer_attend_scene_of_accident  trunk_road_flag  \\\n",
-       "0                                                  1                2   \n",
-       "1                                                  1                2   \n",
-       "2                                                  1                2   \n",
-       "3                                                  1                2   \n",
-       "4                                                  1                2   \n",
-       "...                                              ...              ...   \n",
-       "2287422                                            1               -1   \n",
-       "2287423                                            1               -1   \n",
-       "2287424                                            2               -1   \n",
-       "2287425                                            2               -1   \n",
-       "2287426                                            1               -1   \n",
-       "\n",
-       "         lsoa_of_accident_location  \n",
-       "0                        E01002849  \n",
-       "1                        E01002909  \n",
-       "2                        E01002857  \n",
-       "3                        E01002840  \n",
-       "4                        E01002863  \n",
-       "...                            ...  \n",
-       "2287422                         -1  \n",
-       "2287423                         -1  \n",
-       "2287424                         -1  \n",
-       "2287425                         -1  \n",
-       "2287426                         -1  \n",
-       "\n",
-       "[2287427 rows x 36 columns]"
-      ]
-     },
-     "execution_count": 23,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Discard the current index in favour of a fresh 0..n-1 one, then show the frame.\n",
-    "Acc = Acc.reset_index(drop=True)\n",
-    "Acc"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>accident_index</th>\n",
-       "      <th>Age_Band_of_Driver</th>\n",
-       "      <th>Age_of_Vehicle</th>\n",
-       "      <th>Driver_Home_Area_Type</th>\n",
-       "      <th>Driver_IMD_Decile</th>\n",
-       "      <th>Engine_Capacity_.CC.</th>\n",
-       "      <th>Hit_Object_in_Carriageway</th>\n",
-       "      <th>Hit_Object_off_Carriageway</th>\n",
-       "      <th>Journey_Purpose_of_Driver</th>\n",
-       "      <th>Junction_Location</th>\n",
-       "      <th>...</th>\n",
-       "      <th>Towing_and_Articulation</th>\n",
-       "      <th>Vehicle_Leaving_Carriageway</th>\n",
-       "      <th>Vehicle_Location.Restricted_Lane</th>\n",
-       "      <th>Vehicle_Manoeuvre</th>\n",
-       "      <th>Vehicle_Reference</th>\n",
-       "      <th>Vehicle_Type</th>\n",
-       "      <th>Was_Vehicle_Left_Hand_Drive</th>\n",
-       "      <th>X1st_Point_of_Impact</th>\n",
-       "      <th>Year</th>\n",
-       "      <th>accident_severity</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>200501BS00002</td>\n",
-       "      <td>36 - 45</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>Data missing or out of range</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>8268.0</td>\n",
-       "      <td>None</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Journey as part of work</td>\n",
-       "      <td>Leaving roundabout</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Did not leave carriageway</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Slowing or stopping</td>\n",
-       "      <td>1</td>\n",
-       "      <td>Bus or coach (17 or more pass seats)</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Nearside</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>200501BS00003</td>\n",
-       "      <td>26 - 35</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>Urban area</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>8300.0</td>\n",
-       "      <td>Parked vehicle</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Journey as part of work</td>\n",
-       "      <td>Not at or within 20 metres of junction</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Did not leave carriageway</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Going ahead right-hand bend</td>\n",
-       "      <td>1</td>\n",
-       "      <td>Bus or coach (17 or more pass seats)</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Nearside</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>200501BS00004</td>\n",
-       "      <td>46 - 55</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>Urban area</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1769.0</td>\n",
-       "      <td>None</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Other/Not known (2005-10)</td>\n",
-       "      <td>Not at or within 20 metres of junction</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Did not leave carriageway</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Going ahead other</td>\n",
-       "      <td>1</td>\n",
-       "      <td>Car</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Front</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>200501BS00005</td>\n",
-       "      <td>46 - 55</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>Data missing or out of range</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>85.0</td>\n",
-       "      <td>Kerb</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Other/Not known (2005-10)</td>\n",
-       "      <td>Not at or within 20 metres of junction</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Did not leave carriageway</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Going ahead other</td>\n",
-       "      <td>1</td>\n",
-       "      <td>Motorcycle 125cc and under</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Front</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>200501BS00006</td>\n",
-       "      <td>46 - 55</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>Urban area</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>2976.0</td>\n",
-       "      <td>None</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Other/Not known (2005-10)</td>\n",
-       "      <td>Not at or within 20 metres of junction</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Did not leave carriageway</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Moving off</td>\n",
-       "      <td>1</td>\n",
-       "      <td>Car</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Did not impact</td>\n",
-       "      <td>2005</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2027639</th>\n",
-       "      <td>2016984131116</td>\n",
-       "      <td>21 - 25</td>\n",
-       "      <td>14.0</td>\n",
-       "      <td>Urban area</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>1598.0</td>\n",
-       "      <td>None</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Not known</td>\n",
-       "      <td>Mid Junction - on roundabout or on main road</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Did not leave carriageway</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Going ahead other</td>\n",
-       "      <td>1</td>\n",
-       "      <td>Car</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Front</td>\n",
-       "      <td>2016</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2027640</th>\n",
-       "      <td>2016984131116</td>\n",
-       "      <td>56 - 65</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Small town</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>1598.0</td>\n",
-       "      <td>None</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Commuting to/from work</td>\n",
-       "      <td>Mid Junction - on roundabout or on main road</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Offside</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Going ahead other</td>\n",
-       "      <td>2</td>\n",
-       "      <td>Car</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Front</td>\n",
-       "      <td>2016</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2027641</th>\n",
-       "      <td>2016984131216</td>\n",
-       "      <td>56 - 65</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>Urban area</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Central crash barrier</td>\n",
-       "      <td>Journey as part of work</td>\n",
-       "      <td>Not at or within 20 metres of junction</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Offside on to central reservation</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Going ahead other</td>\n",
-       "      <td>1</td>\n",
-       "      <td>Goods 7.5 tonnes mgw and over</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Offside</td>\n",
-       "      <td>2016</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2027642</th>\n",
-       "      <td>2016984131316</td>\n",
-       "      <td>16 - 20</td>\n",
-       "      <td>13.0</td>\n",
-       "      <td>Urban area</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>1796.0</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Tree</td>\n",
-       "      <td>Not known</td>\n",
-       "      <td>Not at or within 20 metres of junction</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Offside</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Going ahead other</td>\n",
-       "      <td>1</td>\n",
-       "      <td>Car</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Front</td>\n",
-       "      <td>2016</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2027643</th>\n",
-       "      <td>2016984133416</td>\n",
-       "      <td>46 - 55</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>Urban area</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>2184.0</td>\n",
-       "      <td>None</td>\n",
-       "      <td>Wall or fence</td>\n",
-       "      <td>Not known</td>\n",
-       "      <td>Not at or within 20 metres of junction</td>\n",
-       "      <td>...</td>\n",
-       "      <td>No tow/articulation</td>\n",
-       "      <td>Nearside</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>Going ahead other</td>\n",
-       "      <td>1</td>\n",
-       "      <td>Car</td>\n",
-       "      <td>No</td>\n",
-       "      <td>Nearside</td>\n",
-       "      <td>2016</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>2027644 rows × 25 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        accident_index Age_Band_of_Driver  Age_of_Vehicle  \\\n",
-       "0        200501BS00002            36 - 45             3.0   \n",
-       "1        200501BS00003            26 - 35             5.0   \n",
-       "2        200501BS00004            46 - 55             4.0   \n",
-       "3        200501BS00005            46 - 55            10.0   \n",
-       "4        200501BS00006            46 - 55             1.0   \n",
-       "...                ...                ...             ...   \n",
-       "2027639  2016984131116            21 - 25            14.0   \n",
-       "2027640  2016984131116            56 - 65             NaN   \n",
-       "2027641  2016984131216            56 - 65             NaN   \n",
-       "2027642  2016984131316            16 - 20            13.0   \n",
-       "2027643  2016984133416            46 - 55             5.0   \n",
-       "\n",
-       "                Driver_Home_Area_Type  Driver_IMD_Decile  \\\n",
-       "0        Data missing or out of range                NaN   \n",
-       "1                          Urban area                3.0   \n",
-       "2                          Urban area                1.0   \n",
-       "3        Data missing or out of range                NaN   \n",
-       "4                          Urban area                4.0   \n",
-       "...                               ...                ...   \n",
-       "2027639                    Urban area                NaN   \n",
-       "2027640                    Small town                NaN   \n",
-       "2027641                    Urban area                3.0   \n",
-       "2027642                    Urban area                6.0   \n",
-       "2027643                    Urban area                NaN   \n",
-       "\n",
-       "         Engine_Capacity_.CC. Hit_Object_in_Carriageway  \\\n",
-       "0                      8268.0                      None   \n",
-       "1                      8300.0            Parked vehicle   \n",
-       "2                      1769.0                      None   \n",
-       "3                        85.0                      Kerb   \n",
-       "4                      2976.0                      None   \n",
-       "...                       ...                       ...   \n",
-       "2027639                1598.0                      None   \n",
-       "2027640                1598.0                      None   \n",
-       "2027641                   NaN                      None   \n",
-       "2027642                1796.0                      None   \n",
-       "2027643                2184.0                      None   \n",
-       "\n",
-       "        Hit_Object_off_Carriageway  Journey_Purpose_of_Driver  \\\n",
-       "0                             None    Journey as part of work   \n",
-       "1                             None    Journey as part of work   \n",
-       "2                             None  Other/Not known (2005-10)   \n",
-       "3                             None  Other/Not known (2005-10)   \n",
-       "4                             None  Other/Not known (2005-10)   \n",
-       "...                            ...                        ...   \n",
-       "2027639                       None                  Not known   \n",
-       "2027640                       None     Commuting to/from work   \n",
-       "2027641      Central crash barrier    Journey as part of work   \n",
-       "2027642                       Tree                  Not known   \n",
-       "2027643              Wall or fence                  Not known   \n",
-       "\n",
-       "                                    Junction_Location  ...  \\\n",
-       "0                                  Leaving roundabout  ...   \n",
-       "1              Not at or within 20 metres of junction  ...   \n",
-       "2              Not at or within 20 metres of junction  ...   \n",
-       "3              Not at or within 20 metres of junction  ...   \n",
-       "4              Not at or within 20 metres of junction  ...   \n",
-       "...                                               ...  ...   \n",
-       "2027639  Mid Junction - on roundabout or on main road  ...   \n",
-       "2027640  Mid Junction - on roundabout or on main road  ...   \n",
-       "2027641        Not at or within 20 metres of junction  ...   \n",
-       "2027642        Not at or within 20 metres of junction  ...   \n",
-       "2027643        Not at or within 20 metres of junction  ...   \n",
-       "\n",
-       "        Towing_and_Articulation        Vehicle_Leaving_Carriageway  \\\n",
-       "0           No tow/articulation          Did not leave carriageway   \n",
-       "1           No tow/articulation          Did not leave carriageway   \n",
-       "2           No tow/articulation          Did not leave carriageway   \n",
-       "3           No tow/articulation          Did not leave carriageway   \n",
-       "4           No tow/articulation          Did not leave carriageway   \n",
-       "...                         ...                                ...   \n",
-       "2027639     No tow/articulation          Did not leave carriageway   \n",
-       "2027640     No tow/articulation                            Offside   \n",
-       "2027641     No tow/articulation  Offside on to central reservation   \n",
-       "2027642     No tow/articulation                            Offside   \n",
-       "2027643     No tow/articulation                           Nearside   \n",
-       "\n",
-       "        Vehicle_Location.Restricted_Lane            Vehicle_Manoeuvre  \\\n",
-       "0                                    0.0          Slowing or stopping   \n",
-       "1                                    0.0  Going ahead right-hand bend   \n",
-       "2                                    0.0            Going ahead other   \n",
-       "3                                    0.0            Going ahead other   \n",
-       "4                                    0.0                   Moving off   \n",
-       "...                                  ...                          ...   \n",
-       "2027639                              0.0            Going ahead other   \n",
-       "2027640                              0.0            Going ahead other   \n",
-       "2027641                              0.0            Going ahead other   \n",
-       "2027642                              0.0            Going ahead other   \n",
-       "2027643                              0.0            Going ahead other   \n",
-       "\n",
-       "        Vehicle_Reference                          Vehicle_Type  \\\n",
-       "0                       1  Bus or coach (17 or more pass seats)   \n",
-       "1                       1  Bus or coach (17 or more pass seats)   \n",
-       "2                       1                                   Car   \n",
-       "3                       1            Motorcycle 125cc and under   \n",
-       "4                       1                                   Car   \n",
-       "...                   ...                                   ...   \n",
-       "2027639                 1                                   Car   \n",
-       "2027640                 2                                   Car   \n",
-       "2027641                 1         Goods 7.5 tonnes mgw and over   \n",
-       "2027642                 1                                   Car   \n",
-       "2027643                 1                                   Car   \n",
-       "\n",
-       "        Was_Vehicle_Left_Hand_Drive  X1st_Point_of_Impact  Year  \\\n",
-       "0                                No              Nearside  2005   \n",
-       "1                                No              Nearside  2005   \n",
-       "2                                No                 Front  2005   \n",
-       "3                                No                 Front  2005   \n",
-       "4                                No        Did not impact  2005   \n",
-       "...                             ...                   ...   ...   \n",
-       "2027639                          No                 Front  2016   \n",
-       "2027640                          No                 Front  2016   \n",
-       "2027641                          No               Offside  2016   \n",
-       "2027642                          No                 Front  2016   \n",
-       "2027643                          No              Nearside  2016   \n",
-       "\n",
-       "         accident_severity  \n",
-       "0                        3  \n",
-       "1                        3  \n",
-       "2                        3  \n",
-       "3                        3  \n",
-       "4                        3  \n",
-       "...                    ...  \n",
-       "2027639                  3  \n",
-       "2027640                  3  \n",
-       "2027641                  3  \n",
-       "2027642                  3  \n",
-       "2027643                  3  \n",
-       "\n",
-       "[2027644 rows x 25 columns]"
-      ]
-     },
-     "execution_count": 37,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Add accident_severity from Acc to every row of vechilee that has a\n",
-    "# matching accident_index (inner join, so unmatched rows are dropped).\n",
-    "vech23 = vechilee.merge(\n",
-    "    Acc[['accident_index', 'accident_severity']],\n",
-    "    how='inner',\n",
-    "    on='accident_index',\n",
-    ")\n",
-    "\n",
-    "vech23"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as stats\n",
-    "import statsmodels.api as sm\n",
-    "from statsmodels.formula.api import ols\n",
-    "\n",
-    "# One-way ANOVA: does mean accident severity differ between vehicle types?\n",
-    "# accident_severity is used as a numeric code (1 = Fatal, 2 = Serious, 3 = Slight).\n",
-    "SEED = 12\n",
-    "SAMPLE_SIZE = 20000  # number of rows used for the fit\n",
-    "np.random.seed(SEED)\n",
-    "\n",
-    "# Drop missing values while Vehicle_Type still holds its labels. Encoding with\n",
-    "# .cat.codes first would turn NaN into -1, which dropna() can no longer remove.\n",
-    "vech2 = vech23[['accident_severity', 'Vehicle_Type']].dropna()\n",
-    "\n",
-    "# vech23 appears to be ordered by accident_index, so its first rows are not a\n",
-    "# representative subset; draw a reproducible random sample instead.\n",
-    "vech2 = vech2.sample(n=min(SAMPLE_SIZE, len(vech2)), random_state=SEED)\n",
-    "vech2 = vech2.reset_index(drop=True)\n",
-    "\n",
-    "severity_frame = vech2.rename(columns={'accident_severity': 'severity'})\n",
-    "\n",
-    "# C(...) declares Vehicle_Type as a categorical factor. Feeding integer category\n",
-    "# codes in as a plain number fits a single linear slope (1 degree of freedom)\n",
-    "# over arbitrary code values instead of comparing the group means.\n",
-    "model = ols('severity ~ C(Vehicle_Type)', data=severity_frame).fit()\n",
-    "\n",
-    "anova_result = sm.stats.anova_lm(model, typ=2)\n",
-    "anova_result"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as stats\n",
-    "import statsmodels.api as sm\n",
-    "from statsmodels.formula.api import ols\n",
-    "\n",
-    "np.random.seed(12)\n",
-    "\n",
-    "# One-way ANOVA: does mean accident severity differ by sex of driver?\n",
-    "# The model is fitted on the first N_ROWS complete rows of vech23.\n",
-    "# NOTE(review): this is a head slice, not a random sample, so the seed set\n",
-    "# above does not influence which rows are used - confirm that is intended.\n",
-    "N_ROWS = 20000\n",
-    "\n",
-    "# Drop missing values BEFORE encoding: .cat.codes turns NaN into -1, after\n",
-    "# which dropna() can no longer remove those rows.\n",
-    "vech2 = vech23[['accident_severity', 'Sex_of_Driver']].dropna()\n",
-    "vech2 = vech2.iloc[:N_ROWS].reset_index(drop=True)\n",
-    "vech2['Sex_of_Driver'] = vech2['Sex_of_Driver'].astype('category').cat.codes\n",
-    "\n",
-    "severity_frame = pd.DataFrame({\n",
-    "    'severity': vech2['accident_severity'],\n",
-    "    'Sex_of_Driver': vech2['Sex_of_Driver'],\n",
-    "})\n",
-    "\n",
-    "# C(...) tells the formula to treat the integer codes as group labels.\n",
-    "# Without it they enter the model as one continuous regressor (a single\n",
-    "# slope with 1 degree of freedom), which is a linear regression on arbitrary\n",
-    "# code numbers rather than an ANOVA across groups.\n",
-    "model = ols('severity ~ C(Sex_of_Driver)', data=severity_frame).fit()\n",
-    "\n",
-    "anova_result = sm.stats.anova_lm(model, typ=2)\n",
-    "print(anova_result)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3.6.7 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.7"
-  },
-  "orig_nbformat": 4,
-  "vscode": {
-   "interpreter": {
-    "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}