diff --git a/Disertationmainfile3.ipynb b/Disertationmainfile3.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..4b34508d004cba385c00e956deedcba505be4e50
--- /dev/null
+++ b/Disertationmainfile3.ipynb
@@ -0,0 +1,7546 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyspark import SparkConf\n",
+    "from pyspark import SparkContext\n",
+    "from pyspark.sql import SparkSession\n",
+    "from pyspark.sql.functions import udf\n",
+    "from pyspark.sql.types import IntegerType\n",
+    "from pyspark.sql.types import LongType\n",
+    "from pyspark.sql.types import FloatType\n",
+    "from pyspark.rdd import RDD\n",
+    "from pyspark.sql.types import StringType\n",
+    "from pyspark.sql.functions import col\n",
+    "import pyspark.sql.functions as F\n",
+    "import csv\n",
+    "from datetime import datetime\n",
+    "from functools import reduce\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "# Spark master URL: \"local[*]\" runs Spark in local mode with one worker thread per logical core;\n",
+    "# \"local[k]\" would run it with exactly k worker threads instead.\n",
+    "master = \"local[*]\"\n",
+    "# Application name shown on the Spark UI page.\n",
+    "app_name = \"Big data Analysis of Road Crash Data\"\n",
+    "# Collect the master URL and application name in a SparkConf (each setter returns the conf itself).\n",
+    "spark_conf = SparkConf()\n",
+    "spark_conf.setMaster(master)\n",
+    "spark_conf.setAppName(app_name)\n",
+    "# Create (or reuse) the SparkSession, then expose its SparkContext with logging limited to errors.\n",
+    "spark = SparkSession.builder.config(conf=spark_conf).getOrCreate()\n",
+    "sc = spark.sparkContext\n",
+    "sc.setLogLevel('ERROR')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Location of the DfT road casualty statistics accident extract (1979-2020).\n",
+    "accident_csv_path = '/Users/Asfandyar/Downloads/dft-road-casualty-statistics-accident-1979-2020.csv'\n",
+    "# Read the CSV with its header row; the double-quote character is the escape character.\n",
+    "# No schema is supplied or inferred here, so Spark's CSV reader loads the columns as strings.\n",
+    "csv_reader = spark.read.format('csv').option('header', True).option('escape', '\"')\n",
+    "Accident_Information_df = csv_reader.load(accident_csv_path)\n",
+    "# NOTE: an earlier draft cast and filtered a 'Year' column at this point; that code was\n",
+    "# commented out and is not reproduced here.\n",
+    "# A2018 is the working alias for the loaded accident records.\n",
+    "A2018 = Accident_Information_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
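+      "+--------------+-------------+------------------+---------------------+----------------------+---------+---------+------------+-----------------+------------------+--------------------+----------+-----------+-----+------------------------+----------------------------+-----------------------+----------------+-----------------+---------+-----------+---------------+----------------+-----------------+------------------+---------------------------------+---------------------------------------+----------------+------------------+-----------------------+--------------------------+-------------------+-------------------+-------------------------------------------+---------------+-------------------------+\n",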
+      "|accident_index|accident_year|accident_reference|location_easting_osgr|location_northing_osgr|longitude| latitude|police_force|accident_severity|number_of_vehicles|number_of_casualties|      date|day_of_week| time|local_authority_district|local_authority_ons_district|local_authority_highway|first_road_class|first_road_number|road_type|speed_limit|junction_detail|junction_control|second_road_class|second_road_number|pedestrian_crossing_human_control|pedestrian_crossing_physical_facilities|light_conditions|weather_conditions|road_surface_conditions|special_conditions_at_site|carriageway_hazards|urban_or_rural_area|did_police_officer_attend_scene_of_accident|trunk_road_flag|lsoa_of_accident_location|\n",
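+      "+--------------+-------------+------------------+---------------------+----------------------+---------+---------+------------+-----------------+------------------+--------------------+----------+-----------+-----+------------------------+----------------------------+-----------------------+----------------+-----------------+---------+-----------+---------------+----------------+-----------------+------------------+---------------------------------+---------------------------------------+----------------+------------------+-----------------------+--------------------------+-------------------+-------------------+-------------------------------------------+---------------+-------------------------+\n",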
+      "| 200501BS00001|         2005|         01BS00001|               525680|                178240| -0.19117|51.489096|           1|                2|                 1|                   1|04/01/2005|          3|17:42|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|\n",
+      "| 200501BS00002|         2005|         01BS00002|               524170|                181650|-0.211708|51.520075|           1|                3|                 1|                   1|05/01/2005|          4|17:36|                      12|                   E09000020|              E09000020|               4|              450|        3|         30|              6|               2|                5|                 0|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002909|\n",
+      "| 200501BS00003|         2005|         01BS00003|               524520|                182240|-0.206458|51.525301|           1|                3|                 2|                   1|06/01/2005|          5|00:15|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002857|\n",
+      "| 200501BS00004|         2005|         01BS00004|               526900|                177530|-0.173862|51.482442|           1|                3|                 1|                   1|07/01/2005|          6|10:35|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|\n",
+      "| 200501BS00005|         2005|         01BS00005|               528060|                179040|-0.156618|51.495752|           1|                3|                 1|                   1|10/01/2005|          2|21:13|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               7|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002863|\n",
+      "| 200501BS00006|         2005|         01BS00006|               524770|                181160|-0.203238| 51.51554|           1|                3|                 2|                   1|11/01/2005|          3|12:40|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 2|                      2|                         6|                  0|                  1|                                          1|              2|                E01002832|\n",
+      "| 200501BS00007|         2005|         01BS00007|               524220|                180830|-0.211277|51.512695|           1|                3|                 2|                   1|13/01/2005|          5|20:40|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|\n",
+      "| 200501BS00009|         2005|         01BS00009|               525890|                179710|-0.187623| 51.50226|           1|                3|                 1|                   2|14/01/2005|          6|17:35|                      12|                   E09000020|              E09000020|               3|              315|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002889|\n",
+      "| 200501BS00010|         2005|         01BS00010|               527350|                177650|-0.167342| 51.48342|           1|                3|                 2|                   2|15/01/2005|          7|22:43|                      12|                   E09000020|              E09000020|               3|             3212|        6|         30|              6|               2|                4|               304|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002900|\n",
+      "| 200501BS00011|         2005|         01BS00011|               524550|                180810|-0.206531|51.512443|           1|                3|                 2|                   5|15/01/2005|          7|16:00|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              3|               4|                5|                 0|                                0|                                      8|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|\n",
+      "| 200501BS00012|         2005|         01BS00012|               526240|                178900|-0.182872|51.494902|           1|                3|                 1|                   1|16/01/2005|          1|00:42|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              6|               2|                4|               325|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002835|\n",
+      "| 200501BS00014|         2005|         01BS00014|               526170|                177690|-0.184312|51.484044|           1|                3|                 2|                   1|25/01/2005|          3|20:48|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              6|               2|                3|               308|                                0|                                      5|               4|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002912|\n",
+      "| 200501BS00015|         2005|         01BS00015|               525590|                178520|-0.192366|51.491632|           1|                3|                 1|                   1|11/01/2005|          3|12:55|                      12|                   E09000020|              E09000020|               6|                0|        2|         30|              3|               4|                3|              3220|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|\n",
+      "| 200501BS00016|         2005|         01BS00016|               527990|                178690|-0.157753|51.492622|           1|                3|                 2|                   1|18/01/2005|          3|05:01|                      12|                   E09000020|              E09000020|               3|             3217|        2|         30|              3|               4|                3|              3216|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002902|\n",
+      "| 200501BS00017|         2005|         01BS00017|               526700|                178970|-0.176224|51.495429|           1|                3|                 1|                   2|18/01/2005|          3|11:15|                      12|                   E09000020|              E09000020|               3|                4|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|\n",
+      "| 200501BS00018|         2005|         01BS00018|               526460|                177460| -0.18022|51.481912|           1|                3|                 1|                   1|18/01/2005|          3|10:50|                      12|                   E09000020|              E09000020|               3|             3217|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|\n",
+      "| 200501BS00019|         2005|         01BS00019|               524680|                179450|-0.205139|51.500191|           1|                2|                 2|                   1|20/01/2005|          5|00:15|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002864|\n",
+      "| 200501BS00020|         2005|         01BS00020|               527000|                179020|-0.171887|51.495811|           1|                3|                 2|                   1|21/01/2005|          6|09:15|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              3|               4|                3|                 4|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|\n",
+      "| 200501BS00021|         2005|         01BS00021|               527810|                178010| -0.16059|51.486552|           1|                3|                 2|                   1|21/01/2005|          6|21:16|                      12|                   E09000020|              E09000020|               4|              302|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002901|\n",
+      "| 200501BS00022|         2005|         01BS00022|               526790|                178980|-0.174925|51.495498|           1|                2|                 1|                   1|08/01/2005|          7|03:00|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|\n",
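+      "+--------------+-------------+------------------+---------------------+----------------------+---------+---------+------------+-----------------+------------------+--------------------+----------+-----------+-----+------------------------+----------------------------+-----------------------+----------------+-----------------+---------+-----------+---------------+----------------+-----------------+------------------+---------------------------------+---------------------------------------+----------------+------------------+-----------------------+--------------------------+-------------------+-------------------+-------------------------------------------+---------------+-------------------------+\n",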
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Keep accidents with accident_year strictly greater than 2004 ...\n",
+    "A2005 = A2018.where(col(\"accident_year\") > 2004)\n",
+    "# ... and strictly less than 2020.\n",
+    "# NOTE(review): the name A20052020 suggests 2020 is included, but < 2020 excludes it - confirm intent.\n",
+    "A20052020 = A2005.where(col(\"accident_year\") < 2020)\n",
+    "# Preview the first 20 rows of the filtered frame.\n",
+    "A20052020.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Point the working alias A2018 at the year-filtered frame.\n",
+    "A2018 = A20052020"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyspark.sql.functions import col, when\n",
+    "\n",
+    "# Single-letter codes from the original cell; nothing in this cell reads them.\n",
+    "# They are kept so that any other cell referring to them keeps working.\n",
+    "valueWhenTrue1 = \"M\"\n",
+    "valueWhenTrue2 = \"A\"\n",
+    "valueWhenTrue3 = \"A\"\n",
+    "valueWhenTrue4 = \"B\"\n",
+    "valueWhenTrue5 = \"C\"\n",
+    "valueWhenTrue6 = \"U\"\n",
+    "\n",
+    "# first_road_class code -> display label (codes 2-3 share \"A\", codes 4-6 share \"B,C & U\").\n",
+    "ROAD_CLASS_LABELS = {\n",
+    "    1: \"Motorway\",\n",
+    "    2: \"A\",\n",
+    "    3: \"A\",\n",
+    "    4: \"B,C & U\",\n",
+    "    5: \"B,C & U\",\n",
+    "    6: \"B,C & U\",\n",
+    "}\n",
+    "\n",
+    "# Build one CASE WHEN expression over the ORIGINAL column instead of six chained withColumn\n",
+    "# passes. The chained form compared already-relabelled strings such as \"Motorway\" with\n",
+    "# integers, which only works while an invalid string-to-int cast yields NULL (under ANSI SQL\n",
+    "# mode such a cast can raise), and it stacked six redundant projections in the plan.\n",
+    "raw_road_class = col(\"first_road_class\")\n",
+    "road_class_label = None\n",
+    "for road_code, road_label in ROAD_CLASS_LABELS.items():\n",
+    "    is_code = raw_road_class == road_code\n",
+    "    if road_class_label is None:\n",
+    "        road_class_label = when(is_code, road_label)\n",
+    "    else:\n",
+    "        road_class_label = road_class_label.when(is_code, road_label)\n",
+    "\n",
+    "# Values that match none of the codes are passed through unchanged.\n",
+    "A2018 = A2018.withColumn(\"first_road_class\", road_class_label.otherwise(raw_road_class))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
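+      "+--------------+-------------+------------------+---------------------+----------------------+---------+---------+------------+-----------------+------------------+--------------------+----------+-----------+-----+------------------------+----------------------------+-----------------------+----------------+-----------------+---------+-----------+---------------+----------------+-----------------+------------------+---------------------------------+---------------------------------------+----------------+------------------+-----------------------+--------------------------+-------------------+-------------------+-------------------------------------------+---------------+-------------------------+\n",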
+      "|accident_index|accident_year|accident_reference|location_easting_osgr|location_northing_osgr|longitude| latitude|police_force|accident_severity|number_of_vehicles|number_of_casualties|      date|day_of_week| time|local_authority_district|local_authority_ons_district|local_authority_highway|first_road_class|first_road_number|road_type|speed_limit|junction_detail|junction_control|second_road_class|second_road_number|pedestrian_crossing_human_control|pedestrian_crossing_physical_facilities|light_conditions|weather_conditions|road_surface_conditions|special_conditions_at_site|carriageway_hazards|urban_or_rural_area|did_police_officer_attend_scene_of_accident|trunk_road_flag|lsoa_of_accident_location|\n",
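+      "+--------------+-------------+------------------+---------------------+----------------------+---------+---------+------------+-----------------+------------------+--------------------+----------+-----------+-----+------------------------+----------------------------+-----------------------+----------------+-----------------+---------+-----------+---------------+----------------+-----------------+------------------+---------------------------------+---------------------------------------+----------------+------------------+-----------------------+--------------------------+-------------------+-------------------+-------------------------------------------+---------------+-------------------------+\n",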
+      "| 200501BS00001|         2005|         01BS00001|               525680|                178240| -0.19117|51.489096|           1|                2|                 1|                   1|04/01/2005|          3|17:42|                      12|                   E09000020|              E09000020|               A|             3218|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|\n",
+      "| 200501BS00002|         2005|         01BS00002|               524170|                181650|-0.211708|51.520075|           1|                3|                 1|                   1|05/01/2005|          4|17:36|                      12|                   E09000020|              E09000020|         B,C & U|              450|        3|         30|              6|               2|                5|                 0|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002909|\n",
+      "| 200501BS00003|         2005|         01BS00003|               524520|                182240|-0.206458|51.525301|           1|                3|                 2|                   1|06/01/2005|          5|00:15|                      12|                   E09000020|              E09000020|         B,C & U|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002857|\n",
+      "| 200501BS00004|         2005|         01BS00004|               526900|                177530|-0.173862|51.482442|           1|                3|                 1|                   1|07/01/2005|          6|10:35|                      12|                   E09000020|              E09000020|               A|             3220|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|\n",
+      "| 200501BS00005|         2005|         01BS00005|               528060|                179040|-0.156618|51.495752|           1|                3|                 1|                   1|10/01/2005|          2|21:13|                      12|                   E09000020|              E09000020|         B,C & U|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               7|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002863|\n",
+      "| 200501BS00006|         2005|         01BS00006|               524770|                181160|-0.203238| 51.51554|           1|                3|                 2|                   1|11/01/2005|          3|12:40|                      12|                   E09000020|              E09000020|         B,C & U|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 2|                      2|                         6|                  0|                  1|                                          1|              2|                E01002832|\n",
+      "| 200501BS00007|         2005|         01BS00007|               524220|                180830|-0.211277|51.512695|           1|                3|                 2|                   1|13/01/2005|          5|20:40|                      12|                   E09000020|              E09000020|         B,C & U|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|\n",
+      "| 200501BS00009|         2005|         01BS00009|               525890|                179710|-0.187623| 51.50226|           1|                3|                 1|                   2|14/01/2005|          6|17:35|                      12|                   E09000020|              E09000020|               A|              315|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002889|\n",
+      "| 200501BS00010|         2005|         01BS00010|               527350|                177650|-0.167342| 51.48342|           1|                3|                 2|                   2|15/01/2005|          7|22:43|                      12|                   E09000020|              E09000020|               A|             3212|        6|         30|              6|               2|                4|               304|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002900|\n",
+      "| 200501BS00011|         2005|         01BS00011|               524550|                180810|-0.206531|51.512443|           1|                3|                 2|                   5|15/01/2005|          7|16:00|                      12|                   E09000020|              E09000020|         B,C & U|              450|        6|         30|              3|               4|                5|                 0|                                0|                                      8|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|\n",
+      "| 200501BS00012|         2005|         01BS00012|               526240|                178900|-0.182872|51.494902|           1|                3|                 1|                   1|16/01/2005|          1|00:42|                      12|                   E09000020|              E09000020|               A|                4|        6|         30|              6|               2|                4|               325|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002835|\n",
+      "| 200501BS00014|         2005|         01BS00014|               526170|                177690|-0.184312|51.484044|           1|                3|                 2|                   1|25/01/2005|          3|20:48|                      12|                   E09000020|              E09000020|               A|             3220|        6|         30|              6|               2|                3|               308|                                0|                                      5|               4|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002912|\n",
+      "| 200501BS00015|         2005|         01BS00015|               525590|                178520|-0.192366|51.491632|           1|                3|                 1|                   1|11/01/2005|          3|12:55|                      12|                   E09000020|              E09000020|         B,C & U|                0|        2|         30|              3|               4|                3|              3220|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|\n",
+      "| 200501BS00016|         2005|         01BS00016|               527990|                178690|-0.157753|51.492622|           1|                3|                 2|                   1|18/01/2005|          3|05:01|                      12|                   E09000020|              E09000020|               A|             3217|        2|         30|              3|               4|                3|              3216|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002902|\n",
+      "| 200501BS00017|         2005|         01BS00017|               526700|                178970|-0.176224|51.495429|           1|                3|                 1|                   2|18/01/2005|          3|11:15|                      12|                   E09000020|              E09000020|               A|                4|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|\n",
+      "| 200501BS00018|         2005|         01BS00018|               526460|                177460| -0.18022|51.481912|           1|                3|                 1|                   1|18/01/2005|          3|10:50|                      12|                   E09000020|              E09000020|               A|             3217|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|\n",
+      "| 200501BS00019|         2005|         01BS00019|               524680|                179450|-0.205139|51.500191|           1|                2|                 2|                   1|20/01/2005|          5|00:15|                      12|                   E09000020|              E09000020|         B,C & U|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002864|\n",
+      "| 200501BS00020|         2005|         01BS00020|               527000|                179020|-0.171887|51.495811|           1|                3|                 2|                   1|21/01/2005|          6|09:15|                      12|                   E09000020|              E09000020|               A|             3218|        6|         30|              3|               4|                3|                 4|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|\n",
+      "| 200501BS00021|         2005|         01BS00021|               527810|                178010| -0.16059|51.486552|           1|                3|                 2|                   1|21/01/2005|          6|21:16|                      12|                   E09000020|              E09000020|         B,C & U|              302|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002901|\n",
+      "| 200501BS00022|         2005|         01BS00022|               526790|                178980|-0.174925|51.495498|           1|                2|                 1|                   1|08/01/2005|          7|03:00|                      12|                   E09000020|              E09000020|               A|                4|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|\n",
n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "A2018.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+----------------+-------------+---------------+\n",
+      "|first_road_class|accident_year|Total accidents|\n",
+      "+----------------+-------------+---------------+\n",
+      "|        Motorway|         2005|           8198|\n",
+      "|         B,C & U|         2005|         101517|\n",
+      "|               A|         2005|          89020|\n",
+      "|               A|         2006|          84509|\n",
+      "|        Motorway|         2006|           7920|\n",
+      "|         B,C & U|         2006|          96732|\n",
+      "|        Motorway|         2007|           7488|\n",
+      "|         B,C & U|         2007|          92823|\n",
+      "|               A|         2007|          81804|\n",
+      "|         B,C & U|         2008|          86503|\n",
+      "|        Motorway|         2008|           6822|\n",
+      "|               A|         2008|          77266|\n",
+      "|        Motorway|         2009|           6172|\n",
+      "|               A|         2009|          74620|\n",
+      "|         B,C & U|         2009|          82762|\n",
+      "|               A|         2010|          70708|\n",
+      "|         B,C & U|         2010|          77640|\n",
+      "|        Motorway|         2010|           6066|\n",
+      "|        Motorway|         2011|           5379|\n",
+      "|         B,C & U|         2011|          75766|\n",
+      "+----------------+-------------+---------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Count the accidents for every (road class, year) pair.\n",
+    "# Sorting by year alone leaves the order of the road classes inside each year\n",
+    "# unspecified, so the displayed rows could change from one run to the next; the\n",
+    "# road class is used as a secondary sort key to make the ordering deterministic.\n",
+    "A2018t_df = (\n",
+    "    A2018.groupby(\"first_road_class\", \"accident_year\")\n",
+    "    .agg(F.count(\"accident_index\").alias(\"Total accidents\"))\n",
+    "    .sort(\"accident_year\", \"first_road_class\")\n",
+    ")\n",
+    "A2018t_df.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+----+---------------+\n",
+      "|road_name|year|Total accidents|\n",
+      "+---------+----+---------------+\n",
+      "|        A|2005|          89020|\n",
+      "| Motorway|2005|           8198|\n",
+      "|  B,C & U|2005|         101517|\n",
+      "|        A|2006|          84509|\n",
+      "|  B,C & U|2006|          96732|\n",
+      "| Motorway|2006|           7920|\n",
+      "|        A|2007|          81804|\n",
+      "| Motorway|2007|           7488|\n",
+      "|  B,C & U|2007|          92823|\n",
+      "| Motorway|2008|           6822|\n",
+      "|        A|2008|          77266|\n",
+      "|  B,C & U|2008|          86503|\n",
+      "|  B,C & U|2009|          82762|\n",
+      "|        A|2009|          74620|\n",
+      "| Motorway|2009|           6172|\n",
+      "|  B,C & U|2010|          77640|\n",
+      "|        A|2010|          70708|\n",
+      "| Motorway|2010|           6066|\n",
+      "|  B,C & U|2011|          75766|\n",
+      "|        A|2011|          70329|\n",
+      "+---------+----+---------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Rename the grouping columns to road_name / year; the count column is left as is\n",
+    "A2018t_dftt = (\n",
+    "    A2018t_df\n",
+    "    .withColumnRenamed(\"first_road_class\", \"road_name\")\n",
+    "    .withColumnRenamed(\"accident_year\", \"year\")\n",
+    ")\n",
+    "A2018t_dftt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>Total accidents</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>89020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>8198</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>101517</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>84509</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>96732</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>7920</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>7488</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>92823</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>81804</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>77266</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>6822</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>86503</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>82762</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>74620</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>6172</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>70708</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>6066</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>77640</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>70329</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>75766</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>5379</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>5212</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>72790</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>67569</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>4983</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>64837</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>68840</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>72864</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>68212</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>5246</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>64682</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>70226</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>5148</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>69761</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>5007</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>61853</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>4430</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>56809</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>68743</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>4225</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>53840</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>64570</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>52662</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>61064</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>3810</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year  Total accidents\n",
+       "0          A  2005            89020\n",
+       "1   Motorway  2005             8198\n",
+       "2    B,C & U  2005           101517\n",
+       "3          A  2006            84509\n",
+       "4    B,C & U  2006            96732\n",
+       "5   Motorway  2006             7920\n",
+       "6   Motorway  2007             7488\n",
+       "7    B,C & U  2007            92823\n",
+       "8          A  2007            81804\n",
+       "9          A  2008            77266\n",
+       "10  Motorway  2008             6822\n",
+       "11   B,C & U  2008            86503\n",
+       "12   B,C & U  2009            82762\n",
+       "13         A  2009            74620\n",
+       "14  Motorway  2009             6172\n",
+       "15         A  2010            70708\n",
+       "16  Motorway  2010             6066\n",
+       "17   B,C & U  2010            77640\n",
+       "18         A  2011            70329\n",
+       "19   B,C & U  2011            75766\n",
+       "20  Motorway  2011             5379\n",
+       "21  Motorway  2012             5212\n",
+       "22   B,C & U  2012            72790\n",
+       "23         A  2012            67569\n",
+       "24  Motorway  2013             4983\n",
+       "25         A  2013            64837\n",
+       "26   B,C & U  2013            68840\n",
+       "27   B,C & U  2014            72864\n",
+       "28         A  2014            68212\n",
+       "29  Motorway  2014             5246\n",
+       "30         A  2015            64682\n",
+       "31   B,C & U  2015            70226\n",
+       "32  Motorway  2015             5148\n",
+       "33   B,C & U  2016            69761\n",
+       "34  Motorway  2016             5007\n",
+       "35         A  2016            61853\n",
+       "36  Motorway  2017             4430\n",
+       "37         A  2017            56809\n",
+       "38   B,C & U  2017            68743\n",
+       "39  Motorway  2018             4225\n",
+       "40         A  2018            53840\n",
+       "41   B,C & U  2018            64570\n",
+       "42         A  2019            52662\n",
+       "43   B,C & U  2019            61064\n",
+       "44  Motorway  2019             3810"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Convert the renamed Spark summary into a pandas DataFrame and display it\n",
+    "A2018t_dftt_df = A2018t_dftt.toPandas()\n",
+    "A2018t_dftt_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "362550"
+      ]
+     },
+     "execution_count": 68,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "Trafficvolume.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3310056"
+      ]
+     },
+     "execution_count": 72,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "Trafficvolume.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+----+--------------+\n",
+      "|road_name|year|Trafficvolume |\n",
+      "+---------+----+--------------+\n",
+      "|B,C & U  |2005|106900000000.0|\n",
+      "|A        |2005|138600000000.0|\n",
+      "|Motorway |2005|60300000000.0 |\n",
+      "|Motorway |2006|61800000000.0 |\n",
+      "|B,C & U  |2006|108100000000.0|\n",
+      "|A        |2006|140500000000.0|\n",
+      "|A        |2007|139700000000.0|\n",
+      "|B,C & U  |2007|111100000000.0|\n",
+      "|Motorway |2007|62500000000.0 |\n",
+      "|B,C & U  |2008|109800000000.0|\n",
+      "|A        |2008|138500000000.0|\n",
+      "|Motorway |2008|62200000000.0 |\n",
+      "|A        |2009|138200000000.0|\n",
+      "|B,C & U  |2009|107300000000.0|\n",
+      "|Motorway |2009|61800000000.0 |\n",
+      "|A        |2010|136400000000.0|\n",
+      "|B,C & U  |2010|105800000000.0|\n",
+      "|Motorway |2010|61000000000.0 |\n",
+      "|Motorway |2011|61800000000.0 |\n",
+      "|A        |2011|137000000000.0|\n",
+      "+---------+----+--------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Read the annual traffic CSV (first row is the header), cast the 'year' column\n",
+    "# to an integer and keep only the rows with 2004 < year < 2020.\n",
+    "Trafficvolume = (\n",
+    "    spark.read.format('csv')\n",
+    "    .option('header', True)\n",
+    "    .option('escape', '\"')\n",
+    "    .load('/Users/Asfandyar/Desktop/disertation/annualtrafficcombined.csv')\n",
+    "    .withColumn('year', F.col('year').cast(IntegerType()))\n",
+    "    .filter(F.col('year') > 2004)\n",
+    "    .filter(F.col('year') < 2020)\n",
+    ")\n",
+    "Trafficvolume.sort(\"year\").show(truncate=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+----+--------------+\n",
+      "|road_name|year| Trafficvolume|\n",
+      "+---------+----+--------------+\n",
+      "|        A|2005|138600000000.0|\n",
+      "|        A|2006|140500000000.0|\n",
+      "+---------+----+--------------+\n",
+      "only showing top 2 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show two rows whose road_name contains the substring \"A\"\n",
+    "Trafficvolume.where(F.col(\"road_name\").contains(\"A\")).show(n=2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+----+--------------+\n",
+      "|road_name|year| Trafficvolume|\n",
+      "+---------+----+--------------+\n",
+      "|  B,C & U|2005|106900000000.0|\n",
+      "|        A|2005|138600000000.0|\n",
+      "| Motorway|2005| 60300000000.0|\n",
+      "| Motorway|2006| 61800000000.0|\n",
+      "|  B,C & U|2006|108100000000.0|\n",
+      "|        A|2006|140500000000.0|\n",
+      "|        A|2007|139700000000.0|\n",
+      "|  B,C & U|2007|111100000000.0|\n",
+      "| Motorway|2007| 62500000000.0|\n",
+      "|  B,C & U|2008|109800000000.0|\n",
+      "|        A|2008|138500000000.0|\n",
+      "| Motorway|2008| 62200000000.0|\n",
+      "|        A|2009|138200000000.0|\n",
+      "|  B,C & U|2009|107300000000.0|\n",
+      "| Motorway|2009| 61800000000.0|\n",
+      "|        A|2010|136400000000.0|\n",
+      "|  B,C & U|2010|105800000000.0|\n",
+      "| Motorway|2010| 61000000000.0|\n",
+      "| Motorway|2011| 61800000000.0|\n",
+      "|        A|2011|137000000000.0|\n",
+      "+---------+----+--------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Keep road_name, year and Trafficvolume (in that order) and order the rows by year\n",
+    "TrafficvolumeGrouped = Trafficvolume.select(\"road_name\", \"year\", \"Trafficvolume\").orderBy(\"year\")\n",
+    "TrafficvolumeGrouped.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AnalysisException",
+     "evalue": "cannot resolve '`all_motor_vehicles`' given input columns: [Trafficvolume, road_name, year];\n'Project [regexp_extract(road_name#873, ^[A-Za-z]+(?=), 0) AS road_name#934, regexp_replace(road_name#873, ^[A-Za-z]+_, , 1) AS road_number#935, year#879, 'all_motor_vehicles]\n+- Sort [year#879 ASC NULLS FIRST], true\n   +- Project [road_name#873, year#879, Trafficvolume#875]\n      +- Filter (year#879 < 2020)\n         +- Filter (year#879 > 2004)\n            +- Project [road_name#873, cast(year#874 as int) AS year#879, Trafficvolume#875]\n               +- Relation[road_name#873,year#874,Trafficvolume#875] csv\n",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAnalysisException\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-46-a3d6758cdfea>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      5\u001b[0m     \u001b[0mf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mregexp_replace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"road_name\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"^[A-Za-z]+_\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0malias\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"road_number\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m     \u001b[0;34m\"year\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m     \u001b[0;34m\"all_motor_vehicles\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      9\u001b[0m )\n",
+      "\u001b[0;32m/usr/local/Cellar/apache-spark/3.1.2/libexec/python/pyspark/sql/dataframe.py\u001b[0m in \u001b[0;36mselect\u001b[0;34m(self, *cols)\u001b[0m\n\u001b[1;32m   1667\u001b[0m         \u001b[0;34m[\u001b[0m\u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'Alice'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m12\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'Bob'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m15\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1668\u001b[0m         \"\"\"\n\u001b[0;32m-> 1669\u001b[0;31m         \u001b[0mjdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jcols\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mcols\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1670\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msql_ctx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1671\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m   1303\u001b[0m         \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1304\u001b[0m         return_value = get_return_value(\n\u001b[0;32m-> 1305\u001b[0;31m             answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m   1306\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1307\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/Cellar/apache-spark/3.1.2/libexec/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m    115\u001b[0m                 \u001b[0;31m# Hide where the exception came from that shows a non-Pythonic\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    116\u001b[0m                 \u001b[0;31m# JVM exception message.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 117\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mconverted\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    118\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    119\u001b[0m                 \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mAnalysisException\u001b[0m: cannot resolve '`all_motor_vehicles`' given input columns: [Trafficvolume, road_name, year];\n'Project [regexp_extract(road_name#873, ^[A-Za-z]+(?=), 0) AS road_name#934, regexp_replace(road_name#873, ^[A-Za-z]+_, , 1) AS road_number#935, year#879, 'all_motor_vehicles]\n+- Sort [year#879 ASC NULLS FIRST], true\n   +- Project [road_name#873, year#879, Trafficvolume#875]\n      +- Filter (year#879 < 2020)\n         +- Filter (year#879 > 2004)\n            +- Project [road_name#873, cast(year#874 as int) AS year#879, Trafficvolume#875]\n               +- Relation[road_name#873,year#874,Trafficvolume#875] csv\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pyspark.sql.functions as f\n",
+    "\n",
+    "# Derive two columns from road_name and carry the traffic figure along:\n",
+    "#   road_name   - the leading run of ASCII letters (regexp_extract, group 0)\n",
+    "#   road_number - road_name with a leading \"<letters>_\" prefix removed\n",
+    "# TrafficvolumeGrouped only has the columns road_name, year and Trafficvolume, so\n",
+    "# selecting \"all_motor_vehicles\" directly raised an AnalysisException. The\n",
+    "# Trafficvolume column is exposed under that name instead, which is the column\n",
+    "# name the following cells read.\n",
+    "# NOTE(review): the lookahead \"(?=)\" is empty and therefore has no effect - possibly\n",
+    "# \"(?=_)\" was intended; also, a value such as \"B,C & U\" is reduced to \"B\" by the\n",
+    "# first pattern - confirm both are intended.\n",
+    "TrafficvolumeGroupedupdated=TrafficvolumeGrouped.select(\n",
+    "    f.regexp_extract(\"road_name\", pattern=\"^[A-Za-z]+(?=)\", idx=0).alias('road_name'),\n",
+    "    f.regexp_replace(\"road_name\", \"^[A-Za-z]+_\", \"\").alias(\"road_number\"),\n",
+    "    \"year\",\n",
+    "    f.col(\"Trafficvolume\").alias(\"all_motor_vehicles\")\n",
+    ")\n",
+    "TrafficvolumeGroupedupdated.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'TrafficvolumeGroupedupdated' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-47-e36d62617a89>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mTrafficvolumeGroupedupdated\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrafficvolumeGroupedupdated\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mselect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"road_name\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"year\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcol\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"all_motor_vehicles\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"year\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mTrafficvolumeGroupedupdated\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'TrafficvolumeGroupedupdated' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "# Keep road_name, year and all_motor_vehicles (in that order), ordered by year\n",
+    "TrafficvolumeGroupedupdated = TrafficvolumeGroupedupdated.select(\n",
+    "    \"road_name\", \"year\", \"all_motor_vehicles\"\n",
+    ").orderBy(\"year\")\n",
+    "TrafficvolumeGroupedupdated.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+----+------------------+\n",
+      "|road_name|year|all_motor_vehicles|\n",
+      "+---------+----+------------------+\n",
+      "|        A|2005|       7.8689351E7|\n",
+      "|        M|2005|       2.4352716E7|\n",
+      "|        U|2005|         7369477.0|\n",
+      "|        C|2005|         3816208.0|\n",
+      "|        B|2005|         4709562.0|\n",
+      "|        M|2006|       3.0686368E7|\n",
+      "|        A|2006|       7.7760371E7|\n",
+      "|        U|2006|         8209734.0|\n",
+      "|        C|2006|         4186058.0|\n",
+      "|        B|2006|         5203139.0|\n",
+      "|        A|2007|       8.0678016E7|\n",
+      "|        M|2007|       2.7693584E7|\n",
+      "|        U|2007|         7824099.0|\n",
+      "|        B|2007|         5008270.0|\n",
+      "|        C|2007|         3995513.0|\n",
+      "|        M|2008|       2.8008346E7|\n",
+      "|        A|2008|       7.6143383E7|\n",
+      "|        U|2008|       1.0834031E7|\n",
+      "|        B|2008|       1.2802995E7|\n",
+      "|        C|2008|         7579313.0|\n",
+      "+---------+----+------------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sum all_motor_vehicles for every (road_name, year) pair; the total keeps the column name.\n",
+    "TrafficvolumeGroupedupdated_U = (\n",
+    "    TrafficvolumeGroupedupdated\n",
+    "    .groupby(\"road_name\", \"year\")\n",
+    "    .agg(F.sum(\"all_motor_vehicles\").alias(\"all_motor_vehicles\"))\n",
+    ")\n",
+    "TrafficvolumeGroupedupdated_U.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>Trafficvolume</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>60300000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>138600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>106900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>61800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>140500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>108100000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>62500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>139700000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>111100000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>62200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>138500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>109800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>61800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>138200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>107300000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>61000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>136400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>105800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>61800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>137000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>105500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>62400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>135800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>106400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>63300000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>135800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>106600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>64800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>138500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>111200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>66500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>141000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>112900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>67700000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>144900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>115400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>68700000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>146500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>117400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>69000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>148600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>116600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>70500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>150200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>117900000000.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year   Trafficvolume\n",
+       "0   Motorway  2005   60300000000.0\n",
+       "1          A  2005  138600000000.0\n",
+       "2    B,C & U  2005  106900000000.0\n",
+       "3   Motorway  2006   61800000000.0\n",
+       "4          A  2006  140500000000.0\n",
+       "5    B,C & U  2006  108100000000.0\n",
+       "6   Motorway  2007   62500000000.0\n",
+       "7          A  2007  139700000000.0\n",
+       "8    B,C & U  2007  111100000000.0\n",
+       "9   Motorway  2008   62200000000.0\n",
+       "10         A  2008  138500000000.0\n",
+       "11   B,C & U  2008  109800000000.0\n",
+       "12  Motorway  2009   61800000000.0\n",
+       "13         A  2009  138200000000.0\n",
+       "14   B,C & U  2009  107300000000.0\n",
+       "15  Motorway  2010   61000000000.0\n",
+       "16         A  2010  136400000000.0\n",
+       "17   B,C & U  2010  105800000000.0\n",
+       "18  Motorway  2011   61800000000.0\n",
+       "19         A  2011  137000000000.0\n",
+       "20   B,C & U  2011  105500000000.0\n",
+       "21  Motorway  2012   62400000000.0\n",
+       "22         A  2012  135800000000.0\n",
+       "23   B,C & U  2012  106400000000.0\n",
+       "24  Motorway  2013   63300000000.0\n",
+       "25         A  2013  135800000000.0\n",
+       "26   B,C & U  2013  106600000000.0\n",
+       "27  Motorway  2014   64800000000.0\n",
+       "28         A  2014  138500000000.0\n",
+       "29   B,C & U  2014  111200000000.0\n",
+       "30  Motorway  2015   66500000000.0\n",
+       "31         A  2015  141000000000.0\n",
+       "32   B,C & U  2015  112900000000.0\n",
+       "33  Motorway  2016   67700000000.0\n",
+       "34         A  2016  144900000000.0\n",
+       "35   B,C & U  2016  115400000000.0\n",
+       "36  Motorway  2017   68700000000.0\n",
+       "37         A  2017  146500000000.0\n",
+       "38   B,C & U  2017  117400000000.0\n",
+       "39  Motorway  2018   69000000000.0\n",
+       "40         A  2018  148600000000.0\n",
+       "41   B,C & U  2018  116600000000.0\n",
+       "42  Motorway  2019   70500000000.0\n",
+       "43         A  2019  150200000000.0\n",
+       "44   B,C & U  2019  117900000000.0"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Convert TrafficvolumeGrouped to a pandas DataFrame via toPandas() and display it.\n",
+    "# NOTE(review): the saved output of this cell has columns road_name / year / Trafficvolume,\n",
+    "# while the saved output of the later merge shows an all_motor_vehicles column - verify\n",
+    "# which cells are meant to consume this frame.\n",
+    "TrafficvolumeGroupedupdated_df = TrafficvolumeGrouped.toPandas()\n",
+    "TrafficvolumeGroupedupdated_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>year</th>\n",
+       "      <th>M</th>\n",
+       "      <th>A</th>\n",
+       "      <th>B</th>\n",
+       "      <th>C</th>\n",
+       "      <th>U</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2005</td>\n",
+       "      <td>2,186</td>\n",
+       "      <td>29,035</td>\n",
+       "      <td>18,758</td>\n",
+       "      <td>52,480</td>\n",
+       "      <td>138,679</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2006</td>\n",
+       "      <td>2,209</td>\n",
+       "      <td>29,040</td>\n",
+       "      <td>18,652</td>\n",
+       "      <td>52,487</td>\n",
+       "      <td>142,670</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2007</td>\n",
+       "      <td>2,212</td>\n",
+       "      <td>29,045</td>\n",
+       "      <td>18,806</td>\n",
+       "      <td>52,458</td>\n",
+       "      <td>142,847</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2008</td>\n",
+       "      <td>2,211</td>\n",
+       "      <td>29,012</td>\n",
+       "      <td>18,741</td>\n",
+       "      <td>52,552</td>\n",
+       "      <td>142,594</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2009</td>\n",
+       "      <td>2,212</td>\n",
+       "      <td>29,061</td>\n",
+       "      <td>18,729</td>\n",
+       "      <td>52,700</td>\n",
+       "      <td>142,384</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>2010</td>\n",
+       "      <td>2,211</td>\n",
+       "      <td>29,022</td>\n",
+       "      <td>18,760</td>\n",
+       "      <td>52,709</td>\n",
+       "      <td>142,275</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>2011</td>\n",
+       "      <td>2,218</td>\n",
+       "      <td>29,039</td>\n",
+       "      <td>18,770</td>\n",
+       "      <td>52,712</td>\n",
+       "      <td>142,265</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>2012</td>\n",
+       "      <td>2,248</td>\n",
+       "      <td>29,044</td>\n",
+       "      <td>18,774</td>\n",
+       "      <td>52,756</td>\n",
+       "      <td>142,551</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>2013</td>\n",
+       "      <td>2,262</td>\n",
+       "      <td>29,049</td>\n",
+       "      <td>18,776</td>\n",
+       "      <td>52,819</td>\n",
+       "      <td>143,273</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>2014</td>\n",
+       "      <td>2,265</td>\n",
+       "      <td>29,071</td>\n",
+       "      <td>18,770</td>\n",
+       "      <td>52,866</td>\n",
+       "      <td>143,549</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>2015</td>\n",
+       "      <td>2,270</td>\n",
+       "      <td>29,065</td>\n",
+       "      <td>18,819</td>\n",
+       "      <td>52,420</td>\n",
+       "      <td>143,305</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>2016</td>\n",
+       "      <td>2,268</td>\n",
+       "      <td>29,090</td>\n",
+       "      <td>18,825</td>\n",
+       "      <td>52,475</td>\n",
+       "      <td>143,911</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>2017</td>\n",
+       "      <td>2,292</td>\n",
+       "      <td>29,140</td>\n",
+       "      <td>18,842</td>\n",
+       "      <td>52,379</td>\n",
+       "      <td>143,305</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>2018</td>\n",
+       "      <td>2,313</td>\n",
+       "      <td>29,440</td>\n",
+       "      <td>18,836</td>\n",
+       "      <td>53,392</td>\n",
+       "      <td>142,715</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>2019</td>\n",
+       "      <td>2,320</td>\n",
+       "      <td>29,489</td>\n",
+       "      <td>18,842</td>\n",
+       "      <td>53,371</td>\n",
+       "      <td>143,965</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    year      M       A       B       C        U\n",
+       "0   2005  2,186  29,035  18,758  52,480  138,679\n",
+       "1   2006  2,209  29,040  18,652  52,487  142,670\n",
+       "2   2007  2,212  29,045  18,806  52,458  142,847\n",
+       "3   2008  2,211  29,012  18,741  52,552  142,594\n",
+       "4   2009  2,212  29,061  18,729  52,700  142,384\n",
+       "5   2010  2,211  29,022  18,760  52,709  142,275\n",
+       "6   2011  2,218  29,039  18,770  52,712  142,265\n",
+       "7   2012  2,248  29,044  18,774  52,756  142,551\n",
+       "8   2013  2,262  29,049  18,776  52,819  143,273\n",
+       "9   2014  2,265  29,071  18,770  52,866  143,549\n",
+       "10  2015  2,270  29,065  18,819  52,420  143,305\n",
+       "11  2016  2,268  29,090  18,825  52,475  143,911\n",
+       "12  2017  2,292  29,140  18,842  52,379  143,305\n",
+       "13  2018  2,313  29,440  18,836  53,392  142,715\n",
+       "14  2019  2,320  29,489  18,842  53,371  143,965"
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Road lengths table: one row per year, one column per road class (M, A, B, C, U).\n",
+    "# Values are read as text because they contain thousands separators (e.g. \"2,186\").\n",
+    "# The location defaults to the original local path but can be overridden with the\n",
+    "# ROAD_LENGTHS_CSV environment variable so the notebook also runs on other machines.\n",
+    "ROAD_LENGTHS_CSV = os.environ.get(\n",
+    "    'ROAD_LENGTHS_CSV',\n",
+    "    '/Users/Asfandyar/Desktop/disertation/diseration_final/Road lengths (miles).csv',\n",
+    ")\n",
+    "df = pd.read_csv(ROAD_LENGTHS_CSV)\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>year</th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>link_length_km</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2005</td>\n",
+       "      <td>M</td>\n",
+       "      <td>2,186</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2006</td>\n",
+       "      <td>M</td>\n",
+       "      <td>2,209</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2007</td>\n",
+       "      <td>M</td>\n",
+       "      <td>2,212</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2008</td>\n",
+       "      <td>M</td>\n",
+       "      <td>2,211</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2009</td>\n",
+       "      <td>M</td>\n",
+       "      <td>2,212</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>70</th>\n",
+       "      <td>2015</td>\n",
+       "      <td>U</td>\n",
+       "      <td>143,305</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>71</th>\n",
+       "      <td>2016</td>\n",
+       "      <td>U</td>\n",
+       "      <td>143,911</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72</th>\n",
+       "      <td>2017</td>\n",
+       "      <td>U</td>\n",
+       "      <td>143,305</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>73</th>\n",
+       "      <td>2018</td>\n",
+       "      <td>U</td>\n",
+       "      <td>142,715</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>74</th>\n",
+       "      <td>2019</td>\n",
+       "      <td>U</td>\n",
+       "      <td>143,965</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>75 rows × 3 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    year road_name link_length_km\n",
+       "0   2005         M          2,186\n",
+       "1   2006         M          2,209\n",
+       "2   2007         M          2,212\n",
+       "3   2008         M          2,211\n",
+       "4   2009         M          2,212\n",
+       "..   ...       ...            ...\n",
+       "70  2015         U        143,305\n",
+       "71  2016         U        143,911\n",
+       "72  2017         U        143,305\n",
+       "73  2018         U        142,715\n",
+       "74  2019         U        143,965\n",
+       "\n",
+       "[75 rows x 3 columns]"
+      ]
+     },
+     "execution_count": 80,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Reshape from wide (one column per road class) to long: one row per (year, road_name).\n",
+    "# NOTE(review): the CSV file name says miles but the value column is called\n",
+    "# link_length_km - confirm the units before using it in distance-based figures.\n",
+    "roadlenghth = pd.melt(\n",
+    "    df,\n",
+    "    id_vars=[\"year\"],\n",
+    "    var_name=\"road_name\",\n",
+    "    value_name=\"link_length_km\",\n",
+    ")\n",
+    "roadlenghth"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>all_motor_vehicles</th>\n",
+       "      <th>link_length_km</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>78689351.0</td>\n",
+       "      <td>29,035</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>M</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>24352716.0</td>\n",
+       "      <td>2,186</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>U</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>7369477.0</td>\n",
+       "      <td>138,679</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>C</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>3816208.0</td>\n",
+       "      <td>52,480</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>B</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>4709562.0</td>\n",
+       "      <td>18,758</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>70</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>64275975.0</td>\n",
+       "      <td>29,489</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>71</th>\n",
+       "      <td>M</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>25629481.0</td>\n",
+       "      <td>2,320</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72</th>\n",
+       "      <td>C</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>6641590.0</td>\n",
+       "      <td>53,371</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>73</th>\n",
+       "      <td>U</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>7504917.0</td>\n",
+       "      <td>143,965</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>74</th>\n",
+       "      <td>B</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>10353469.0</td>\n",
+       "      <td>18,842</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>75 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year  all_motor_vehicles link_length_km\n",
+       "0          A  2005          78689351.0         29,035\n",
+       "1          M  2005          24352716.0          2,186\n",
+       "2          U  2005           7369477.0        138,679\n",
+       "3          C  2005           3816208.0         52,480\n",
+       "4          B  2005           4709562.0         18,758\n",
+       "..       ...   ...                 ...            ...\n",
+       "70         A  2019          64275975.0         29,489\n",
+       "71         M  2019          25629481.0          2,320\n",
+       "72         C  2019           6641590.0         53,371\n",
+       "73         U  2019           7504917.0        143,965\n",
+       "74         B  2019          10353469.0         18,842\n",
+       "\n",
+       "[75 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 81,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Attach the road length of each (year, road_name) pair to the summed vehicle counts.\n",
+    "# The counts are taken directly from TrafficvolumeGroupedupdated_U (road_name, year,\n",
+    "# all_motor_vehicles) so this cell does not depend on whichever frame was last bound to\n",
+    "# TrafficvolumeGroupedupdated_df; the cells below need the all_motor_vehicles column.\n",
+    "result22 = pd.merge(\n",
+    "    TrafficvolumeGroupedupdated_U.toPandas(),\n",
+    "    roadlenghth,\n",
+    "    on=['year', 'road_name'],\n",
+    ")\n",
+    "result22"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "numpy.float64"
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Road lengths arrive as text with thousands separators (e.g. \"29,035\"): strip the commas\n",
+    "# literally (regex=False) and convert to float. Casting to str first keeps the cell\n",
+    "# re-runnable - on a second run the column is already float and the .str accessor would\n",
+    "# otherwise raise AttributeError.\n",
+    "result22[\"link_length_km\"] = (\n",
+    "    result22[\"link_length_km\"]\n",
+    "    .astype(str)\n",
+    "    .str.replace(\",\", \"\", regex=False)\n",
+    "    .astype(float)\n",
+    ")\n",
+    "type(result22[\"all_motor_vehicles\"][0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>all_motor_vehicles</th>\n",
+       "      <th>link_length_km</th>\n",
+       "      <th>Trafficvolume</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>78689351.0</td>\n",
+       "      <td>29035.0</td>\n",
+       "      <td>2.284745e+12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>M</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>24352716.0</td>\n",
+       "      <td>2186.0</td>\n",
+       "      <td>5.323504e+10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>U</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>7369477.0</td>\n",
+       "      <td>138679.0</td>\n",
+       "      <td>1.021992e+12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>C</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>3816208.0</td>\n",
+       "      <td>52480.0</td>\n",
+       "      <td>2.002746e+11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>B</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>4709562.0</td>\n",
+       "      <td>18758.0</td>\n",
+       "      <td>8.834196e+10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>70</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>64275975.0</td>\n",
+       "      <td>29489.0</td>\n",
+       "      <td>1.895434e+12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>71</th>\n",
+       "      <td>M</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>25629481.0</td>\n",
+       "      <td>2320.0</td>\n",
+       "      <td>5.946040e+10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72</th>\n",
+       "      <td>C</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>6641590.0</td>\n",
+       "      <td>53371.0</td>\n",
+       "      <td>3.544683e+11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>73</th>\n",
+       "      <td>U</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>7504917.0</td>\n",
+       "      <td>143965.0</td>\n",
+       "      <td>1.080445e+12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>74</th>\n",
+       "      <td>B</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>10353469.0</td>\n",
+       "      <td>18842.0</td>\n",
+       "      <td>1.950801e+11</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>75 rows × 5 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year  all_motor_vehicles  link_length_km  Trafficvolume\n",
+       "0          A  2005          78689351.0         29035.0   2.284745e+12\n",
+       "1          M  2005          24352716.0          2186.0   5.323504e+10\n",
+       "2          U  2005           7369477.0        138679.0   1.021992e+12\n",
+       "3          C  2005           3816208.0         52480.0   2.002746e+11\n",
+       "4          B  2005           4709562.0         18758.0   8.834196e+10\n",
+       "..       ...   ...                 ...             ...            ...\n",
+       "70         A  2019          64275975.0         29489.0   1.895434e+12\n",
+       "71         M  2019          25629481.0          2320.0   5.946040e+10\n",
+       "72         C  2019           6641590.0         53371.0   3.544683e+11\n",
+       "73         U  2019           7504917.0        143965.0   1.080445e+12\n",
+       "74         B  2019          10353469.0         18842.0   1.950801e+11\n",
+       "\n",
+       "[75 rows x 5 columns]"
+      ]
+     },
+     "execution_count": 83,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Trafficvolume = row-wise product of the all_motor_vehicles and link_length_km columns.\n",
+    "result22[\"Trafficvolume\"] = result22[\"all_motor_vehicles\"].mul(result22[\"link_length_km\"])\n",
+    "result22"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>Trafficvolume</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>60300000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>138600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>106900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>61800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>140500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>108100000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>62500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>139700000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>111100000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>62200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>138500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>109800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>61800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>138200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>107300000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>61000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>136400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>105800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>61800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>137000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>105500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>62400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>135800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>106400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>63300000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>135800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>106600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>64800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>138500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>111200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>66500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>141000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>112900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>67700000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>144900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>115400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>68700000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>146500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>117400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>69000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>148600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>116600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>70500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>150200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>117900000000.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year   Trafficvolume\n",
+       "0   Motorway  2005   60300000000.0\n",
+       "1          A  2005  138600000000.0\n",
+       "2    B,C & U  2005  106900000000.0\n",
+       "3   Motorway  2006   61800000000.0\n",
+       "4          A  2006  140500000000.0\n",
+       "5    B,C & U  2006  108100000000.0\n",
+       "6   Motorway  2007   62500000000.0\n",
+       "7          A  2007  139700000000.0\n",
+       "8    B,C & U  2007  111100000000.0\n",
+       "9   Motorway  2008   62200000000.0\n",
+       "10         A  2008  138500000000.0\n",
+       "11   B,C & U  2008  109800000000.0\n",
+       "12  Motorway  2009   61800000000.0\n",
+       "13         A  2009  138200000000.0\n",
+       "14   B,C & U  2009  107300000000.0\n",
+       "15  Motorway  2010   61000000000.0\n",
+       "16         A  2010  136400000000.0\n",
+       "17   B,C & U  2010  105800000000.0\n",
+       "18  Motorway  2011   61800000000.0\n",
+       "19         A  2011  137000000000.0\n",
+       "20   B,C & U  2011  105500000000.0\n",
+       "21  Motorway  2012   62400000000.0\n",
+       "22         A  2012  135800000000.0\n",
+       "23   B,C & U  2012  106400000000.0\n",
+       "24  Motorway  2013   63300000000.0\n",
+       "25         A  2013  135800000000.0\n",
+       "26   B,C & U  2013  106600000000.0\n",
+       "27  Motorway  2014   64800000000.0\n",
+       "28         A  2014  138500000000.0\n",
+       "29   B,C & U  2014  111200000000.0\n",
+       "30  Motorway  2015   66500000000.0\n",
+       "31         A  2015  141000000000.0\n",
+       "32   B,C & U  2015  112900000000.0\n",
+       "33  Motorway  2016   67700000000.0\n",
+       "34         A  2016  144900000000.0\n",
+       "35   B,C & U  2016  115400000000.0\n",
+       "36  Motorway  2017   68700000000.0\n",
+       "37         A  2017  146500000000.0\n",
+       "38   B,C & U  2017  117400000000.0\n",
+       "39  Motorway  2018   69000000000.0\n",
+       "40         A  2018  148600000000.0\n",
+       "41   B,C & U  2018  116600000000.0\n",
+       "42  Motorway  2019   70500000000.0\n",
+       "43         A  2019  150200000000.0\n",
+       "44   B,C & U  2019  117900000000.0"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Point result22 at the grouped traffic-volume frame (an alias, not a copy) and display it.\n",
+    "result22 = TrafficvolumeGroupedupdated_df\n",
+    "result22"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "\"['all_motor_vehicles' 'link_length_km'] not found in axis\"",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-51-4d1b0de1ae13>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mresult22\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresult22\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'all_motor_vehicles'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'link_length_km'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mresult22\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m   4172\u001b[0m             \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4173\u001b[0m             \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4174\u001b[0;31m             \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4175\u001b[0m         )\n\u001b[1;32m   4176\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m   3887\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32min\u001b[0m \u001b[0maxes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3888\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3889\u001b[0;31m                 \u001b[0mobj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_drop_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3890\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3891\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_drop_axis\u001b[0;34m(self, labels, axis, level, errors)\u001b[0m\n\u001b[1;32m   3921\u001b[0m                 \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3922\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3923\u001b[0;31m                 \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3924\u001b[0m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnew_axis\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3925\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m   5285\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5286\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5287\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{labels[mask]} not found in axis\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   5288\u001b[0m             \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m~\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   5289\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: \"['all_motor_vehicles' 'link_length_km'] not found in axis\""
+     ]
+    }
+   ],
+   "source": [
+    "# Drop the raw input columns used to derive Trafficvolume.\n",
+    "# errors='ignore' keeps this cell idempotent: it no longer raises KeyError when the columns\n",
+    "# are already gone (second run) or were never present in the frame bound to result22.\n",
+    "result22 = result22.drop(columns=['all_motor_vehicles', 'link_length_km'], errors='ignore')\n",
+    "result22"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>Total accidents</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>U</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>60026</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>89020</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>B</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>24991</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>C</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>16500</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>M</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>8198</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>70</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>52662</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>71</th>\n",
+       "      <td>M</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>3810</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72</th>\n",
+       "      <td>C</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>6067</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>73</th>\n",
+       "      <td>U</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>40459</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>74</th>\n",
+       "      <td>B</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>14538</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>75 rows × 3 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year  Total accidents\n",
+       "0          U  2005            60026\n",
+       "1          A  2005            89020\n",
+       "2          B  2005            24991\n",
+       "3          C  2005            16500\n",
+       "4          M  2005             8198\n",
+       "..       ...   ...              ...\n",
+       "70         A  2019            52662\n",
+       "71         M  2019             3810\n",
+       "72         C  2019             6067\n",
+       "73         U  2019            40459\n",
+       "74         B  2019            14538\n",
+       "\n",
+       "[75 rows x 3 columns]"
+      ]
+     },
+     "execution_count": 85,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "A2018t_dftt_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "numpy.float64"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Cast Trafficvolume to 64-bit float, then confirm the cast by inspecting element 0's type.\n",
+    "result23[\"Trafficvolume\"] = result23[\"Trafficvolume\"].astype(\"float64\")\n",
+    "type(result23[\"Trafficvolume\"][0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "numpy.float64"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(result23['Trafficvolume'][0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>Total accidents</th>\n",
+       "      <th>Trafficvolume</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>89020</td>\n",
+       "      <td>138600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>8198</td>\n",
+       "      <td>60300000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>101517</td>\n",
+       "      <td>106900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>84509</td>\n",
+       "      <td>140500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>96732</td>\n",
+       "      <td>108100000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>7920</td>\n",
+       "      <td>61800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>7488</td>\n",
+       "      <td>62500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>92823</td>\n",
+       "      <td>111100000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>81804</td>\n",
+       "      <td>139700000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>77266</td>\n",
+       "      <td>138500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>6822</td>\n",
+       "      <td>62200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>86503</td>\n",
+       "      <td>109800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>82762</td>\n",
+       "      <td>107300000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>74620</td>\n",
+       "      <td>138200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>6172</td>\n",
+       "      <td>61800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>70708</td>\n",
+       "      <td>136400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>6066</td>\n",
+       "      <td>61000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>77640</td>\n",
+       "      <td>105800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>70329</td>\n",
+       "      <td>137000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>75766</td>\n",
+       "      <td>105500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>5379</td>\n",
+       "      <td>61800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>5212</td>\n",
+       "      <td>62400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>72790</td>\n",
+       "      <td>106400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>67569</td>\n",
+       "      <td>135800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>4983</td>\n",
+       "      <td>63300000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>64837</td>\n",
+       "      <td>135800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>68840</td>\n",
+       "      <td>106600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>72864</td>\n",
+       "      <td>111200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>68212</td>\n",
+       "      <td>138500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>5246</td>\n",
+       "      <td>64800000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>64682</td>\n",
+       "      <td>141000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>70226</td>\n",
+       "      <td>112900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>5148</td>\n",
+       "      <td>66500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>69761</td>\n",
+       "      <td>115400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>5007</td>\n",
+       "      <td>67700000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>61853</td>\n",
+       "      <td>144900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>4430</td>\n",
+       "      <td>68700000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>56809</td>\n",
+       "      <td>146500000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>68743</td>\n",
+       "      <td>117400000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>4225</td>\n",
+       "      <td>69000000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>53840</td>\n",
+       "      <td>148600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>64570</td>\n",
+       "      <td>116600000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>52662</td>\n",
+       "      <td>150200000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>61064</td>\n",
+       "      <td>117900000000.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>3810</td>\n",
+       "      <td>70500000000.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year  Total accidents   Trafficvolume\n",
+       "0          A  2005            89020  138600000000.0\n",
+       "1   Motorway  2005             8198   60300000000.0\n",
+       "2    B,C & U  2005           101517  106900000000.0\n",
+       "3          A  2006            84509  140500000000.0\n",
+       "4    B,C & U  2006            96732  108100000000.0\n",
+       "5   Motorway  2006             7920   61800000000.0\n",
+       "6   Motorway  2007             7488   62500000000.0\n",
+       "7    B,C & U  2007            92823  111100000000.0\n",
+       "8          A  2007            81804  139700000000.0\n",
+       "9          A  2008            77266  138500000000.0\n",
+       "10  Motorway  2008             6822   62200000000.0\n",
+       "11   B,C & U  2008            86503  109800000000.0\n",
+       "12   B,C & U  2009            82762  107300000000.0\n",
+       "13         A  2009            74620  138200000000.0\n",
+       "14  Motorway  2009             6172   61800000000.0\n",
+       "15         A  2010            70708  136400000000.0\n",
+       "16  Motorway  2010             6066   61000000000.0\n",
+       "17   B,C & U  2010            77640  105800000000.0\n",
+       "18         A  2011            70329  137000000000.0\n",
+       "19   B,C & U  2011            75766  105500000000.0\n",
+       "20  Motorway  2011             5379   61800000000.0\n",
+       "21  Motorway  2012             5212   62400000000.0\n",
+       "22   B,C & U  2012            72790  106400000000.0\n",
+       "23         A  2012            67569  135800000000.0\n",
+       "24  Motorway  2013             4983   63300000000.0\n",
+       "25         A  2013            64837  135800000000.0\n",
+       "26   B,C & U  2013            68840  106600000000.0\n",
+       "27   B,C & U  2014            72864  111200000000.0\n",
+       "28         A  2014            68212  138500000000.0\n",
+       "29  Motorway  2014             5246   64800000000.0\n",
+       "30         A  2015            64682  141000000000.0\n",
+       "31   B,C & U  2015            70226  112900000000.0\n",
+       "32  Motorway  2015             5148   66500000000.0\n",
+       "33   B,C & U  2016            69761  115400000000.0\n",
+       "34  Motorway  2016             5007   67700000000.0\n",
+       "35         A  2016            61853  144900000000.0\n",
+       "36  Motorway  2017             4430   68700000000.0\n",
+       "37         A  2017            56809  146500000000.0\n",
+       "38   B,C & U  2017            68743  117400000000.0\n",
+       "39  Motorway  2018             4225   69000000000.0\n",
+       "40         A  2018            53840  148600000000.0\n",
+       "41   B,C & U  2018            64570  116600000000.0\n",
+       "42         A  2019            52662  150200000000.0\n",
+       "43   B,C & U  2019            61064  117900000000.0\n",
+       "44  Motorway  2019             3810   70500000000.0"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Inner-join the two frames on (year, road_name); the joined frame is displayed below.\n",
+    "result23 = pd.merge(\n",
+    "    left=A2018t_dftt_df,\n",
+    "    right=result22,\n",
+    "    how='inner',\n",
+    "    on=['year', 'road_name'],\n",
+    ")\n",
+    "result23"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+----+--------------+\n",
+      "|road_name|year| Trafficvolume|\n",
+      "+---------+----+--------------+\n",
+      "| Motorway|2005| 60300000000.0|\n",
+      "|        A|2005|138600000000.0|\n",
+      "|  B,C & U|2005|106900000000.0|\n",
+      "| Motorway|2006| 61800000000.0|\n",
+      "|        A|2006|140500000000.0|\n",
+      "|  B,C & U|2006|108100000000.0|\n",
+      "| Motorway|2007| 62500000000.0|\n",
+      "|        A|2007|139700000000.0|\n",
+      "|  B,C & U|2007|111100000000.0|\n",
+      "| Motorway|2008| 62200000000.0|\n",
+      "|        A|2008|138500000000.0|\n",
+      "|  B,C & U|2008|109800000000.0|\n",
+      "| Motorway|2009| 61800000000.0|\n",
+      "|        A|2009|138200000000.0|\n",
+      "|  B,C & U|2009|107300000000.0|\n",
+      "| Motorway|2010| 61000000000.0|\n",
+      "|        A|2010|136400000000.0|\n",
+      "|  B,C & U|2010|105800000000.0|\n",
+      "| Motorway|2011| 61800000000.0|\n",
+      "|        A|2011|137000000000.0|\n",
+      "+---------+----+--------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Build a Spark DataFrame from result22 and print its first rows.\n",
+    "result23_sc = spark.createDataFrame(result22)\n",
+    "result23_sc.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>Accident Probability</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>6.422799e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>1.359536e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>9.496445e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>6.014875e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>8.948381e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>1.281553e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>1.198080e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>8.354905e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>5.855691e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>5.578773e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>1.096785e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>7.878233e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>7.713141e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>5.399421e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>9.987055e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>5.183871e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>9.944262e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>7.338374e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>5.133504e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>7.181611e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>8.703883e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>8.352564e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>6.841165e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>4.975626e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>7.872038e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>4.774448e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>6.457786e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>6.552518e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>4.925054e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>8.095679e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>4.587376e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>6.220195e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>7.741353e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>6.045147e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>7.395864e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>4.268668e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>6.448326e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>3.877747e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>5.855451e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>6.123188e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>3.623149e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>5.537736e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>3.506125e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>5.179304e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>5.404255e-08</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year  Accident Probability\n",
+       "0          A  2005          6.422799e-07\n",
+       "1   Motorway  2005          1.359536e-07\n",
+       "2    B,C & U  2005          9.496445e-07\n",
+       "3          A  2006          6.014875e-07\n",
+       "4    B,C & U  2006          8.948381e-07\n",
+       "5   Motorway  2006          1.281553e-07\n",
+       "6   Motorway  2007          1.198080e-07\n",
+       "7    B,C & U  2007          8.354905e-07\n",
+       "8          A  2007          5.855691e-07\n",
+       "9          A  2008          5.578773e-07\n",
+       "10  Motorway  2008          1.096785e-07\n",
+       "11   B,C & U  2008          7.878233e-07\n",
+       "12   B,C & U  2009          7.713141e-07\n",
+       "13         A  2009          5.399421e-07\n",
+       "14  Motorway  2009          9.987055e-08\n",
+       "15         A  2010          5.183871e-07\n",
+       "16  Motorway  2010          9.944262e-08\n",
+       "17   B,C & U  2010          7.338374e-07\n",
+       "18         A  2011          5.133504e-07\n",
+       "19   B,C & U  2011          7.181611e-07\n",
+       "20  Motorway  2011          8.703883e-08\n",
+       "21  Motorway  2012          8.352564e-08\n",
+       "22   B,C & U  2012          6.841165e-07\n",
+       "23         A  2012          4.975626e-07\n",
+       "24  Motorway  2013          7.872038e-08\n",
+       "25         A  2013          4.774448e-07\n",
+       "26   B,C & U  2013          6.457786e-07\n",
+       "27   B,C & U  2014          6.552518e-07\n",
+       "28         A  2014          4.925054e-07\n",
+       "29  Motorway  2014          8.095679e-08\n",
+       "30         A  2015          4.587376e-07\n",
+       "31   B,C & U  2015          6.220195e-07\n",
+       "32  Motorway  2015          7.741353e-08\n",
+       "33   B,C & U  2016          6.045147e-07\n",
+       "34  Motorway  2016          7.395864e-08\n",
+       "35         A  2016          4.268668e-07\n",
+       "36  Motorway  2017          6.448326e-08\n",
+       "37         A  2017          3.877747e-07\n",
+       "38   B,C & U  2017          5.855451e-07\n",
+       "39  Motorway  2018          6.123188e-08\n",
+       "40         A  2018          3.623149e-07\n",
+       "41   B,C & U  2018          5.537736e-07\n",
+       "42         A  2019          3.506125e-07\n",
+       "43   B,C & U  2019          5.179304e-07\n",
+       "44  Motorway  2019          5.404255e-08"
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 'Accident Probability' = 'Total accidents' / 'Trafficvolume' for each (road_name, year) row.\n",
+    "# The two input columns are removed afterwards, so the step is guarded: re-running this cell\n",
+    "# once result23 has already been reduced is a no-op instead of a KeyError.\n",
+    "rate_inputs = ['Total accidents', 'Trafficvolume']\n",
+    "if set(rate_inputs).issubset(result23.columns):\n",
+    "    accident_rate = result23['Total accidents'] / result23['Trafficvolume']\n",
+    "    # Build a new frame rather than adding a column to the merged one in place.\n",
+    "    result23 = (\n",
+    "        result23\n",
+    "        .drop(columns=rate_inputs)\n",
+    "        .assign(**{'Accident Probability': accident_rate})\n",
+    "    )\n",
+    "result23"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+----+--------------------+\n",
+      "|road_name|year|Accident Probability|\n",
+      "+---------+----+--------------------+\n",
+      "|        A|2005|   642.2799422799424|\n",
+      "| Motorway|2005|   135.9535655058043|\n",
+      "|  B,C & U|2005|   949.6445275958839|\n",
+      "|        A|2006|   601.4875444839857|\n",
+      "|  B,C & U|2006|   894.8381128584645|\n",
+      "| Motorway|2006|  128.15533980582524|\n",
+      "| Motorway|2007|             119.808|\n",
+      "|  B,C & U|2007|   835.4905490549055|\n",
+      "|        A|2007|   585.5690765926987|\n",
+      "|        A|2008|   557.8772563176896|\n",
+      "| Motorway|2008|  109.67845659163987|\n",
+      "|  B,C & U|2008|   787.8233151183971|\n",
+      "|  B,C & U|2009|   771.3140726933831|\n",
+      "|        A|2009|   539.9421128798842|\n",
+      "| Motorway|2009|    99.8705501618123|\n",
+      "|        A|2010|   518.3870967741935|\n",
+      "| Motorway|2010|   99.44262295081967|\n",
+      "|  B,C & U|2010|   733.8374291115313|\n",
+      "|        A|2011|   513.3503649635037|\n",
+      "|  B,C & U|2011|   718.1611374407582|\n",
+      "+---------+----+--------------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Build a Spark DataFrame from the pandas frame result23 and print its first rows.\n",
+    "Accidenteeachyearwrtroad = spark.createDataFrame(result23)\n",
+    "Accidenteeachyearwrtroad.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>Accident Probability</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>642.279942</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>601.487544</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>585.569077</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>557.877256</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>539.942113</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>518.387097</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>513.350365</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>497.562592</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>477.444772</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>492.505415</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>458.737589</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>426.866805</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>387.774744</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>362.314939</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>350.612517</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year  Accident Probability\n",
+       "0          A  2005            642.279942\n",
+       "1          A  2006            601.487544\n",
+       "2          A  2007            585.569077\n",
+       "3          A  2008            557.877256\n",
+       "4          A  2009            539.942113\n",
+       "5          A  2010            518.387097\n",
+       "6          A  2011            513.350365\n",
+       "7          A  2012            497.562592\n",
+       "8          A  2013            477.444772\n",
+       "9          A  2014            492.505415\n",
+       "10         A  2015            458.737589\n",
+       "11         A  2016            426.866805\n",
+       "12         A  2017            387.774744\n",
+       "13         A  2018            362.314939\n",
+       "14         A  2019            350.612517"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# road_name takes three values in this table: 'A', 'Motorway' and the combined 'B,C & U'.\n",
+    "# Select each one by exact equality; substring tests on 'B', 'C' and 'U' all hit the same\n",
+    "# 'B,C & U' rows and would also pick up any other name that merely contains the letter.\n",
+    "def _rows_for_road(name):\n",
+    "    # Rows of Accidenteeachyearwrtroad whose road_name equals `name`, collected into pandas.\n",
+    "    matches = Accidenteeachyearwrtroad.filter(Accidenteeachyearwrtroad.road_name == name)\n",
+    "    return matches.toPandas()\n",
+    "\n",
+    "A = _rows_for_road('A')\n",
+    "M = _rows_for_road('Motorway')\n",
+    "# B, C and U only exist as one combined class, so the data is fetched once. The three\n",
+    "# names are kept as independent copies because later cells refer to each of them.\n",
+    "B = _rows_for_road('B,C & U')\n",
+    "C = B.copy()\n",
+    "U = B.copy()\n",
+    "A"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 1080x576 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "# Grouped bar chart of 'Accident Probability' per year, one bar per road class.\n",
+    "# 'B,C & U' is a single combined category (B holds its rows), so it is drawn once instead of\n",
+    "# three times under separate 'Road B' / 'Road C' / 'Road U' labels.\n",
+    "bar_width = 0.25  # three bars per year -> 0.75 wide, leaving a gap between year groups\n",
+    "road_series = [\n",
+    "    ('Road A', 'r', A),\n",
+    "    ('Roads B, C & U', 'g', B),\n",
+    "    ('Motorway', 'b', M),\n",
+    "]\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(15, 8))\n",
+    "\n",
+    "# Bars are matched to years by position, so every frame is ordered by year first.\n",
+    "years = A.sort_values('year')['year']\n",
+    "group_start = np.arange(len(years))\n",
+    "\n",
+    "for slot, (label, colour, frame) in enumerate(road_series):\n",
+    "    heights = frame.sort_values('year')['Accident Probability']\n",
+    "    ax.bar(group_start + slot * bar_width, heights, color=colour,\n",
+    "           width=bar_width, edgecolor='grey', label=label)\n",
+    "\n",
+    "ax.set_xlabel('year', fontweight='bold', fontsize=15)\n",
+    "ax.set_ylabel('Accidents probability', fontweight='bold', fontsize=15)\n",
+    "# Put each tick under the middle of its group of bars.\n",
+    "ax.set_xticks(group_start + bar_width * (len(road_series) - 1) / 2)\n",
+    "ax.set_xticklabels(years)\n",
+    "\n",
+    "ax.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>year</th>\n",
+       "      <th>Accident Probability</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>6.422799e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>1.359536e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2005</td>\n",
+       "      <td>9.496445e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>6.014875e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>8.948381e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2006</td>\n",
+       "      <td>1.281553e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>1.198080e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>8.354905e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>5.855691e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>5.578773e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>1.096785e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>7.878233e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>7.713141e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>5.399421e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>9.987055e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>5.183871e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>9.944262e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2010</td>\n",
+       "      <td>7.338374e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>5.133504e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>7.181611e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2011</td>\n",
+       "      <td>8.703883e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>8.352564e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>6.841165e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>4.975626e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>7.872038e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>4.774448e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>6.457786e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>6.552518e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>4.925054e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2014</td>\n",
+       "      <td>8.095679e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>4.587376e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>31</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>6.220195e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>32</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2015</td>\n",
+       "      <td>7.741353e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>33</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>6.045147e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>34</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>7.395864e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>35</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2016</td>\n",
+       "      <td>4.268668e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>36</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>6.448326e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>37</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>3.877747e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>38</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2017</td>\n",
+       "      <td>5.855451e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>39</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>6.123188e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>3.623149e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>41</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2018</td>\n",
+       "      <td>5.537736e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>A</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>3.506125e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>43</th>\n",
+       "      <td>B,C &amp; U</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>5.179304e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>44</th>\n",
+       "      <td>Motorway</td>\n",
+       "      <td>2019</td>\n",
+       "      <td>5.404255e-08</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   road_name  year  Accident Probability\n",
+       "0          A  2005          6.422799e-07\n",
+       "1   Motorway  2005          1.359536e-07\n",
+       "2    B,C & U  2005          9.496445e-07\n",
+       "3          A  2006          6.014875e-07\n",
+       "4    B,C & U  2006          8.948381e-07\n",
+       "5   Motorway  2006          1.281553e-07\n",
+       "6   Motorway  2007          1.198080e-07\n",
+       "7    B,C & U  2007          8.354905e-07\n",
+       "8          A  2007          5.855691e-07\n",
+       "9          A  2008          5.578773e-07\n",
+       "10  Motorway  2008          1.096785e-07\n",
+       "11   B,C & U  2008          7.878233e-07\n",
+       "12   B,C & U  2009          7.713141e-07\n",
+       "13         A  2009          5.399421e-07\n",
+       "14  Motorway  2009          9.987055e-08\n",
+       "15         A  2010          5.183871e-07\n",
+       "16  Motorway  2010          9.944262e-08\n",
+       "17   B,C & U  2010          7.338374e-07\n",
+       "18         A  2011          5.133504e-07\n",
+       "19   B,C & U  2011          7.181611e-07\n",
+       "20  Motorway  2011          8.703883e-08\n",
+       "21  Motorway  2012          8.352564e-08\n",
+       "22   B,C & U  2012          6.841165e-07\n",
+       "23         A  2012          4.975626e-07\n",
+       "24  Motorway  2013          7.872038e-08\n",
+       "25         A  2013          4.774448e-07\n",
+       "26   B,C & U  2013          6.457786e-07\n",
+       "27   B,C & U  2014          6.552518e-07\n",
+       "28         A  2014          4.925054e-07\n",
+       "29  Motorway  2014          8.095679e-08\n",
+       "30         A  2015          4.587376e-07\n",
+       "31   B,C & U  2015          6.220195e-07\n",
+       "32  Motorway  2015          7.741353e-08\n",
+       "33   B,C & U  2016          6.045147e-07\n",
+       "34  Motorway  2016          7.395864e-08\n",
+       "35         A  2016          4.268668e-07\n",
+       "36  Motorway  2017          6.448326e-08\n",
+       "37         A  2017          3.877747e-07\n",
+       "38   B,C & U  2017          5.855451e-07\n",
+       "39  Motorway  2018          6.123188e-08\n",
+       "40         A  2018          3.623149e-07\n",
+       "41   B,C & U  2018          5.537736e-07\n",
+       "42         A  2019          3.506125e-07\n",
+       "43   B,C & U  2019          5.179304e-07\n",
+       "44  Motorway  2019          5.404255e-08"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result23"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 411,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "numpy.float64"
+      ]
+     },
+     "execution_count": 411,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(result23['Accident Probability'][0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>road_name</th>\n",
+       "      <th>A</th>\n",
+       "      <th>B,C &amp; U</th>\n",
+       "      <th>Motorway</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>year</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2005</th>\n",
+       "      <td>6.422799e-07</td>\n",
+       "      <td>9.496445e-07</td>\n",
+       "      <td>1.359536e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2006</th>\n",
+       "      <td>6.014875e-07</td>\n",
+       "      <td>8.948381e-07</td>\n",
+       "      <td>1.281553e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007</th>\n",
+       "      <td>5.855691e-07</td>\n",
+       "      <td>8.354905e-07</td>\n",
+       "      <td>1.198080e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2008</th>\n",
+       "      <td>5.578773e-07</td>\n",
+       "      <td>7.878233e-07</td>\n",
+       "      <td>1.096785e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2009</th>\n",
+       "      <td>5.399421e-07</td>\n",
+       "      <td>7.713141e-07</td>\n",
+       "      <td>9.987055e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010</th>\n",
+       "      <td>5.183871e-07</td>\n",
+       "      <td>7.338374e-07</td>\n",
+       "      <td>9.944262e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2011</th>\n",
+       "      <td>5.133504e-07</td>\n",
+       "      <td>7.181611e-07</td>\n",
+       "      <td>8.703883e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2012</th>\n",
+       "      <td>4.975626e-07</td>\n",
+       "      <td>6.841165e-07</td>\n",
+       "      <td>8.352564e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013</th>\n",
+       "      <td>4.774448e-07</td>\n",
+       "      <td>6.457786e-07</td>\n",
+       "      <td>7.872038e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2014</th>\n",
+       "      <td>4.925054e-07</td>\n",
+       "      <td>6.552518e-07</td>\n",
+       "      <td>8.095679e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2015</th>\n",
+       "      <td>4.587376e-07</td>\n",
+       "      <td>6.220195e-07</td>\n",
+       "      <td>7.741353e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016</th>\n",
+       "      <td>4.268668e-07</td>\n",
+       "      <td>6.045147e-07</td>\n",
+       "      <td>7.395864e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2017</th>\n",
+       "      <td>3.877747e-07</td>\n",
+       "      <td>5.855451e-07</td>\n",
+       "      <td>6.448326e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2018</th>\n",
+       "      <td>3.623149e-07</td>\n",
+       "      <td>5.537736e-07</td>\n",
+       "      <td>6.123188e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2019</th>\n",
+       "      <td>3.506125e-07</td>\n",
+       "      <td>5.179304e-07</td>\n",
+       "      <td>5.404255e-08</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "road_name             A       B,C & U      Motorway\n",
+       "year                                               \n",
+       "2005       6.422799e-07  9.496445e-07  1.359536e-07\n",
+       "2006       6.014875e-07  8.948381e-07  1.281553e-07\n",
+       "2007       5.855691e-07  8.354905e-07  1.198080e-07\n",
+       "2008       5.578773e-07  7.878233e-07  1.096785e-07\n",
+       "2009       5.399421e-07  7.713141e-07  9.987055e-08\n",
+       "2010       5.183871e-07  7.338374e-07  9.944262e-08\n",
+       "2011       5.133504e-07  7.181611e-07  8.703883e-08\n",
+       "2012       4.975626e-07  6.841165e-07  8.352564e-08\n",
+       "2013       4.774448e-07  6.457786e-07  7.872038e-08\n",
+       "2014       4.925054e-07  6.552518e-07  8.095679e-08\n",
+       "2015       4.587376e-07  6.220195e-07  7.741353e-08\n",
+       "2016       4.268668e-07  6.045147e-07  7.395864e-08\n",
+       "2017       3.877747e-07  5.855451e-07  6.448326e-08\n",
+       "2018       3.623149e-07  5.537736e-07  6.123188e-08\n",
+       "2019       3.506125e-07  5.179304e-07  5.404255e-08"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Reshape to wide form: one row per year, one column per road type.\n",
+    "# pivot_table aggregates duplicate (year, road_name) pairs with its default aggfunc (mean).\n",
+    "result23_opiv = result23.pivot_table(\n",
+    "    values='Accident Probability',\n",
+    "    index=['year'],\n",
+    "    columns='road_name',\n",
+    ")\n",
+    "result23_opiv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[6.42279942e-07, 9.49644528e-07, 1.35953566e-07],\n",
+       "       [6.01487544e-07, 8.94838113e-07, 1.28155340e-07],\n",
+       "       [5.85569077e-07, 8.35490549e-07, 1.19808000e-07],\n",
+       "       [5.57877256e-07, 7.87823315e-07, 1.09678457e-07],\n",
+       "       [5.39942113e-07, 7.71314073e-07, 9.98705502e-08],\n",
+       "       [5.18387097e-07, 7.33837429e-07, 9.94426230e-08],\n",
+       "       [5.13350365e-07, 7.18161137e-07, 8.70388350e-08],\n",
+       "       [4.97562592e-07, 6.84116541e-07, 8.35256410e-08],\n",
+       "       [4.77444772e-07, 6.45778612e-07, 7.87203791e-08],\n",
+       "       [4.92505415e-07, 6.55251799e-07, 8.09567901e-08],\n",
+       "       [4.58737589e-07, 6.22019486e-07, 7.74135338e-08],\n",
+       "       [4.26866805e-07, 6.04514731e-07, 7.39586411e-08],\n",
+       "       [3.87774744e-07, 5.85545145e-07, 6.44832606e-08],\n",
+       "       [3.62314939e-07, 5.53773585e-07, 6.12318841e-08],\n",
+       "       [3.50612517e-07, 5.17930450e-07, 5.40425532e-08]])"
+      ]
+     },
+     "execution_count": 61,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset_table=result23_opiv \n",
+    "dataset_table.values "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 441,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[3.89627674e-08, 2.82889341e-07, 8.23868845e-08, 1.53996323e-07,\n",
+       "        5.87343321e-08],\n",
+       "       [3.74238123e-08, 2.45504978e-07, 7.56211633e-08, 1.16837963e-07,\n",
+       "        4.80592752e-08],\n",
+       "       [3.49098473e-08, 2.47299145e-07, 7.75155626e-08, 1.22236681e-07,\n",
+       "        4.76750721e-08],\n",
+       "       [3.49766807e-08, 9.07014908e-08, 3.91656652e-08, 1.10162938e-07,\n",
+       "        3.18085466e-08],\n",
+       "       [2.76508300e-08, 1.03867034e-07, 4.20038816e-08, 1.01638522e-07,\n",
+       "        3.08825045e-08],\n",
+       "       [2.70274897e-08, 1.57855282e-07, 8.55270531e-08, 9.78777857e-08,\n",
+       "        1.17858661e-07],\n",
+       "       [3.46589093e-08, 1.56377611e-07, 8.60335990e-08, 9.18201519e-08,\n",
+       "        1.14374627e-07],\n",
+       "       [3.16907830e-08, 1.51974973e-07, 8.25173364e-08, 7.95863994e-08,\n",
+       "        1.09948130e-07],\n",
+       "       [3.09490620e-08, 1.43997560e-07, 7.47498206e-08, 8.18530442e-08,\n",
+       "        1.03895088e-07],\n",
+       "       [4.18316136e-08, 1.51236539e-07, 7.95447618e-08, 7.99038052e-08,\n",
+       "        1.11770804e-07],\n",
+       "       [3.86734202e-08, 1.43694475e-07, 7.02071947e-08, 7.98037131e-08,\n",
+       "        1.14505931e-07],\n",
+       "       [4.41163389e-08, 1.42754547e-07, 6.04474594e-08, 1.33860542e-07,\n",
+       "        1.24629899e-07],\n",
+       "       [2.61999931e-08, 1.25253633e-07, 5.09385949e-08, 8.42046456e-08,\n",
+       "        1.25450231e-07],\n",
+       "       [3.81646556e-08, 8.94260266e-08, 2.27642733e-08, 1.80015380e-07,\n",
+       "        3.46230526e-08],\n",
+       "       [2.77836072e-08, 7.45232485e-08, 1.71157759e-08, 6.40762636e-08,\n",
+       "        3.74465946e-08]])"
+      ]
+     },
+     "execution_count": 441,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset_table.values "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Degree of Freedom:- 1\n"
+     ]
+    }
+   ],
+   "source": [
+    "import seaborn as sns\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import scipy.stats as stats\n",
+    "from scipy.stats import chi2\n",
+    "\n",
+    "# Worked chi-square example on seaborn's 'tips' example dataset (sex vs. smoker).\n",
+    "# NOTE(review): this rebinds `dataset_table`, which an earlier cell pointed at the\n",
+    "# accident-probability pivot; cells run after this one see the tips crosstab instead.\n",
+    "dataset = sns.load_dataset('tips')\n",
+    "dataset_table = pd.crosstab(dataset['sex'], dataset['smoker'])\n",
+    "\n",
+    "# Observed counts as a plain array.\n",
+    "Observed_Values = dataset_table.values\n",
+    "\n",
+    "# Index 3 of the chi2_contingency result is the table of expected frequencies.\n",
+    "val = stats.chi2_contingency(dataset_table)\n",
+    "Expected_Values = val[3]\n",
+    "\n",
+    "# Degrees of freedom = (rows - 1) * (columns - 1), read from the table's actual shape\n",
+    "# so the value stays correct for tables larger than 2x2.\n",
+    "no_of_rows, no_of_columns = dataset_table.shape\n",
+    "ddof = (no_of_rows - 1) * (no_of_columns - 1)\n",
+    "print(\"Degree of Freedom:-\", ddof)\n",
+    "alpha = 0.05\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pearson chi-square terms (O - E)^2 / E, summed down the rows: one value per column.\n",
+    "chi_square = ((Observed_Values - Expected_Values) ** 2. / Expected_Values).sum(axis=0)\n",
+    "# Total statistic over every column (previously only columns 0 and 1 were added).\n",
+    "chi_square_statistic = chi_square.sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[60 97]\n",
+      " [33 54]] [[59.84016393 97.15983607]\n",
+      " [33.15983607 53.84016393]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(Observed_Values,Expected_Values)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "hi [0.00042693 0.00026294]\n",
+      "hi [0.00077044 0.00047451]\n",
+      "r [0.0006898738224860787, 0.0012449447141415443]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "0.001934818536627623"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Manual Pearson chi-square: accumulate (O - E)^2 / E one row at a time.\n",
+    "# `result` is never written in this cell; it is kept only because a later cell displays result[0].\n",
+    "result = [[0, 0],\n",
+    "          [0, 0]]\n",
+    "r = []  # per-row chi-square contributions\n",
+    "for o, e in zip(Observed_Values, Expected_Values):\n",
+    "    a = (o - e) ** 2. / e\n",
+    "    print(\"hi\", a)\n",
+    "    # Add every column of the row (previously only a[0] + a[1]).\n",
+    "    r.append(a.sum())\n",
+    "print(\"r\", r)\n",
+    "# Add every row (previously only r[0] + r[1]).\n",
+    "chi_square_statistic = sum(r)\n",
+    "chi_square_statistic\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 373,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[0.0008538602229130974, 0.00052588742205906]"
+      ]
+     },
+     "execution_count": 373,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 470,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1.85745130e-09, 1.99654903e-08, 4.97591969e-10, 2.08401152e-10,\n",
+       "       2.67803669e-08])"
+      ]
+     },
+     "execution_count": 470,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.001934818536627623"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Total chi-square statistic: add up every per-row contribution in r, not just the first two.\n",
+    "chi_square_statistic = sum(r)\n",
+    "chi_square_statistic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>road_name</th>\n",
+       "      <th>A</th>\n",
+       "      <th>B,C &amp; U</th>\n",
+       "      <th>Motorway</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>year</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2005</th>\n",
+       "      <td>6.422799e-07</td>\n",
+       "      <td>9.496445e-07</td>\n",
+       "      <td>1.359536e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2006</th>\n",
+       "      <td>6.014875e-07</td>\n",
+       "      <td>8.948381e-07</td>\n",
+       "      <td>1.281553e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2007</th>\n",
+       "      <td>5.855691e-07</td>\n",
+       "      <td>8.354905e-07</td>\n",
+       "      <td>1.198080e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2008</th>\n",
+       "      <td>5.578773e-07</td>\n",
+       "      <td>7.878233e-07</td>\n",
+       "      <td>1.096785e-07</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2009</th>\n",
+       "      <td>5.399421e-07</td>\n",
+       "      <td>7.713141e-07</td>\n",
+       "      <td>9.987055e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010</th>\n",
+       "      <td>5.183871e-07</td>\n",
+       "      <td>7.338374e-07</td>\n",
+       "      <td>9.944262e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2011</th>\n",
+       "      <td>5.133504e-07</td>\n",
+       "      <td>7.181611e-07</td>\n",
+       "      <td>8.703883e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2012</th>\n",
+       "      <td>4.975626e-07</td>\n",
+       "      <td>6.841165e-07</td>\n",
+       "      <td>8.352564e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013</th>\n",
+       "      <td>4.774448e-07</td>\n",
+       "      <td>6.457786e-07</td>\n",
+       "      <td>7.872038e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2014</th>\n",
+       "      <td>4.925054e-07</td>\n",
+       "      <td>6.552518e-07</td>\n",
+       "      <td>8.095679e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2015</th>\n",
+       "      <td>4.587376e-07</td>\n",
+       "      <td>6.220195e-07</td>\n",
+       "      <td>7.741353e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016</th>\n",
+       "      <td>4.268668e-07</td>\n",
+       "      <td>6.045147e-07</td>\n",
+       "      <td>7.395864e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2017</th>\n",
+       "      <td>3.877747e-07</td>\n",
+       "      <td>5.855451e-07</td>\n",
+       "      <td>6.448326e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2018</th>\n",
+       "      <td>3.623149e-07</td>\n",
+       "      <td>5.537736e-07</td>\n",
+       "      <td>6.123188e-08</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2019</th>\n",
+       "      <td>3.506125e-07</td>\n",
+       "      <td>5.179304e-07</td>\n",
+       "      <td>5.404255e-08</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "road_name             A       B,C & U      Motorway\n",
+       "year                                               \n",
+       "2005       6.422799e-07  9.496445e-07  1.359536e-07\n",
+       "2006       6.014875e-07  8.948381e-07  1.281553e-07\n",
+       "2007       5.855691e-07  8.354905e-07  1.198080e-07\n",
+       "2008       5.578773e-07  7.878233e-07  1.096785e-07\n",
+       "2009       5.399421e-07  7.713141e-07  9.987055e-08\n",
+       "2010       5.183871e-07  7.338374e-07  9.944262e-08\n",
+       "2011       5.133504e-07  7.181611e-07  8.703883e-08\n",
+       "2012       4.975626e-07  6.841165e-07  8.352564e-08\n",
+       "2013       4.774448e-07  6.457786e-07  7.872038e-08\n",
+       "2014       4.925054e-07  6.552518e-07  8.095679e-08\n",
+       "2015       4.587376e-07  6.220195e-07  7.741353e-08\n",
+       "2016       4.268668e-07  6.045147e-07  7.395864e-08\n",
+       "2017       3.877747e-07  5.855451e-07  6.448326e-08\n",
+       "2018       3.623149e-07  5.537736e-07  6.123188e-08\n",
+       "2019       3.506125e-07  5.179304e-07  5.404255e-08"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result23_opiv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,\n",
+       "       2016, 2017, 2018, 2019])"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "years=result23['year'].unique()\n",
+    "years"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 1000x800 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from matplotlib.pyplot import figure\n",
+    "\n",
+    "# Line chart of accident probability per year, one line per road type.\n",
+    "plt.figure(figsize=(20, 16), dpi=50)\n",
+    "plt.rcParams.update({'font.size': 22})\n",
+    "\n",
+    "# Take the x values from the pivot's own index so they always line up with the\n",
+    "# plotted rows, instead of relying on a year list computed separately in another cell.\n",
+    "YEARList = result23_opiv.index.tolist()\n",
+    "M = result23_opiv['Motorway'].tolist()\n",
+    "A = result23_opiv['A'].tolist()\n",
+    "B = result23_opiv['B,C & U'].tolist()\n",
+    "\n",
+    "plt.plot(YEARList, M, label='Motorway', marker='o', linewidth=3)\n",
+    "plt.plot(YEARList, A, label='A road', marker='o', linewidth=3)\n",
+    "plt.plot(YEARList, B, label='B,C & U road', marker='o', linewidth=3)\n",
+    "\n",
+    "plt.xlabel('YEAR Number')\n",
+    "plt.ylabel('Accident Probability')\n",
+    "plt.legend(loc='upper left')\n",
+    "# One tick per year so every data point is labelled.\n",
+    "plt.xticks(YEARList)\n",
+    "plt.title('Accident Probability over road type')\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Over the years"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 111,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+----+------------------+\n",
+      "|road_name|year|all_motor_vehicles|\n",
+      "+---------+----+------------------+\n",
+      "|        A|2005|                 8|\n",
+      "|        A|2005|                 3|\n",
+      "|        A|2005|                13|\n",
+      "|        A|2005|                14|\n",
+      "|        A|2005|                11|\n",
+      "|        A|2005|                11|\n",
+      "|        A|2005|                13|\n",
+      "|        A|2005|                13|\n",
+      "|        A|2005|                13|\n",
+      "|        A|2005|                10|\n",
+      "|        A|2005|                17|\n",
+      "|        A|2005|                 4|\n",
+      "|        A|2005|                 5|\n",
+      "|        A|2005|                13|\n",
+      "|        A|2005|                12|\n",
+      "|        A|2005|                 7|\n",
+      "|        A|2005|                16|\n",
+      "|        A|2005|                 7|\n",
+      "|        A|2005|                 9|\n",
+      "|        A|2005|                18|\n",
+      "+---------+----+------------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "TrafficvolumeGroupedupdated.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 116,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>B</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>M</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>U</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>A</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  road_name\n",
+       "0         B\n",
+       "1         M\n",
+       "2         U\n",
+       "3         C\n",
+       "4         A"
+      ]
+     },
+     "execution_count": 116,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "road_length_traffic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "'Trafficvolume'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   2897\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2898\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2899\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'Trafficvolume'",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-102-20660c43864d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     14\u001b[0m \u001b[0mresult24\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mA2018t_df_notyear_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mroad_length_traffic\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'road_name'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mresult24\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Accident Probability\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult24\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Total accidents\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mresult24\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Trafficvolume\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     16\u001b[0m \u001b[0mresult24\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresult24\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Total accidents'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Trafficvolume'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresult24\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'road_name'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'Accident Probability'\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mrot\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtitle\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Accidents probabilty over road type \"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcolor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Orange\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   2904\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2905\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2906\u001b[0;31m             \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2907\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2908\u001b[0m                 \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   2898\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2899\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2900\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2902\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'Trafficvolume'"
+     ]
+    }
+   ],
+   "source": [
+    "# Accident counts per road class (first_road_class), not split by year.\n",
+    "A2018t_df_notyear = (\n",
+    "    A2018.groupby(\"first_road_class\")\n",
+    "    .agg(F.count(\"accident_index\").alias(\"Total accidents\"))\n",
+    "    .withColumnRenamed(\"first_road_class\", \"road_name\")\n",
+    ")\n",
+    "A2018t_df_notyear_df = A2018t_df_notyear.toPandas()\n",
+    "\n",
+    "# Sum of all_motor_vehicles per road_name, again without the year dimension.\n",
+    "TrafficvolumeGrouped_notyear = (\n",
+    "    TrafficvolumeGroupedupdated.select(\"road_name\", \"all_motor_vehicles\")\n",
+    "    .groupby(\"road_name\")\n",
+    "    .agg(F.sum(\"all_motor_vehicles\").alias(\"all_motor_vehicles\"))\n",
+    ")\n",
+    "TrafficvolumeGrouped_notyear_df = TrafficvolumeGrouped_notyear.toPandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 118,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>road_name</th>\n",
+       "      <th>Total accidents</th>\n",
+       "      <th>Trafficvolume</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>B</td>\n",
+       "      <td>286824</td>\n",
+       "      <td>2.057755e+12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>M</td>\n",
+       "      <td>86106</td>\n",
+       "      <td>8.846338e+11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>U</td>\n",
+       "      <td>687752</td>\n",
+       "      <td>1.175274e+13</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>C</td>\n",
+       "      <td>188025</td>\n",
+       "      <td>3.374912e+12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>A</td>\n",
+       "      <td>1038720</td>\n",
+       "      <td>3.126184e+13</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  road_name  Total accidents  Trafficvolume\n",
+       "0         B           286824   2.057755e+12\n",
+       "1         M            86106   8.846338e+11\n",
+       "2         U           687752   1.175274e+13\n",
+       "3         C           188025   3.374912e+12\n",
+       "4         A          1038720   3.126184e+13"
+      ]
+     },
+     "execution_count": 120,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# NOTE(review): absolute, machine-specific path - consider a configurable data directory.\n",
+    "road_length_total = pd.read_csv('/Users/Asfandyar/Desktop/disertation/diseration_final/road_length.csv')\n",
+    "\n",
+    "# Join summed vehicle counts with road lengths, strip the thousands separators\n",
+    "# from link_length_km, and keep only the product as Trafficvolume.\n",
+    "road_length_traffic = (\n",
+    "    pd.merge(TrafficvolumeGrouped_notyear_df, road_length_total, on=['road_name'])\n",
+    "    .assign(link_length_km=lambda frame: frame[\"link_length_km\"].str.replace(',','').astype(float))\n",
+    "    .assign(Trafficvolume=lambda frame: frame[\"all_motor_vehicles\"] * frame[\"link_length_km\"])\n",
+    "    .drop(['all_motor_vehicles', 'link_length_km'], axis=1)\n",
+    ")\n",
+    "\n",
+    "# Accident totals and traffic volume side by side, one row per road_name.\n",
+    "result24 = pd.merge(A2018t_df_notyear_df, road_length_traffic, on=['road_name'])\n",
+    "result24"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 1440x720 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Bar chart of accidents per unit of Trafficvolume for each road_name.\n",
+    "# The merge is repeated here so the cell can be re-run after result24 has had\n",
+    "# its input columns dropped.\n",
+    "result24 = pd.merge(A2018t_df_notyear_df, road_length_traffic, on=['road_name'])\n",
+    "result24[\"Accident Probability\"] = result24[\"Total accidents\"] / result24[\"Trafficvolume\"]\n",
+    "result24 = (\n",
+    "    result24\n",
+    "    .drop(['Total accidents', 'Trafficvolume'], axis=1)\n",
+    "    .sort_values('road_name')\n",
+    ")\n",
+    "ax = result24.plot.bar(\n",
+    "    x='road_name',\n",
+    "    y='Accident Probability',\n",
+    "    rot=0,\n",
+    "    title=\"Accidents probability over road type \",  # spelling of 'probability' corrected\n",
+    "    figsize=(20, 10),\n",
+    "    color=\"Orange\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 1440x720 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Human-readable labels for the numeric Road_Type codes; codes not listed here\n",
+    "# are left unchanged by the .otherwise() fallback below.\n",
+    "ROAD_TYPE_LABELS = {\n",
+    "    1: \"Roundabout\",\n",
+    "    2: \"One way street\",\n",
+    "    3: \"Dual carriageway\",\n",
+    "    6: \"Single carriageway\",\n",
+    "    7: \"Slip road\",\n",
+    "    9: \"Unknown\",\n",
+    "    12: \"One way street/Slip road\",\n",
+    "    -1: \"Data missing or out of range\",\n",
+    "}\n",
+    "\n",
+    "# Build the when(...).when(...) chain through F.when so the cell relies only on\n",
+    "# the `F` and `col` imports made in the notebook's first cell.\n",
+    "_road_type_expr = None\n",
+    "for _rt_code, _rt_label in ROAD_TYPE_LABELS.items():\n",
+    "    _rt_match = col(\"Road_Type\") == _rt_code\n",
+    "    if _road_type_expr is None:\n",
+    "        _road_type_expr = F.when(_rt_match, _rt_label)\n",
+    "    else:\n",
+    "        _road_type_expr = _road_type_expr.when(_rt_match, _rt_label)\n",
+    "A2018 = A2018.withColumn(\"Road_Type\", _road_type_expr.otherwise(col(\"Road_Type\")))\n",
+    "\n",
+    "# Accident counts per (Road_Type, road class), brought to pandas for the join.\n",
+    "dangeorusroadtype = A2018.groupby('Road_Type','first_road_class').agg(F.count(\"accident_index\").alias('Total accidents'))\n",
+    "dangeorusroadtype_df = dangeorusroadtype.toPandas().rename(columns={\"first_road_class\": \"road_name\"})\n",
+    "\n",
+    "# Divide each count by the Trafficvolume of its road class, then add the ratios\n",
+    "# up per Road_Type.\n",
+    "result30 = pd.merge(dangeorusroadtype_df, road_length_traffic, on=['road_name'])\n",
+    "result30[\"Accident Probability\"] = result30[\"Total accidents\"] / result30[\"Trafficvolume\"]\n",
+    "result30 = result30.drop(['Total accidents', 'Trafficvolume', 'road_name'], axis=1)\n",
+    "result30_df = result30.groupby('Road_Type', sort=False)[\"Accident Probability\"].sum().reset_index(name ='Accident Probability')\n",
+    "\n",
+    "# NOTE(review): this drops whichever row has index label 6 after the groupby.\n",
+    "# With sort=False that row follows the order of the merged data, which presumably\n",
+    "# is not fixed across Spark runs - confirm the intended Road_Type and drop it by\n",
+    "# name instead.\n",
+    "result30_df = result30_df.drop(labels=[6],axis=0)\n",
+    "result30_df = result30_df.sort_values('Road_Type')\n",
+    "ax = result30_df.plot.bar(\n",
+    "    x='Road_Type',\n",
+    "    y='Accident Probability',\n",
+    "    rot=90,\n",
+    "    title=\"Accidents probability over road type \",  # spelling of 'probability' corrected\n",
+    "    figsize=(20, 10),\n",
+    "    color=\"Orange\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 171,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Copy of A2018 whose date column is cast to string.\n",
+    "A20188 = A2018.withColumn(\"date\", F.col(\"date\").cast(\"string\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 172,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
n",
+      "|accident_index|accident_year|accident_reference|location_easting_osgr|location_northing_osgr|longitude| latitude|police_force|accident_severity|number_of_vehicles|number_of_casualties|      date|day_of_week| time|local_authority_district|local_authority_ons_district|local_authority_highway|first_road_class|first_road_number|road_type|speed_limit|junction_detail|junction_control|second_road_class|second_road_number|pedestrian_crossing_human_control|pedestrian_crossing_physical_facilities|light_conditions|weather_conditions|road_surface_conditions|special_conditions_at_site|carriageway_hazards|urban_or_rural_area|did_police_officer_attend_scene_of_accident|trunk_road_flag|lsoa_of_accident_location|\n",
n",
+      "| 200501BS00001|         2005|         01BS00001|               525680|                178240| -0.19117|51.489096|           1|                2|                 1|                   1|04/01/2005|          3|17:42|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|\n",
n",
+      "only showing top 1 row\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print a single row of A20188 to check the columns after the cast.\n",
+    "A20188.show(n=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 177,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "root\n",
+      " |-- Count_point_id: string (nullable = true)\n",
+      " |-- Direction_of_travel: string (nullable = true)\n",
+      " |-- year: integer (nullable = true)\n",
+      " |-- Count_date: string (nullable = true)\n",
+      " |-- hour: string (nullable = true)\n",
+      " |-- Region_id: string (nullable = true)\n",
+      " |-- Region_name: string (nullable = true)\n",
+      " |-- Region_ons_code: string (nullable = true)\n",
+      " |-- Local_authority_id: string (nullable = true)\n",
+      " |-- Local_authority_name: string (nullable = true)\n",
+      " |-- Local_authority_code: string (nullable = true)\n",
+      " |-- Road_name: string (nullable = true)\n",
+      " |-- Road_category: string (nullable = true)\n",
+      " |-- Road_type: string (nullable = true)\n",
+      " |-- Start_junction_road_name: string (nullable = true)\n",
+      " |-- End_junction_road_name: string (nullable = true)\n",
+      " |-- Easting: string (nullable = true)\n",
+      " |-- Northing: string (nullable = true)\n",
+      " |-- Latitude: string (nullable = true)\n",
+      " |-- Longitude: string (nullable = true)\n",
+      " |-- Link_length_km: string (nullable = true)\n",
+      " |-- Link_length_miles: string (nullable = true)\n",
+      " |-- Pedal_cycles: string (nullable = true)\n",
+      " |-- Two_wheeled_motor_vehicles: string (nullable = true)\n",
+      " |-- Cars_and_taxis: string (nullable = true)\n",
+      " |-- Buses_and_coaches: string (nullable = true)\n",
+      " |-- LGVs: string (nullable = true)\n",
+      " |-- HGVs_2_rigid_axle: string (nullable = true)\n",
+      " |-- HGVs_3_rigid_axle: string (nullable = true)\n",
+      " |-- HGVs_4_or_more_rigid_axle: string (nullable = true)\n",
+      " |-- HGVs_3_or_4_articulated_axle: string (nullable = true)\n",
+      " |-- HGVs_5_articulated_axle: string (nullable = true)\n",
+      " |-- HGVs_6_articulated_axle: string (nullable = true)\n",
+      " |-- All_HGVs: string (nullable = true)\n",
+      " |-- All_motor_vehicles: string (nullable = true)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "Trafficvolume.printSchema()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 183,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
n",
+      "|accident_index|accident_year|accident_reference|location_easting_osgr|location_northing_osgr|longitude| latitude|police_force|accident_severity|number_of_vehicles|number_of_casualties|      date|day_of_week| time|local_authority_district|local_authority_ons_district|local_authority_highway|first_road_class|first_road_number|road_type|speed_limit|junction_detail|junction_control|second_road_class|second_road_number|pedestrian_crossing_human_control|pedestrian_crossing_physical_facilities|light_conditions|weather_conditions|road_surface_conditions|special_conditions_at_site|carriageway_hazards|urban_or_rural_area|did_police_officer_attend_scene_of_accident|trunk_road_flag|lsoa_of_accident_location|timestamp|\n",
n",
+      "| 200501BS00001|         2005|         01BS00001|               525680|                178240| -0.19117|51.489096|           1|                2|                 1|                   1|04/01/2005|          3|17:42|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|     null|\n",
+      "| 200501BS00002|         2005|         01BS00002|               524170|                181650|-0.211708|51.520075|           1|                3|                 1|                   1|05/01/2005|          4|17:36|                      12|                   E09000020|              E09000020|               4|              450|        3|         30|              6|               2|                5|                 0|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002909|     null|\n",
+      "| 200501BS00003|         2005|         01BS00003|               524520|                182240|-0.206458|51.525301|           1|                3|                 2|                   1|06/01/2005|          5|00:15|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002857|     null|\n",
+      "| 200501BS00004|         2005|         01BS00004|               526900|                177530|-0.173862|51.482442|           1|                3|                 1|                   1|07/01/2005|          6|10:35|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|     null|\n",
+      "| 200501BS00005|         2005|         01BS00005|               528060|                179040|-0.156618|51.495752|           1|                3|                 1|                   1|10/01/2005|          2|21:13|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               7|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002863|     null|\n",
+      "| 200501BS00006|         2005|         01BS00006|               524770|                181160|-0.203238| 51.51554|           1|                3|                 2|                   1|11/01/2005|          3|12:40|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 2|                      2|                         6|                  0|                  1|                                          1|              2|                E01002832|     null|\n",
+      "| 200501BS00007|         2005|         01BS00007|               524220|                180830|-0.211277|51.512695|           1|                3|                 2|                   1|13/01/2005|          5|20:40|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|     null|\n",
+      "| 200501BS00009|         2005|         01BS00009|               525890|                179710|-0.187623| 51.50226|           1|                3|                 1|                   2|14/01/2005|          6|17:35|                      12|                   E09000020|              E09000020|               3|              315|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002889|     null|\n",
+      "| 200501BS00010|         2005|         01BS00010|               527350|                177650|-0.167342| 51.48342|           1|                3|                 2|                   2|15/01/2005|          7|22:43|                      12|                   E09000020|              E09000020|               3|             3212|        6|         30|              6|               2|                4|               304|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002900|     null|\n",
+      "| 200501BS00011|         2005|         01BS00011|               524550|                180810|-0.206531|51.512443|           1|                3|                 2|                   5|15/01/2005|          7|16:00|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              3|               4|                5|                 0|                                0|                                      8|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|     null|\n",
+      "| 200501BS00012|         2005|         01BS00012|               526240|                178900|-0.182872|51.494902|           1|                3|                 1|                   1|16/01/2005|          1|00:42|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              6|               2|                4|               325|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002835|     null|\n",
+      "| 200501BS00014|         2005|         01BS00014|               526170|                177690|-0.184312|51.484044|           1|                3|                 2|                   1|25/01/2005|          3|20:48|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              6|               2|                3|               308|                                0|                                      5|               4|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002912|     null|\n",
+      "| 200501BS00015|         2005|         01BS00015|               525590|                178520|-0.192366|51.491632|           1|                3|                 1|                   1|11/01/2005|          3|12:55|                      12|                   E09000020|              E09000020|               6|                0|        2|         30|              3|               4|                3|              3220|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|     null|\n",
+      "| 200501BS00016|         2005|         01BS00016|               527990|                178690|-0.157753|51.492622|           1|                3|                 2|                   1|18/01/2005|          3|05:01|                      12|                   E09000020|              E09000020|               3|             3217|        2|         30|              3|               4|                3|              3216|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002902|     null|\n",
+      "| 200501BS00017|         2005|         01BS00017|               526700|                178970|-0.176224|51.495429|           1|                3|                 1|                   2|18/01/2005|          3|11:15|                      12|                   E09000020|              E09000020|               3|                4|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|     null|\n",
+      "| 200501BS00018|         2005|         01BS00018|               526460|                177460| -0.18022|51.481912|           1|                3|                 1|                   1|18/01/2005|          3|10:50|                      12|                   E09000020|              E09000020|               3|             3217|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|     null|\n",
+      "| 200501BS00019|         2005|         01BS00019|               524680|                179450|-0.205139|51.500191|           1|                2|                 2|                   1|20/01/2005|          5|00:15|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002864|     null|\n",
+      "| 200501BS00020|         2005|         01BS00020|               527000|                179020|-0.171887|51.495811|           1|                3|                 2|                   1|21/01/2005|          6|09:15|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              3|               4|                3|                 4|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|     null|\n",
+      "| 200501BS00021|         2005|         01BS00021|               527810|                178010| -0.16059|51.486552|           1|                3|                 2|                   1|21/01/2005|          6|21:16|                      12|                   E09000020|              E09000020|               4|              302|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002901|     null|\n",
+      "| 200501BS00022|         2005|         01BS00022|               526790|                178980|-0.174925|51.495498|           1|                2|                 1|                   1|08/01/2005|          7|03:00|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|     null|\n",
n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Preview A2018_df2: print the first 20 rows, truncating long string values.\n",
+    "A2018_df2.show(n=20, truncate=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 196,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
n",
+      "|accident_index|accident_year|accident_reference|location_easting_osgr|location_northing_osgr|longitude| latitude|police_force|accident_severity|number_of_vehicles|number_of_casualties|               date|day_of_week| time|local_authority_district|local_authority_ons_district|local_authority_highway|first_road_class|first_road_number|road_type|speed_limit|junction_detail|junction_control|second_road_class|second_road_number|pedestrian_crossing_human_control|pedestrian_crossing_physical_facilities|light_conditions|weather_conditions|road_surface_conditions|special_conditions_at_site|carriageway_hazards|urban_or_rural_area|did_police_officer_attend_scene_of_accident|trunk_road_flag|lsoa_of_accident_location|          timestamp|\n",
n",
+      "| 200501BS00001|         2005|         01BS00001|               525680|                178240| -0.19117|51.489096|           1|                2|                 1|                   1|2005-01-04 00:00:00|          3|17:42|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|2005-01-04 00:00:00|\n",
+      "| 200501BS00002|         2005|         01BS00002|               524170|                181650|-0.211708|51.520075|           1|                3|                 1|                   1|2005-01-05 00:00:00|          4|17:36|                      12|                   E09000020|              E09000020|               4|              450|        3|         30|              6|               2|                5|                 0|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002909|2005-01-05 00:00:00|\n",
+      "| 200501BS00003|         2005|         01BS00003|               524520|                182240|-0.206458|51.525301|           1|                3|                 2|                   1|2005-01-06 00:00:00|          5|00:15|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002857|2005-01-06 00:00:00|\n",
+      "| 200501BS00004|         2005|         01BS00004|               526900|                177530|-0.173862|51.482442|           1|                3|                 1|                   1|2005-01-07 00:00:00|          6|10:35|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-01-07 00:00:00|\n",
+      "| 200501BS00005|         2005|         01BS00005|               528060|                179040|-0.156618|51.495752|           1|                3|                 1|                   1|2005-01-10 00:00:00|          2|21:13|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               7|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002863|2005-01-10 00:00:00|\n",
+      "| 200501BS00006|         2005|         01BS00006|               524770|                181160|-0.203238| 51.51554|           1|                3|                 2|                   1|2005-01-11 00:00:00|          3|12:40|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 2|                      2|                         6|                  0|                  1|                                          1|              2|                E01002832|2005-01-11 00:00:00|\n",
+      "| 200501BS00007|         2005|         01BS00007|               524220|                180830|-0.211277|51.512695|           1|                3|                 2|                   1|2005-01-13 00:00:00|          5|20:40|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|2005-01-13 00:00:00|\n",
+      "| 200501BS00009|         2005|         01BS00009|               525890|                179710|-0.187623| 51.50226|           1|                3|                 1|                   2|2005-01-14 00:00:00|          6|17:35|                      12|                   E09000020|              E09000020|               3|              315|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002889|2005-01-14 00:00:00|\n",
+      "| 200501BS00010|         2005|         01BS00010|               527350|                177650|-0.167342| 51.48342|           1|                3|                 2|                   2|2005-01-15 00:00:00|          7|22:43|                      12|                   E09000020|              E09000020|               3|             3212|        6|         30|              6|               2|                4|               304|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002900|2005-01-15 00:00:00|\n",
+      "| 200501BS00011|         2005|         01BS00011|               524550|                180810|-0.206531|51.512443|           1|                3|                 2|                   5|2005-01-15 00:00:00|          7|16:00|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              3|               4|                5|                 0|                                0|                                      8|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|2005-01-15 00:00:00|\n",
+      "| 200501BS00012|         2005|         01BS00012|               526240|                178900|-0.182872|51.494902|           1|                3|                 1|                   1|2005-01-16 00:00:00|          1|00:42|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              6|               2|                4|               325|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002835|2005-01-16 00:00:00|\n",
+      "| 200501BS00014|         2005|         01BS00014|               526170|                177690|-0.184312|51.484044|           1|                3|                 2|                   1|2005-01-25 00:00:00|          3|20:48|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              6|               2|                3|               308|                                0|                                      5|               4|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002912|2005-01-25 00:00:00|\n",
+      "| 200501BS00015|         2005|         01BS00015|               525590|                178520|-0.192366|51.491632|           1|                3|                 1|                   1|2005-01-11 00:00:00|          3|12:55|                      12|                   E09000020|              E09000020|               6|                0|        2|         30|              3|               4|                3|              3220|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|2005-01-11 00:00:00|\n",
+      "| 200501BS00016|         2005|         01BS00016|               527990|                178690|-0.157753|51.492622|           1|                3|                 2|                   1|2005-01-18 00:00:00|          3|05:01|                      12|                   E09000020|              E09000020|               3|             3217|        2|         30|              3|               4|                3|              3216|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002902|2005-01-18 00:00:00|\n",
+      "| 200501BS00017|         2005|         01BS00017|               526700|                178970|-0.176224|51.495429|           1|                3|                 1|                   2|2005-01-18 00:00:00|          3|11:15|                      12|                   E09000020|              E09000020|               3|                4|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-18 00:00:00|\n",
+      "| 200501BS00018|         2005|         01BS00018|               526460|                177460| -0.18022|51.481912|           1|                3|                 1|                   1|2005-01-18 00:00:00|          3|10:50|                      12|                   E09000020|              E09000020|               3|             3217|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-01-18 00:00:00|\n",
+      "| 200501BS00019|         2005|         01BS00019|               524680|                179450|-0.205139|51.500191|           1|                2|                 2|                   1|2005-01-20 00:00:00|          5|00:15|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002864|2005-01-20 00:00:00|\n",
+      "| 200501BS00020|         2005|         01BS00020|               527000|                179020|-0.171887|51.495811|           1|                3|                 2|                   1|2005-01-21 00:00:00|          6|09:15|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              3|               4|                3|                 4|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-21 00:00:00|\n",
+      "| 200501BS00021|         2005|         01BS00021|               527810|                178010| -0.16059|51.486552|           1|                3|                 2|                   1|2005-01-21 00:00:00|          6|21:16|                      12|                   E09000020|              E09000020|               4|              302|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002901|2005-01-21 00:00:00|\n",
+      "| 200501BS00022|         2005|         01BS00022|               526790|                178980|-0.174925|51.495498|           1|                2|                 1|                   1|2005-01-08 00:00:00|          7|03:00|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-08 00:00:00|\n",
n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Derive `timestamp` from `date` using an explicit day-first pattern.\n",
+    "# A plain cast('timestamp') only parses ISO-style strings and silently yields\n",
+    "# null for values such as '11/01/2005', so the result depended on whether\n",
+    "# `date` had already been converted by another cell. to_timestamp() with a\n",
+    "# pattern parses the raw string and also accepts a column that is already a\n",
+    "# timestamp, which keeps this cell safe to re-run.\n",
+    "A2018_df2 = A2018_df2.withColumn('timestamp', F.to_timestamp(F.col('date'), 'dd/MM/yyyy'))\n",
+    "A2018_df2.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 197,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
n",
+      "|accident_index|accident_year|accident_reference|location_easting_osgr|location_northing_osgr|longitude| latitude|police_force|accident_severity|number_of_vehicles|number_of_casualties|               date|day_of_week| time|local_authority_district|local_authority_ons_district|local_authority_highway|first_road_class|first_road_number|road_type|speed_limit|junction_detail|junction_control|second_road_class|second_road_number|pedestrian_crossing_human_control|pedestrian_crossing_physical_facilities|light_conditions|weather_conditions|road_surface_conditions|special_conditions_at_site|carriageway_hazards|urban_or_rural_area|did_police_officer_attend_scene_of_accident|trunk_road_flag|lsoa_of_accident_location|          timestamp|month|\n",
n",
+      "| 200501BS00001|         2005|         01BS00001|               525680|                178240| -0.19117|51.489096|           1|                2|                 1|                   1|2005-01-04 00:00:00|          3|17:42|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|2005-01-04 00:00:00|    1|\n",
+      "| 200501BS00002|         2005|         01BS00002|               524170|                181650|-0.211708|51.520075|           1|                3|                 1|                   1|2005-01-05 00:00:00|          4|17:36|                      12|                   E09000020|              E09000020|               4|              450|        3|         30|              6|               2|                5|                 0|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002909|2005-01-05 00:00:00|    1|\n",
+      "| 200501BS00003|         2005|         01BS00003|               524520|                182240|-0.206458|51.525301|           1|                3|                 2|                   1|2005-01-06 00:00:00|          5|00:15|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002857|2005-01-06 00:00:00|    1|\n",
+      "| 200501BS00004|         2005|         01BS00004|               526900|                177530|-0.173862|51.482442|           1|                3|                 1|                   1|2005-01-07 00:00:00|          6|10:35|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-01-07 00:00:00|    1|\n",
+      "| 200501BS00005|         2005|         01BS00005|               528060|                179040|-0.156618|51.495752|           1|                3|                 1|                   1|2005-01-10 00:00:00|          2|21:13|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               7|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002863|2005-01-10 00:00:00|    1|\n",
+      "| 200501BS00006|         2005|         01BS00006|               524770|                181160|-0.203238| 51.51554|           1|                3|                 2|                   1|2005-01-11 00:00:00|          3|12:40|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 2|                      2|                         6|                  0|                  1|                                          1|              2|                E01002832|2005-01-11 00:00:00|    1|\n",
+      "| 200501BS00007|         2005|         01BS00007|               524220|                180830|-0.211277|51.512695|           1|                3|                 2|                   1|2005-01-13 00:00:00|          5|20:40|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|2005-01-13 00:00:00|    1|\n",
+      "| 200501BS00009|         2005|         01BS00009|               525890|                179710|-0.187623| 51.50226|           1|                3|                 1|                   2|2005-01-14 00:00:00|          6|17:35|                      12|                   E09000020|              E09000020|               3|              315|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002889|2005-01-14 00:00:00|    1|\n",
+      "| 200501BS00010|         2005|         01BS00010|               527350|                177650|-0.167342| 51.48342|           1|                3|                 2|                   2|2005-01-15 00:00:00|          7|22:43|                      12|                   E09000020|              E09000020|               3|             3212|        6|         30|              6|               2|                4|               304|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002900|2005-01-15 00:00:00|    1|\n",
+      "| 200501BS00011|         2005|         01BS00011|               524550|                180810|-0.206531|51.512443|           1|                3|                 2|                   5|2005-01-15 00:00:00|          7|16:00|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              3|               4|                5|                 0|                                0|                                      8|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|2005-01-15 00:00:00|    1|\n",
+      "| 200501BS00012|         2005|         01BS00012|               526240|                178900|-0.182872|51.494902|           1|                3|                 1|                   1|2005-01-16 00:00:00|          1|00:42|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              6|               2|                4|               325|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002835|2005-01-16 00:00:00|    1|\n",
+      "| 200501BS00014|         2005|         01BS00014|               526170|                177690|-0.184312|51.484044|           1|                3|                 2|                   1|2005-01-25 00:00:00|          3|20:48|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              6|               2|                3|               308|                                0|                                      5|               4|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002912|2005-01-25 00:00:00|    1|\n",
+      "| 200501BS00015|         2005|         01BS00015|               525590|                178520|-0.192366|51.491632|           1|                3|                 1|                   1|2005-01-11 00:00:00|          3|12:55|                      12|                   E09000020|              E09000020|               6|                0|        2|         30|              3|               4|                3|              3220|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|2005-01-11 00:00:00|    1|\n",
+      "| 200501BS00016|         2005|         01BS00016|               527990|                178690|-0.157753|51.492622|           1|                3|                 2|                   1|2005-01-18 00:00:00|          3|05:01|                      12|                   E09000020|              E09000020|               3|             3217|        2|         30|              3|               4|                3|              3216|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002902|2005-01-18 00:00:00|    1|\n",
+      "| 200501BS00017|         2005|         01BS00017|               526700|                178970|-0.176224|51.495429|           1|                3|                 1|                   2|2005-01-18 00:00:00|          3|11:15|                      12|                   E09000020|              E09000020|               3|                4|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-18 00:00:00|    1|\n",
+      "| 200501BS00018|         2005|         01BS00018|               526460|                177460| -0.18022|51.481912|           1|                3|                 1|                   1|2005-01-18 00:00:00|          3|10:50|                      12|                   E09000020|              E09000020|               3|             3217|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-01-18 00:00:00|    1|\n",
+      "| 200501BS00019|         2005|         01BS00019|               524680|                179450|-0.205139|51.500191|           1|                2|                 2|                   1|2005-01-20 00:00:00|          5|00:15|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002864|2005-01-20 00:00:00|    1|\n",
+      "| 200501BS00020|         2005|         01BS00020|               527000|                179020|-0.171887|51.495811|           1|                3|                 2|                   1|2005-01-21 00:00:00|          6|09:15|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              3|               4|                3|                 4|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-21 00:00:00|    1|\n",
+      "| 200501BS00021|         2005|         01BS00021|               527810|                178010| -0.16059|51.486552|           1|                3|                 2|                   1|2005-01-21 00:00:00|          6|21:16|                      12|                   E09000020|              E09000020|               4|              302|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002901|2005-01-21 00:00:00|    1|\n",
+      "| 200501BS00022|         2005|         01BS00022|               526790|                178980|-0.174925|51.495498|           1|                2|                 1|                   1|2005-01-08 00:00:00|          7|03:00|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-08 00:00:00|    1|\n",
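+      "+--------------+-------------+------------------+---------------------+----------------------+---------+---------+------------+-----------------+------------------+--------------------+-------------------+-----------+-----+------------------------+----------------------------+-----------------------+----------------+-----------------+---------+-----------+---------------+----------------+-----------------+------------------+---------------------------------+---------------------------------------+----------------+------------------+-----------------------+--------------------------+-------------------+-------------------+-------------------------------------------+---------------+-------------------------+-------------------+-----+\n",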
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pyspark.sql.functions import *\n",
+    "\n",
+    "# Derive a 'month' column from the 'timestamp' column and preview the result.\n",
+    "# NOTE(review): the only assignment of A2018_df2 visible nearby is in a later\n",
+    "# cell of this notebook -- confirm the execution order before a Run All.\n",
+    "TimeAccident_dfmonth = A2018_df2.withColumn(\n",
+    "    'month',\n",
+    "    month(A2018_df2['timestamp']),\n",
+    ")\n",
+    "TimeAccident_dfmonth.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 193,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyspark.sql import functions as F\n",
+    "\n",
+    "# Convert the 'date' column of A20188 with the pattern dd/MM/yyyy (day/month/year).\n",
+    "# to_date is called through the F alias imported above, so this cell no longer\n",
+    "# relies on a wildcard import having been executed in some other cell.\n",
+    "# NOTE(review): assumes 'date' holds dd/MM/yyyy strings -- values that do not\n",
+    "# match the pattern would not parse; verify against the loaded CSV.\n",
+    "A2018_df2 = A20188.withColumn(\"date\", F.to_date(\"date\", \"dd/MM/yyyy\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 198,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
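+      "+--------------+-------------+------------------+---------------------+----------------------+---------+---------+------------+-----------------+------------------+--------------------+-------------------+-----------+-----+------------------------+----------------------------+-----------------------+----------------+-----------------+---------+-----------+---------------+----------------+-----------------+------------------+---------------------------------+---------------------------------------+----------------+------------------+-----------------------+--------------------------+-------------------+-------------------+-------------------------------------------+---------------+-------------------------+-------------------+-----+\n",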
+      "|accident_index|accident_year|accident_reference|location_easting_osgr|location_northing_osgr|longitude| latitude|police_force|accident_severity|number_of_vehicles|number_of_casualties|               date|day_of_week| time|local_authority_district|local_authority_ons_district|local_authority_highway|first_road_class|first_road_number|road_type|speed_limit|junction_detail|junction_control|second_road_class|second_road_number|pedestrian_crossing_human_control|pedestrian_crossing_physical_facilities|light_conditions|weather_conditions|road_surface_conditions|special_conditions_at_site|carriageway_hazards|urban_or_rural_area|did_police_officer_attend_scene_of_accident|trunk_road_flag|lsoa_of_accident_location|          timestamp|month|\n",
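+      "+--------------+-------------+------------------+---------------------+----------------------+---------+---------+------------+-----------------+------------------+--------------------+-------------------+-----------+-----+------------------------+----------------------------+-----------------------+----------------+-----------------+---------+-----------+---------------+----------------+-----------------+------------------+---------------------------------+---------------------------------------+----------------+------------------+-----------------------+--------------------------+-------------------+-------------------+-------------------------------------------+---------------+-------------------------+-------------------+-----+\n",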
+      "| 200501BS00001|         2005|         01BS00001|               525680|                178240| -0.19117|51.489096|           1|                2|                 1|                   1|2005-01-04 00:00:00|          3|17:42|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|2005-01-04 00:00:00|    1|\n",
+      "| 200501BS00002|         2005|         01BS00002|               524170|                181650|-0.211708|51.520075|           1|                3|                 1|                   1|2005-01-05 00:00:00|          4|17:36|                      12|                   E09000020|              E09000020|               4|              450|        3|         30|              6|               2|                5|                 0|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002909|2005-01-05 00:00:00|    1|\n",
+      "| 200501BS00003|         2005|         01BS00003|               524520|                182240|-0.206458|51.525301|           1|                3|                 2|                   1|2005-01-06 00:00:00|          5|00:15|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002857|2005-01-06 00:00:00|    1|\n",
+      "| 200501BS00004|         2005|         01BS00004|               526900|                177530|-0.173862|51.482442|           1|                3|                 1|                   1|2005-01-07 00:00:00|          6|10:35|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-01-07 00:00:00|    1|\n",
+      "| 200501BS00005|         2005|         01BS00005|               528060|                179040|-0.156618|51.495752|           1|                3|                 1|                   1|2005-01-10 00:00:00|          2|21:13|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               7|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002863|2005-01-10 00:00:00|    1|\n",
+      "| 200501BS00006|         2005|         01BS00006|               524770|                181160|-0.203238| 51.51554|           1|                3|                 2|                   1|2005-01-11 00:00:00|          3|12:40|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 2|                      2|                         6|                  0|                  1|                                          1|              2|                E01002832|2005-01-11 00:00:00|    1|\n",
+      "| 200501BS00007|         2005|         01BS00007|               524220|                180830|-0.211277|51.512695|           1|                3|                 2|                   1|2005-01-13 00:00:00|          5|20:40|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|2005-01-13 00:00:00|    1|\n",
+      "| 200501BS00009|         2005|         01BS00009|               525890|                179710|-0.187623| 51.50226|           1|                3|                 1|                   2|2005-01-14 00:00:00|          6|17:35|                      12|                   E09000020|              E09000020|               3|              315|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002889|2005-01-14 00:00:00|    1|\n",
+      "| 200501BS00010|         2005|         01BS00010|               527350|                177650|-0.167342| 51.48342|           1|                3|                 2|                   2|2005-01-15 00:00:00|          7|22:43|                      12|                   E09000020|              E09000020|               3|             3212|        6|         30|              6|               2|                4|               304|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002900|2005-01-15 00:00:00|    1|\n",
+      "| 200501BS00011|         2005|         01BS00011|               524550|                180810|-0.206531|51.512443|           1|                3|                 2|                   5|2005-01-15 00:00:00|          7|16:00|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              3|               4|                5|                 0|                                0|                                      8|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|2005-01-15 00:00:00|    1|\n",
+      "| 200501BS00012|         2005|         01BS00012|               526240|                178900|-0.182872|51.494902|           1|                3|                 1|                   1|2005-01-16 00:00:00|          1|00:42|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              6|               2|                4|               325|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002835|2005-01-16 00:00:00|    1|\n",
+      "| 200501BS00014|         2005|         01BS00014|               526170|                177690|-0.184312|51.484044|           1|                3|                 2|                   1|2005-01-25 00:00:00|          3|20:48|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              6|               2|                3|               308|                                0|                                      5|               4|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002912|2005-01-25 00:00:00|    1|\n",
+      "| 200501BS00015|         2005|         01BS00015|               525590|                178520|-0.192366|51.491632|           1|                3|                 1|                   1|2005-01-11 00:00:00|          3|12:55|                      12|                   E09000020|              E09000020|               6|                0|        2|         30|              3|               4|                3|              3220|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|2005-01-11 00:00:00|    1|\n",
+      "| 200501BS00016|         2005|         01BS00016|               527990|                178690|-0.157753|51.492622|           1|                3|                 2|                   1|2005-01-18 00:00:00|          3|05:01|                      12|                   E09000020|              E09000020|               3|             3217|        2|         30|              3|               4|                3|              3216|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002902|2005-01-18 00:00:00|    1|\n",
+      "| 200501BS00017|         2005|         01BS00017|               526700|                178970|-0.176224|51.495429|           1|                3|                 1|                   2|2005-01-18 00:00:00|          3|11:15|                      12|                   E09000020|              E09000020|               3|                4|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-18 00:00:00|    1|\n",
+      "| 200501BS00018|         2005|         01BS00018|               526460|                177460| -0.18022|51.481912|           1|                3|                 1|                   1|2005-01-18 00:00:00|          3|10:50|                      12|                   E09000020|              E09000020|               3|             3217|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-01-18 00:00:00|    1|\n",
+      "| 200501BS00019|         2005|         01BS00019|               524680|                179450|-0.205139|51.500191|           1|                2|                 2|                   1|2005-01-20 00:00:00|          5|00:15|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002864|2005-01-20 00:00:00|    1|\n",
+      "| 200501BS00020|         2005|         01BS00020|               527000|                179020|-0.171887|51.495811|           1|                3|                 2|                   1|2005-01-21 00:00:00|          6|09:15|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              3|               4|                3|                 4|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-21 00:00:00|    1|\n",
+      "| 200501BS00021|         2005|         01BS00021|               527810|                178010| -0.16059|51.486552|           1|                3|                 2|                   1|2005-01-21 00:00:00|          6|21:16|                      12|                   E09000020|              E09000020|               4|              302|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002901|2005-01-21 00:00:00|    1|\n",
+      "| 200501BS00022|         2005|         01BS00022|               526790|                178980|-0.174925|51.495498|           1|                2|                 1|                   1|2005-01-08 00:00:00|          7|03:00|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-08 00:00:00|    1|\n",
+      "| 200501BS00023|         2005|         01BS00023|               525940|                178680| -0.18727|51.492992|           1|                3|                 1|                   1|2005-01-24 00:00:00|          2|21:45|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          2|              2|                E01002839|2005-01-24 00:00:00|    1|\n",
+      "| 200501BS00024|         2005|         01BS00024|               524700|                180880|-0.204346|51.513039|           1|                3|                 2|                   1|2005-01-24 00:00:00|          2|17:05|                      12|                   E09000020|              E09000020|               4|              415|        6|         30|              3|               4|                5|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          2|              2|                E01002882|2005-01-24 00:00:00|    1|\n",
+      "| 200501BS00025|         2005|         01BS00025|               526930|                177490|-0.173445|51.482076|           1|                3|                 2|                   1|2005-01-24 00:00:00|          2|21:30|                      12|                   E09000020|              E09000020|               3|             3220|        3|         30|              6|               2|                3|              3220|                                0|                                      0|               4|                 1|                      4|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-01-24 00:00:00|    1|\n",
+      "| 200501BS00028|         2005|         01BS00028|               527290|                178710|-0.167824| 51.49296|           1|                3|                 2|                   1|2005-01-18 00:00:00|          3|17:25|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              6|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          2|              2|                E01002858|2005-01-18 00:00:00|    1|\n",
+      "| 200501BS00029|         2005|         01BS00029|               527380|                179280|-0.166322|51.498062|           1|                3|                 2|                   1|2005-01-29 00:00:00|          7|07:34|                      12|                   E09000020|              E09000020|               3|                4|        3|         30|              3|               2|                4|               319|                                0|                                      5|               1|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002819|2005-01-29 00:00:00|    1|\n",
+      "| 200501BS00031|         2005|         01BS00031|               523930|                180330|-0.215629|51.508265|           1|                3|                 1|                   1|2005-01-19 00:00:00|          4|16:35|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01001944|2005-01-19 00:00:00|    1|\n",
+      "| 200501BS00032|         2005|         01BS00032|               524470|                180980|-0.207623|51.513988|           1|                3|                 2|                   1|2005-01-30 00:00:00|          1|20:00|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002874|2005-01-30 00:00:00|    1|\n",
+      "| 200501BS00033|         2005|         01BS00033|               525570|                178190|-0.192771|51.488671|           1|                3|                 2|                   2|2005-01-29 00:00:00|          7|13:15|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              6|               2|                3|              3218|                                0|                                      5|               1|                 2|                      2|                         0|                  0|                  1|                                          2|              2|                E01002849|2005-01-29 00:00:00|    1|\n",
+      "| 200501BS70001|         2005|         01BS70001|               526240|                178900|-0.182872|51.494902|           1|                3|                 2|                   1|2005-02-01 00:00:00|          3|18:20|                      12|                   E09000020|              E09000020|               3|                4|        3|         30|              6|               2|                5|                 0|                                0|                                      5|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002835|2005-02-01 00:00:00|    2|\n",
+      "| 200501BS70002|         2005|         01BS70002|               527780|                179160|-0.160606|51.496893|           1|                3|                 2|                   1|2005-02-02 00:00:00|          4|07:25|                      12|                   E09000020|              E09000020|               4|              319|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002863|2005-02-02 00:00:00|    2|\n",
+      "| 200501BS70003|         2005|         01BS70003|               523910|                181390|-0.215545|51.517796|           1|                2|                 2|                   2|2005-02-01 00:00:00|          3|17:30|                      12|                   E09000020|              E09000020|               4|              412|        2|         30|              2|               4|                6|                 0|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002878|2005-02-01 00:00:00|    2|\n",
+      "| 200501BS70004|         2005|         01BS70004|               524870|                181880|-0.201543|51.521988|           1|                3|                 2|                   1|2005-02-03 00:00:00|          5|12:30|                       1|                   E09000033|              E09000033|               3|             4207|        6|         30|              3|               4|                5|                 0|                                0|                                      1|               1|                 9|                      1|                         0|                  0|                  1|                                          2|              2|                E01002854|2005-02-03 00:00:00|    2|\n",
+      "| 200501BS70005|         2005|         01BS70005|               527250|                179170|-0.168234|51.497103|           1|                3|                 2|                   1|2005-02-12 00:00:00|          7|09:55|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              3|               4|                6|                 0|                                0|                                      4|               1|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002819|2005-02-12 00:00:00|    2|\n",
+      "| 200501BS70006|         2005|         01BS70006|               524170|                181640|-0.211712|51.519986|           1|                3|                 2|                   2|2005-02-03 00:00:00|          5|13:00|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              6|               4|                5|                 0|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002909|2005-02-03 00:00:00|    2|\n",
+      "| 200501BS70007|         2005|         01BS70007|               523740|                182030|-0.217769|51.523585|           1|                3|                 4|                   1|2005-02-01 00:00:00|          3|13:02|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002906|2005-02-01 00:00:00|    2|\n",
+      "| 200501BS70008|         2005|         01BS70008|               523770|                181070|-0.217674|51.514951|           1|                3|                 2|                   2|2005-02-03 00:00:00|          5|22:55|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              6|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002880|2005-02-03 00:00:00|    2|\n",
+      "| 200501BS70009|         2005|         01BS70009|               525840|                177020|-0.189301|51.478096|           1|                3|                 2|                   1|2005-02-03 00:00:00|          5|17:30|                      11|                   E09000013|              E09000013|               3|              308|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01001921|2005-02-03 00:00:00|    2|\n",
+      "| 200501BS70010|         2005|         01BS70010|               526940|                177460|-0.173312|51.481804|           1|                3|                 2|                   1|2005-02-06 00:00:00|          1|15:00|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-02-06 00:00:00|    2|\n",
+      "| 200501BS70011|         2005|         01BS70011|               525040|                178620|-0.200249|51.492652|           1|                3|                 2|                   1|2005-02-05 00:00:00|          7|21:25|                      12|                   E09000020|              E09000020|               3|                4|        3|         30|              6|               2|                3|              3220|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002848|2005-02-05 00:00:00|    2|\n",
+      "| 200501BS70012|         2005|         01BS70012|               525460|                179520|-0.193882|51.500648|           1|                3|                 2|                   1|2005-02-03 00:00:00|          5|12:41|                      12|                   E09000020|              E09000020|               3|              315|        6|         30|              0|              -1|               -1|                -1|                                0|                                      4|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002816|2005-02-03 00:00:00|    2|\n",
+      "| 200501BS70013|         2005|         01BS70013|               525410|                180490|-0.194258|51.509377|           1|                3|                 1|                   1|2005-02-04 00:00:00|          6|23:35|                      12|                   E09000020|              E09000020|               3|              402|        6|         30|              6|               2|                3|              4204|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002827|2005-02-04 00:00:00|    2|\n",
+      "| 200501BS70014|         2005|         01BS70014|               527570|                177720| -0.16415|51.483999|           1|                3|                 2|                   1|2005-02-07 00:00:00|          2|09:15|                      12|                   E09000020|              E09000020|               4|              302|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002900|2005-02-07 00:00:00|    2|\n",
+      "| 200501BS70016|         2005|         01BS70016|               525200|                180460|-0.197293|51.509154|           1|                3|                 1|                   1|2005-02-12 00:00:00|          7|13:25|                      12|                   E09000020|              E09000020|               3|             4206|        6|         30|              0|              -1|               -1|                -1|                                0|                                      5|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002884|2005-02-12 00:00:00|    2|\n",
+      "| 200501BS70017|         2005|         01BS70017|               526710|                178230|-0.176346|51.488776|           1|                3|                 2|                   1|2005-02-08 00:00:00|          3|20:00|                      12|                   E09000020|              E09000020|               3|              308|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002910|2005-02-08 00:00:00|    2|\n",
+      "| 200501BS70018|         2005|         01BS70018|               525860|                179700|-0.188058|51.502177|           1|                3|                 2|                   1|2005-02-16 00:00:00|          4|08:20|                      12|                   E09000020|              E09000020|               3|              315|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002889|2005-02-16 00:00:00|    2|\n",
+      "| 200501BS70019|         2005|         01BS70019|               526360|                177790|-0.181542|  51.4849|           1|                3|                 2|                   1|2005-02-07 00:00:00|          2|11:30|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002912|2005-02-07 00:00:00|    2|\n",
+      "| 200501BS70020|         2005|         01BS70020|               527020|                179020|-0.171599|51.495806|           1|                3|                 2|                   1|2005-02-10 00:00:00|          5|18:10|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              3|               4|                3|              3218|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-02-10 00:00:00|    2|\n",
+      "| 200501BS70021|         2005|         01BS70021|               526290|                177800|-0.182546|51.485005|           1|                3|                 1|                   1|2005-02-12 00:00:00|          7|11:33|                      12|                   E09000020|              E09000020|               3|              308|        6|         30|              0|              -1|               -1|                -1|                                0|                                      4|               1|                 1|                      2|                         4|                  2|                  1|                                          1|              2|                E01002912|2005-02-12 00:00:00|    2|\n",
+      "| 200501BS70023|         2005|         01BS70023|               526020|                177850|-0.186415|51.485515|           1|                3|                 2|                   2|2005-02-13 00:00:00|          1|20:05|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              6|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002896|2005-02-13 00:00:00|    2|\n",
+      "| 200501BS70025|         2005|         01BS70025|               527270|                178660| -0.16813|51.492515|           1|                3|                 2|                   1|2005-02-01 00:00:00|          3|16:44|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              6|               4|                6|                 0|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002862|2005-02-01 00:00:00|    2|\n",
+      "| 200501BS70027|         2005|         01BS70027|               525190|                180500|-0.197423|51.509515|           1|                3|                 2|                   1|2005-02-17 00:00:00|          5|18:00|                      12|                   E09000020|              E09000020|               3|             4206|        6|         30|              9|               4|                4|               415|                                0|                                      1|               4|                 2|                      2|                         0|                  0|                  1|                                          2|              2|                E01002884|2005-02-17 00:00:00|    2|\n",
+      "| 200501BS70028|         2005|         01BS70028|               527960|                178790|-0.158149|51.493527|           1|                3|                 1|                   1|2005-02-04 00:00:00|          6|10:15|                      12|                   E09000020|              E09000020|               3|             3216|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002859|2005-02-04 00:00:00|    2|\n",
+      "| 200501BS70029|         2005|         01BS70029|               524100|                181830|-0.212653|51.521709|           1|                2|                 1|                   1|2005-02-15 00:00:00|          3|18:15|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002905|2005-02-15 00:00:00|    2|\n",
+      "| 200501BS70030|         2005|         01BS70030|               526360|                177420|-0.181674|51.481575|           1|                3|                 2|                   1|2005-02-16 00:00:00|          4|10:35|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              6|               2|                3|              3217|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002913|2005-02-16 00:00:00|    2|\n",
+      "| 200501BS70031|         2005|         01BS70031|               524600|                181280|-0.205645|51.516656|           1|                3|                 1|                   1|2005-02-18 00:00:00|          6|17:36|                      12|                   E09000020|              E09000020|               4|              412|        6|         30|              6|               2|                6|                 0|                                0|                                      5|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002831|2005-02-18 00:00:00|    2|\n",
+      "| 200501BS70032|         2005|         01BS70032|               525710|                179670|-0.190229|51.501941|           1|                3|                 1|                   1|2005-02-15 00:00:00|          3|16:05|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              3|               4|                3|               315|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002889|2005-02-15 00:00:00|    2|\n",
+      "| 200501BS70033|         2005|         01BS70033|               526450|                179240|-0.179727|51.497911|           1|                3|                 1|                   1|2005-02-10 00:00:00|          5|13:56|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              3|               4|                5|                 0|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002892|2005-02-10 00:00:00|    2|\n",
+      "| 200501BS70034|         2005|         01BS70034|               524220|                181510|-0.211037|51.518806|           1|                3|                 2|                   1|2005-02-16 00:00:00|          4|14:10|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              6|               4|                6|                 0|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002878|2005-02-16 00:00:00|    2|\n",
+      "| 200501BS70036|         2005|         01BS70036|               525430|                179140|-0.194449| 51.49724|           1|                3|                 2|                   1|2005-02-21 00:00:00|          2|10:10|                      12|                   E09000020|              E09000020|               6|                0|        1|         30|              2|               4|                6|                 0|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          2|              2|                E01002813|2005-02-21 00:00:00|    2|\n",
+      "| 200501BS70037|         2005|         01BS70037|               524430|                180180|-0.208481|51.506807|           1|                3|                 1|                   1|2005-02-20 00:00:00|          1|14:50|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002871|2005-02-20 00:00:00|    2|\n",
n",
+      "only showing top 60 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "TimeAccident_dfmonth.show(60)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 199,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Replace the numeric month (1-12) in the \"month\" column with its English\n",
+    "# month name. The branches are evaluated in order; a value that matches\n",
+    "# none of them is kept as-is by the final otherwise().\n",
+    "TimeAccident_dfmonthly_new=TimeAccident_dfmonth.withColumn(\n",
+    "    \"month\",\n",
+    "    when(\n",
+    "        col(\"month\") == 1,\n",
+    "        \"January\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 2,\n",
+    "        \"February\"  # spelling fixed (was \"Februray\")\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 3,\n",
+    "        \"March\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 4,\n",
+    "        \"April\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 5,\n",
+    "        \"May\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 6,\n",
+    "        \"June\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 7,\n",
+    "        \"July\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 8,\n",
+    "        \"August\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 9,\n",
+    "        \"September\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 10,\n",
+    "        \"October\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 11,\n",
+    "        \"November\"\n",
+    "    ).\n",
+    "    when(\n",
+    "        col(\"month\") == 12,\n",
+    "        \"December\"\n",
+    "    ).otherwise(col(\"month\")),  # leave unexpected values untouched\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 200,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
n",
+      "|accident_index|accident_year|accident_reference|location_easting_osgr|location_northing_osgr|longitude| latitude|police_force|accident_severity|number_of_vehicles|number_of_casualties|               date|day_of_week| time|local_authority_district|local_authority_ons_district|local_authority_highway|first_road_class|first_road_number|road_type|speed_limit|junction_detail|junction_control|second_road_class|second_road_number|pedestrian_crossing_human_control|pedestrian_crossing_physical_facilities|light_conditions|weather_conditions|road_surface_conditions|special_conditions_at_site|carriageway_hazards|urban_or_rural_area|did_police_officer_attend_scene_of_accident|trunk_road_flag|lsoa_of_accident_location|          timestamp|  month|\n",
n",
+      "| 200501BS00001|         2005|         01BS00001|               525680|                178240| -0.19117|51.489096|           1|                2|                 1|                   1|2005-01-04 00:00:00|          3|17:42|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              0|              -1|               -1|                -1|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|2005-01-04 00:00:00|January|\n",
+      "| 200501BS00002|         2005|         01BS00002|               524170|                181650|-0.211708|51.520075|           1|                3|                 1|                   1|2005-01-05 00:00:00|          4|17:36|                      12|                   E09000020|              E09000020|               4|              450|        3|         30|              6|               2|                5|                 0|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002909|2005-01-05 00:00:00|January|\n",
+      "| 200501BS00003|         2005|         01BS00003|               524520|                182240|-0.206458|51.525301|           1|                3|                 2|                   1|2005-01-06 00:00:00|          5|00:15|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002857|2005-01-06 00:00:00|January|\n",
+      "| 200501BS00004|         2005|         01BS00004|               526900|                177530|-0.173862|51.482442|           1|                3|                 1|                   1|2005-01-07 00:00:00|          6|10:35|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-01-07 00:00:00|January|\n",
+      "| 200501BS00005|         2005|         01BS00005|               528060|                179040|-0.156618|51.495752|           1|                3|                 1|                   1|2005-01-10 00:00:00|          2|21:13|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               7|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002863|2005-01-10 00:00:00|January|\n",
+      "| 200501BS00006|         2005|         01BS00006|               524770|                181160|-0.203238| 51.51554|           1|                3|                 2|                   1|2005-01-11 00:00:00|          3|12:40|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 2|                      2|                         6|                  0|                  1|                                          1|              2|                E01002832|2005-01-11 00:00:00|January|\n",
+      "| 200501BS00007|         2005|         01BS00007|               524220|                180830|-0.211277|51.512695|           1|                3|                 2|                   1|2005-01-13 00:00:00|          5|20:40|                      12|                   E09000020|              E09000020|               5|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|2005-01-13 00:00:00|January|\n",
+      "| 200501BS00009|         2005|         01BS00009|               525890|                179710|-0.187623| 51.50226|           1|                3|                 1|                   2|2005-01-14 00:00:00|          6|17:35|                      12|                   E09000020|              E09000020|               3|              315|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002889|2005-01-14 00:00:00|January|\n",
+      "| 200501BS00010|         2005|         01BS00010|               527350|                177650|-0.167342| 51.48342|           1|                3|                 2|                   2|2005-01-15 00:00:00|          7|22:43|                      12|                   E09000020|              E09000020|               3|             3212|        6|         30|              6|               2|                4|               304|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002900|2005-01-15 00:00:00|January|\n",
+      "| 200501BS00011|         2005|         01BS00011|               524550|                180810|-0.206531|51.512443|           1|                3|                 2|                   5|2005-01-15 00:00:00|          7|16:00|                      12|                   E09000020|              E09000020|               4|              450|        6|         30|              3|               4|                5|                 0|                                0|                                      8|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002875|2005-01-15 00:00:00|January|\n",
+      "| 200501BS00012|         2005|         01BS00012|               526240|                178900|-0.182872|51.494902|           1|                3|                 1|                   1|2005-01-16 00:00:00|          1|00:42|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              6|               2|                4|               325|                                0|                                      5|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002835|2005-01-16 00:00:00|January|\n",
+      "| 200501BS00014|         2005|         01BS00014|               526170|                177690|-0.184312|51.484044|           1|                3|                 2|                   1|2005-01-25 00:00:00|          3|20:48|                      12|                   E09000020|              E09000020|               3|             3220|        6|         30|              6|               2|                3|               308|                                0|                                      5|               4|                 1|                      2|                         0|                  0|                  1|                                          1|              2|                E01002912|2005-01-25 00:00:00|January|\n",
+      "| 200501BS00015|         2005|         01BS00015|               525590|                178520|-0.192366|51.491632|           1|                3|                 1|                   1|2005-01-11 00:00:00|          3|12:55|                      12|                   E09000020|              E09000020|               6|                0|        2|         30|              3|               4|                3|              3220|                                0|                                      1|               1|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002849|2005-01-11 00:00:00|January|\n",
+      "| 200501BS00016|         2005|         01BS00016|               527990|                178690|-0.157753|51.492622|           1|                3|                 2|                   1|2005-01-18 00:00:00|          3|05:01|                      12|                   E09000020|              E09000020|               3|             3217|        2|         30|              3|               4|                3|              3216|                                0|                                      0|               4|                 2|                      2|                         0|                  0|                  1|                                          1|              2|                E01002902|2005-01-18 00:00:00|January|\n",
+      "| 200501BS00017|         2005|         01BS00017|               526700|                178970|-0.176224|51.495429|           1|                3|                 1|                   2|2005-01-18 00:00:00|          3|11:15|                      12|                   E09000020|              E09000020|               3|                4|        3|         30|              0|              -1|               -1|                -1|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-18 00:00:00|January|\n",
+      "| 200501BS00018|         2005|         01BS00018|               526460|                177460| -0.18022|51.481912|           1|                3|                 1|                   1|2005-01-18 00:00:00|          3|10:50|                      12|                   E09000020|              E09000020|               3|             3217|        6|         30|              3|               4|                6|                 0|                                0|                                      1|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002840|2005-01-18 00:00:00|January|\n",
+      "| 200501BS00019|         2005|         01BS00019|               524680|                179450|-0.205139|51.500191|           1|                2|                 2|                   1|2005-01-20 00:00:00|          5|00:15|                      12|                   E09000020|              E09000020|               6|                0|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002864|2005-01-20 00:00:00|January|\n",
+      "| 200501BS00020|         2005|         01BS00020|               527000|                179020|-0.171887|51.495811|           1|                3|                 2|                   1|2005-01-21 00:00:00|          6|09:15|                      12|                   E09000020|              E09000020|               3|             3218|        6|         30|              3|               4|                3|                 4|                                0|                                      0|               1|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-21 00:00:00|January|\n",
+      "| 200501BS00021|         2005|         01BS00021|               527810|                178010| -0.16059|51.486552|           1|                3|                 2|                   1|2005-01-21 00:00:00|          6|21:16|                      12|                   E09000020|              E09000020|               4|              302|        6|         30|              0|              -1|               -1|                -1|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002901|2005-01-21 00:00:00|January|\n",
+      "| 200501BS00022|         2005|         01BS00022|               526790|                178980|-0.174925|51.495498|           1|                2|                 1|                   1|2005-01-08 00:00:00|          7|03:00|                      12|                   E09000020|              E09000020|               3|                4|        6|         30|              3|               4|                6|                 0|                                0|                                      0|               4|                 1|                      1|                         0|                  0|                  1|                                          1|              2|                E01002821|2005-01-08 00:00:00|January|\n",
n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "TimeAccident_dfmonthly_new.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 207,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+---------------+\n",
+      "|    month|Total accidents|\n",
+      "+---------+---------------+\n",
+      "|     July|         201161|\n",
+      "| November|         209588|\n",
+      "|  January|         184490|\n",
+      "|    March|         182482|\n",
+      "|  October|         205207|\n",
+      "|      May|         193802|\n",
+      "|   August|         186789|\n",
+      "|    April|         175254|\n",
+      "|     June|         194286|\n",
+      "| December|         187478|\n",
+      "| Februray|         170157|\n",
+      "|September|         196733|\n",
+      "+---------+---------------+\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Number of accident records per month name; the grouped result comes back in no particular order.\n",
+    "TimeAccident_dfmonthly_new_df = (\n",
+    "    TimeAccident_dfmonthly_new\n",
+    "    .groupby('month')\n",
+    "    .agg(F.count('accident_index').alias('Total accidents'))\n",
+    ")\n",
+    "TimeAccident_dfmonthly_new_df.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 281,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>month</th>\n",
+       "      <th>Traffic volume</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>January</td>\n",
+       "      <td>89.566667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Februray</td>\n",
+       "      <td>94.466667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>March</td>\n",
+       "      <td>98.266667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>April</td>\n",
+       "      <td>100.566667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>May</td>\n",
+       "      <td>101.933333</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>June</td>\n",
+       "      <td>103.966667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>July</td>\n",
+       "      <td>104.766667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>August</td>\n",
+       "      <td>104.900000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>September</td>\n",
+       "      <td>104.366667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>October</td>\n",
+       "      <td>103.200000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>November</td>\n",
+       "      <td>100.133333</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>December</td>\n",
+       "      <td>93.533333</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        month  Traffic volume\n",
+       "0     January       89.566667\n",
+       "1    Februray       94.466667\n",
+       "2       March       98.266667\n",
+       "3       April      100.566667\n",
+       "4         May      101.933333\n",
+       "5        June      103.966667\n",
+       "6        July      104.766667\n",
+       "7      August      104.900000\n",
+       "8   September      104.366667\n",
+       "9     October      103.200000\n",
+       "10   November      100.133333\n",
+       "11   December       93.533333"
+      ]
+     },
+     "execution_count": 281,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Monthly traffic-volume lookup table (columns: 'month', 'Traffic volume').\n",
+    "# The location defaults to the original absolute path but can be overridden through the\n",
+    "# MONTHLY_TRAFFIC_CSV environment variable, so the notebook can run on another machine\n",
+    "# without editing this cell.\n",
+    "MONTHLY_TRAFFIC_CSV = os.environ.get(\n",
+    "    'MONTHLY_TRAFFIC_CSV',\n",
+    "    '/Users/Asfandyar/Desktop/disertation/monthly traffic.csv',\n",
+    ")\n",
+    "month = pd.read_csv(MONTHLY_TRAFFIC_CSV)\n",
+    "month"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 268,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>month</th>\n",
+       "      <th>Total accidents</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>July</td>\n",
+       "      <td>201161</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>November</td>\n",
+       "      <td>209588</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>January</td>\n",
+       "      <td>184490</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>March</td>\n",
+       "      <td>182482</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>October</td>\n",
+       "      <td>205207</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>May</td>\n",
+       "      <td>193802</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>August</td>\n",
+       "      <td>186789</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>April</td>\n",
+       "      <td>175254</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>June</td>\n",
+       "      <td>194286</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>December</td>\n",
+       "      <td>187478</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>Februray</td>\n",
+       "      <td>170157</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>September</td>\n",
+       "      <td>196733</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        month  Total accidents\n",
+       "0        July           201161\n",
+       "1    November           209588\n",
+       "2     January           184490\n",
+       "3       March           182482\n",
+       "4     October           205207\n",
+       "5         May           193802\n",
+       "6      August           186789\n",
+       "7       April           175254\n",
+       "8        June           194286\n",
+       "9    December           187478\n",
+       "10   Februray           170157\n",
+       "11  September           196733"
+      ]
+     },
+     "execution_count": 268,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "TimeAccident_dfmonthly_new_df_df=TimeAccident_dfmonthly_new_df.toPandas()\n",
+    "TimeAccident_dfmonthly_new_df_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 388,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>month</th>\n",
+       "      <th>Traffic volume</th>\n",
+       "      <th>Total accidents</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>January</td>\n",
+       "      <td>89.566667</td>\n",
+       "      <td>184490</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Februray</td>\n",
+       "      <td>94.466667</td>\n",
+       "      <td>170157</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>March</td>\n",
+       "      <td>98.266667</td>\n",
+       "      <td>182482</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>April</td>\n",
+       "      <td>100.566667</td>\n",
+       "      <td>175254</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>May</td>\n",
+       "      <td>101.933333</td>\n",
+       "      <td>193802</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>June</td>\n",
+       "      <td>103.966667</td>\n",
+       "      <td>194286</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>July</td>\n",
+       "      <td>104.766667</td>\n",
+       "      <td>201161</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>August</td>\n",
+       "      <td>104.900000</td>\n",
+       "      <td>186789</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>September</td>\n",
+       "      <td>104.366667</td>\n",
+       "      <td>196733</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>October</td>\n",
+       "      <td>103.200000</td>\n",
+       "      <td>205207</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>November</td>\n",
+       "      <td>100.133333</td>\n",
+       "      <td>209588</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>December</td>\n",
+       "      <td>93.533333</td>\n",
+       "      <td>187478</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        month  Traffic volume  Total accidents\n",
+       "0     January       89.566667           184490\n",
+       "1    Februray       94.466667           170157\n",
+       "2       March       98.266667           182482\n",
+       "3       April      100.566667           175254\n",
+       "4         May      101.933333           193802\n",
+       "5        June      103.966667           194286\n",
+       "6        July      104.766667           201161\n",
+       "7      August      104.900000           186789\n",
+       "8   September      104.366667           196733\n",
+       "9     October      103.200000           205207\n",
+       "10   November      100.133333           209588\n",
+       "11   December       93.533333           187478"
+      ]
+     },
+     "execution_count": 388,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Join traffic volume and accident counts on the month name.\n",
+    "# validate='one_to_one' makes pandas raise if either table lists a month more than once, and the\n",
+    "# assertion catches months lost by the inner join: the join is an exact string match, so both\n",
+    "# tables must spell every month identically (e.g. 'Februray').\n",
+    "result23 = pd.merge(month, TimeAccident_dfmonthly_new_df_df, on=['month'], how='inner', validate='one_to_one')\n",
+    "assert len(result23) == len(month), 'some months were dropped by the merge - check month spellings'\n",
+    "result23"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 389,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>month</th>\n",
+       "      <th>Traffic volume</th>\n",
+       "      <th>Total accidents</th>\n",
+       "      <th>Accident Distribution</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>January</td>\n",
+       "      <td>89.566667</td>\n",
+       "      <td>184490</td>\n",
+       "      <td>2059.806476</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Februray</td>\n",
+       "      <td>94.466667</td>\n",
+       "      <td>170157</td>\n",
+       "      <td>1801.238532</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>March</td>\n",
+       "      <td>98.266667</td>\n",
+       "      <td>182482</td>\n",
+       "      <td>1857.008141</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>April</td>\n",
+       "      <td>100.566667</td>\n",
+       "      <td>175254</td>\n",
+       "      <td>1742.664898</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>May</td>\n",
+       "      <td>101.933333</td>\n",
+       "      <td>193802</td>\n",
+       "      <td>1901.262264</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>June</td>\n",
+       "      <td>103.966667</td>\n",
+       "      <td>194286</td>\n",
+       "      <td>1868.733568</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>July</td>\n",
+       "      <td>104.766667</td>\n",
+       "      <td>201161</td>\n",
+       "      <td>1920.085905</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>August</td>\n",
+       "      <td>104.900000</td>\n",
+       "      <td>186789</td>\n",
+       "      <td>1780.638704</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>September</td>\n",
+       "      <td>104.366667</td>\n",
+       "      <td>196733</td>\n",
+       "      <td>1885.017566</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>October</td>\n",
+       "      <td>103.200000</td>\n",
+       "      <td>205207</td>\n",
+       "      <td>1988.439922</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>November</td>\n",
+       "      <td>100.133333</td>\n",
+       "      <td>209588</td>\n",
+       "      <td>2093.089215</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>December</td>\n",
+       "      <td>93.533333</td>\n",
+       "      <td>187478</td>\n",
+       "      <td>2004.397719</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        month  Traffic volume  Total accidents  Accident Distribution\n",
+       "0     January       89.566667           184490            2059.806476\n",
+       "1    Februray       94.466667           170157            1801.238532\n",
+       "2       March       98.266667           182482            1857.008141\n",
+       "3       April      100.566667           175254            1742.664898\n",
+       "4         May      101.933333           193802            1901.262264\n",
+       "5        June      103.966667           194286            1868.733568\n",
+       "6        July      104.766667           201161            1920.085905\n",
+       "7      August      104.900000           186789            1780.638704\n",
+       "8   September      104.366667           196733            1885.017566\n",
+       "9     October      103.200000           205207            1988.439922\n",
+       "10   November      100.133333           209588            2093.089215\n",
+       "11   December       93.533333           187478            2004.397719"
+      ]
+     },
+     "execution_count": 389,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Per-month ratio of accident count to traffic volume, stored as a new column on result23.\n",
+    "result23[\"Accident Distribution\"] = result23[\"Total accidents\"].div(result23[\"Traffic volume\"])\n",
+    "result23"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 390,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>month</th>\n",
+       "      <th>Accident Distribution</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>January</td>\n",
+       "      <td>2059.806476</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Februray</td>\n",
+       "      <td>1801.238532</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>March</td>\n",
+       "      <td>1857.008141</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>April</td>\n",
+       "      <td>1742.664898</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>May</td>\n",
+       "      <td>1901.262264</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>June</td>\n",
+       "      <td>1868.733568</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>July</td>\n",
+       "      <td>1920.085905</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>August</td>\n",
+       "      <td>1780.638704</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>September</td>\n",
+       "      <td>1885.017566</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>October</td>\n",
+       "      <td>1988.439922</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>November</td>\n",
+       "      <td>2093.089215</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>December</td>\n",
+       "      <td>2004.397719</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        month  Accident Distribution\n",
+       "0     January            2059.806476\n",
+       "1    Februray            1801.238532\n",
+       "2       March            1857.008141\n",
+       "3       April            1742.664898\n",
+       "4         May            1901.262264\n",
+       "5        June            1868.733568\n",
+       "6        July            1920.085905\n",
+       "7      August            1780.638704\n",
+       "8   September            1885.017566\n",
+       "9     October            1988.439922\n",
+       "10   November            2093.089215\n",
+       "11   December            2004.397719"
+      ]
+     },
+     "execution_count": 390,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Remove the two input columns, leaving 'month' and 'Accident Distribution' for the plot.\n",
+    "# errors='ignore' keeps the cell re-runnable: result23 is rebound here, so without it a second\n",
+    "# execution would raise KeyError because the two columns are already gone.\n",
+    "result23 = result23.drop(columns=['Total accidents', 'Traffic volume'], errors='ignore')\n",
+    "result23"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 320,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'T' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-320-94805f1af0e7>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mee\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mExpected_Values\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mee\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'T' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "ee=Expected_Values.dot(T)\n",
+    "ee"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 285,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 1440x720 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Bar chart of the per-month 'Accident Distribution' values held in result23.\n",
+    "ax = result23.plot.bar(\n",
+    "    x='month',\n",
+    "    y='Accident Distribution',\n",
+    "    rot=90,\n",
+    "    title=\"Accidents distribution over month \",\n",
+    "    figsize=(20, 10),\n",
+    "    color=\"Orange\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 507,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DataFrame[accident_index: string, accident_year: string, accident_reference: string, location_easting_osgr: string, location_northing_osgr: string, longitude: string, latitude: string, police_force: string, accident_severity: string, number_of_vehicles: string, number_of_casualties: string, date: string, day_of_week: string, time: string, local_authority_district: string, local_authority_ons_district: string, local_authority_highway: string, first_road_class: string, first_road_number: string, road_type: string, speed_limit: string, junction_detail: string, junction_control: string, second_road_class: string, second_road_number: string, pedestrian_crossing_human_control: string, pedestrian_crossing_physical_facilities: string, light_conditions: string, weather_conditions: string, road_surface_conditions: string, special_conditions_at_site: string, carriageway_hazards: string, urban_or_rural_area: string, did_police_officer_attend_scene_of_accident: string, trunk_road_flag: string, lsoa_of_accident_location: string]"
+      ]
+     },
+     "execution_count": 507,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "A2018"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DataFrame[accident_index: string, accident_year: string, accident_reference: string, location_easting_osgr: string, location_northing_osgr: string, longitude: string, latitude: string, police_force: string, accident_severity: string, number_of_vehicles: string, number_of_casualties: string, date: string, day_of_week: string, time: string, local_authority_district: string, local_authority_ons_district: string, local_authority_highway: string, first_road_class: string, first_road_number: string, road_type: string, speed_limit: string, junction_detail: string, junction_control: string, second_road_class: string, second_road_number: string, pedestrian_crossing_human_control: string, pedestrian_crossing_physical_facilities: string, light_conditions: string, weather_conditions: string, road_surface_conditions: string, special_conditions_at_site: string, carriageway_hazards: string, urban_or_rural_area: string, did_police_officer_attend_scene_of_accident: string, trunk_road_flag: string, lsoa_of_accident_location: string]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "A2018"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 662,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+-----------------+---------+---------------+\n",
+      "|accident_severity|road_type|Total accidents|\n",
+      "+-----------------+---------+---------------+\n",
+      "|                3|       -1|              1|\n",
+      "|                1|        1|            471|\n",
+      "|                2|        3|          44030|\n",
+      "|                3|        3|         291457|\n",
+      "|                2|        2|           6566|\n",
+      "|                1|        2|            328|\n",
+      "|                2|        6|         262089|\n",
+      "|                3|        1|         136135|\n",
+      "|                1|        3|           5983|\n",
+      "|                3|        2|          42394|\n",
+      "|                3|        6|        1416841|\n",
+      "|                1|        7|            205|\n",
+      "|                2|        1|          14601|\n",
+      "|                1|        6|          22595|\n",
+      "|                3|        9|          16857|\n",
+      "|                2|        7|           2489|\n",
+      "|                1|        9|            116|\n",
+      "|                3|        7|          22311|\n",
+      "|                2|        9|           1958|\n",
+      "+-----------------+---------+---------------+\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Accident counts for every (accident_severity, road_type) pair in TimeAccident_dfmonthly_new.\n",
+    "A2018ts_df = (\n",
+    "    TimeAccident_dfmonthly_new\n",
+    "    .groupby(\"accident_severity\", 'road_type')\n",
+    "    .agg(F.count('accident_index').alias('Total accidents'))\n",
+    ")\n",
+    "A2018ts_df.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+-----------------+---------+---------------+\n",
+      "|accident_severity|road_type|Total accidents|\n",
+      "+-----------------+---------+---------------+\n",
+      "|                3|       -1|              1|\n",
+      "|                1|        1|            471|\n",
+      "|                2|        3|          44030|\n",
+      "|                3|        3|         291457|\n",
+      "|                2|        2|           6566|\n",
+      "|                1|        2|            328|\n",
+      "|                2|        6|         262089|\n",
+      "|                3|        1|         136135|\n",
+      "|                1|        3|           5983|\n",
+      "|                3|        2|          42394|\n",
+      "|                3|        6|        1416841|\n",
+      "|                1|        7|            205|\n",
+      "|                2|        1|          14601|\n",
+      "|                1|        6|          22595|\n",
+      "|                3|        9|          16857|\n",
+      "|                2|        7|           2489|\n",
+      "|                1|        9|            116|\n",
+      "|                3|        7|          22311|\n",
+      "|                2|        9|           1958|\n",
+      "+-----------------+---------+---------------+\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Tally the non-null accident_index values for every (accident_severity, road_type) pair.\n",
+    "A2018ts_df = (\n",
+    "    A2018\n",
+    "    .groupBy('accident_severity', 'road_type')\n",
+    "    .agg(F.count('accident_index').alias('Total accidents'))\n",
+    ")\n",
+    "A2018ts_df.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "A2018_df=A2018ts_df.toPandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>accident_severity</th>\n",
+       "      <th>road_type</th>\n",
+       "      <th>Total accidents</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>3</td>\n",
+       "      <td>-1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>471</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>44030</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>291457</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6566</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>328</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>262089</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>136135</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>5983</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>42394</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>3</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1416841</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>205</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>14601</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>22595</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>3</td>\n",
+       "      <td>9</td>\n",
+       "      <td>16857</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2489</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>116</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>3</td>\n",
+       "      <td>7</td>\n",
+       "      <td>22311</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>2</td>\n",
+       "      <td>9</td>\n",
+       "      <td>1958</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   accident_severity road_type  Total accidents\n",
+       "0                  3        -1                1\n",
+       "1                  1         1              471\n",
+       "2                  2         3            44030\n",
+       "3                  3         3           291457\n",
+       "4                  2         2             6566\n",
+       "5                  1         2              328\n",
+       "6                  2         6           262089\n",
+       "7                  3         1           136135\n",
+       "8                  1         3             5983\n",
+       "9                  3         2            42394\n",
+       "10                 3         6          1416841\n",
+       "11                 1         7              205\n",
+       "12                 2         1            14601\n",
+       "13                 1         6            22595\n",
+       "14                 3         9            16857\n",
+       "15                 2         7             2489\n",
+       "16                 1         9              116\n",
+       "17                 3         7            22311\n",
+       "18                 2         9             1958"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "A2018_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.stats import spearmanr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "numpy.int64"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Cast the severity codes to integers.\n",
+    "A2018_df['accident_severity'] = A2018_df['accident_severity'].astype(int)\n",
+    "# Positional lookup: index label 0 disappears once the road_type == \"-1\" row is filtered out,\n",
+    "# so a label lookup with [0] raises KeyError when this cell is re-run after that filter.\n",
+    "type(A2018_df['accident_severity'].iloc[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.stats import chi2_contingency\n",
+    "table = [[10, 20, 30],[6,  9,  17]]\n",
+    "stat, p, dof, expected = chi2_contingency(table)\n",
+    "print('stat=%.3f, p=%.3f' % (stat, p))\n",
+    "if p > 0.05:\n",
+    "\tprint('Probably independent')\n",
+    "else:\n",
+    "\tprint('Probably dependent')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "stat=0.607, p=0.002\n",
+      "Probably dependent\n"
+     ]
+    }
+   ],
+   "source": [
+    "from scipy.stats import pearsonr\n",
+    "from scipy.stats import kendalltau\n",
+    "\n",
+    "# Kendall rank correlation between the severity code and the accident count of each row.\n",
+    "data1 = A2018_df['accident_severity']\n",
+    "data2 = A2018_df['Total accidents']\n",
+    "stat, p = kendalltau(data1, data2)\n",
+    "print('stat=%.3f, p=%.3f' % (stat, p))\n",
+    "# p above the 0.05 threshold -> report independence, otherwise dependence.\n",
+    "print('Probably independent' if p > 0.05 else 'Probably dependent')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>accident_severity</th>\n",
+       "      <th>road_type</th>\n",
+       "      <th>Total accidents</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>471</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>44030</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>291457</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6566</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>328</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>262089</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>136135</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>5983</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>42394</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>3</td>\n",
+       "      <td>6</td>\n",
+       "      <td>1416841</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>205</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>14601</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>22595</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>3</td>\n",
+       "      <td>9</td>\n",
+       "      <td>16857</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2489</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>116</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>3</td>\n",
+       "      <td>7</td>\n",
+       "      <td>22311</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>2</td>\n",
+       "      <td>9</td>\n",
+       "      <td>1958</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    accident_severity road_type  Total accidents\n",
+       "1                   1         1              471\n",
+       "2                   2         3            44030\n",
+       "3                   3         3           291457\n",
+       "4                   2         2             6566\n",
+       "5                   1         2              328\n",
+       "6                   2         6           262089\n",
+       "7                   3         1           136135\n",
+       "8                   1         3             5983\n",
+       "9                   3         2            42394\n",
+       "10                  3         6          1416841\n",
+       "11                  1         7              205\n",
+       "12                  2         1            14601\n",
+       "13                  1         6            22595\n",
+       "14                  3         9            16857\n",
+       "15                  2         7             2489\n",
+       "16                  1         9              116\n",
+       "17                  3         7            22311\n",
+       "18                  2         9             1958"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Keep only rows whose road_type is not the string \"-1\" (presumably a missing-data code; confirm).\n",
+    "A2018_df = A2018_df.loc[A2018_df['road_type'].ne(\"-1\")]\n",
+    "A2018_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.dropna()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"3\" halign=\"left\">Total accidents</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>accident_severity</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>road_type</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>471</td>\n",
+       "      <td>14601</td>\n",
+       "      <td>136135</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>328</td>\n",
+       "      <td>6566</td>\n",
+       "      <td>42394</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>5983</td>\n",
+       "      <td>44030</td>\n",
+       "      <td>291457</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>22595</td>\n",
+       "      <td>262089</td>\n",
+       "      <td>1416841</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>205</td>\n",
+       "      <td>2489</td>\n",
+       "      <td>22311</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>116</td>\n",
+       "      <td>1958</td>\n",
+       "      <td>16857</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  Total accidents                 \n",
+       "accident_severity               1       2        3\n",
+       "road_type                                         \n",
+       "1                             471   14601   136135\n",
+       "2                             328    6566    42394\n",
+       "3                            5983   44030   291457\n",
+       "6                           22595  262089  1416841\n",
+       "7                             205    2489    22311\n",
+       "9                             116    1958    16857"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "A2018_dfpiv=A2018_df.pivot(index ='road_type', columns ='accident_severity')\n",
+    "A2018_dfpiv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "A2018_dfpiv=A2018_dfpiv.dropna()\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Observed Values :-\n",
+      " [[    471   14601  136135]\n",
+      " [    328    6566   42394]\n",
+      " [   5983   44030  291457]\n",
+      " [  22595  262089 1416841]\n",
+      " [    205    2489   22311]\n",
+      " [    116    1958   16857]]\n",
+      "Expected Values :-\n",
+      " [[1.96314350e+03 2.19287320e+04 1.27315124e+05]\n",
+      " [6.39913608e+02 7.14797161e+03 4.15001148e+04]\n",
+      " [4.43335700e+03 4.95215441e+04 2.87515099e+05]\n",
+      " [2.20911581e+04 2.46762952e+05 1.43267089e+06]\n",
+      " [3.24643722e+02 3.62633968e+03 2.10540166e+04]\n",
+      " [2.45784055e+02 2.74546037e+03 1.59397556e+04]]\n",
+      "Degree of Freedom:- 10\n"
+     ]
+    }
+   ],
+   "source": [
+    "import scipy.stats\n",
+    "\n",
+    "# Drop every road_type row that has a NaN in at least one severity column.\n",
+    "A2018_dfpiv=A2018_dfpiv.dropna()\n",
+    "dataset_table=A2018_dfpiv\n",
+    "\n",
+    "# Observed contingency table: rows are road types, columns are severity levels.\n",
+    "Observed_Values = dataset_table.values\n",
+    "print(\"Observed Values :-\\n\",Observed_Values)\n",
+    "\n",
+    "# chi2_contingency returns (statistic, p-value, dof, expected frequencies); index 3 is the\n",
+    "# table of expected counts under independence.\n",
+    "b=scipy.stats.chi2_contingency(dataset_table)\n",
+    "Expected_Values = b[3]\n",
+    "print(\"Expected Values :-\\n\",Expected_Values)\n",
+    "\n",
+    "# Degrees of freedom, derived from the actual table shape instead of fixed 6x3 slices so the\n",
+    "# value stays correct if the number of road types or severity levels changes.\n",
+    "# NOTE: `df` here is the degrees of freedom (an int), not a DataFrame.\n",
+    "no_of_rows, no_of_columns = Observed_Values.shape\n",
+    "df=(no_of_rows-1)*(no_of_columns-1)\n",
+    "print(\"Degree of Freedom:-\",df)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# The method for measuring Chi-Square value for the selected factors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "r [4193.795574610202, 218.67280881776227, 1204.6776335388686, 1138.2752182581148, 475.84594970392686, 347.1751350618879]\n",
+      "chi-square statistic:- 7578.4423199907615\n",
+      "critical_value: 18.307038053275146\n",
+      "p-value 0.0\n",
+      "Significance level:  0.05\n",
+      "Degree of Freedom:  10\n",
+      "Reject H0,There is a relationship between 2 categorical variables\n"
+     ]
+    }
+   ],
+   "source": [
+    "from scipy.stats import chi2\n",
+    "\n",
+    "# Per-row contribution to Pearson's chi-square: the row sum of (O - E)^2 / E.\n",
+    "# a.sum() adds every column of the row, whatever the number of severity levels.\n",
+    "r=[]\n",
+    "for o,e in zip(Observed_Values,Expected_Values):\n",
+    "    a=(o-e)**2./e\n",
+    "    r.append(a.sum())\n",
+    "print(\"r\",r)\n",
+    "chi_square_statistic=sum(r)\n",
+    "print(\"chi-square statistic:-\",chi_square_statistic)\n",
+    "\n",
+    "# Critical value of the chi-square distribution at significance level alpha; `df` is the\n",
+    "# degrees of freedom (an int) computed in the previous cell.\n",
+    "alpha = 0.05\n",
+    "critical_value=chi2.ppf(q=1-alpha,df=df)\n",
+    "print('critical_value:',critical_value)\n",
+    "\n",
+    "# p-value via the survival function (1 - cdf), which avoids the precision loss of computing\n",
+    "# 1 - cdf explicitly when the cdf is very close to 1.\n",
+    "p_value=chi2.sf(chi_square_statistic,df=df)\n",
+    "print(\"p-value\",p_value)\n",
+    "print('Significance level: ',alpha)\n",
+    "print('Degree of Freedom: ',df)\n",
+    "if chi_square_statistic>=critical_value:\n",
+    "    print(\"Reject H0,There is a relationship between 2 categorical variables\")\n",
+    "else:\n",
+    "    print(\"Retain H0,There is no relationship between 2 categorical variables\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 624,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
+      "  warnings.warn(msg, FutureWarning)\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<Figure size 432x288 with 0 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# sns.distplot is deprecated (see the FutureWarning it emits); histplot with kde=True and\n",
+    "# stat='density' is its axes-level replacement.\n",
+    "# The figure is created before plotting so that `fig` refers to the drawn chart rather than\n",
+    "# to a second, empty figure.\n",
+    "fig, ax = plt.subplots()\n",
+    "sns.histplot(A2018_df['accident_severity'], kde=True, stat='density', ax=ax);\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 511,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<AxesSubplot:>"
+      ]
+     },
+     "execution_count": 511,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 1440x648 with 2 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Correlate only the numeric columns: road_type holds strings, and pandas >= 2.0 raises on\n",
+    "# non-numeric columns in DataFrame.corr() instead of silently skipping them.\n",
+    "corr = A2018_df.select_dtypes(include='number').corr()\n",
+    "plt.subplots(figsize=(20,9))\n",
+    "sns.heatmap(corr)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
n",
+      "|accident_index|accident_year|accident_reference|vehicle_reference|vehicle_type|towing_and_articulation|vehicle_manoeuvre|vehicle_direction_from|vehicle_direction_to|vehicle_location_restricted_lane|junction_location|skidding_and_overturning|hit_object_in_carriageway|vehicle_leaving_carriageway|hit_object_off_carriageway|first_point_of_impact|vehicle_left_hand_drive|journey_purpose_of_driver|sex_of_driver|age_of_driver|age_band_of_driver|engine_capacity_cc|propulsion_code|age_of_vehicle|generic_make_model|driver_imd_decile|driver_home_area_type|lsoa_of_driver|\n",
n",
+      "| 200501BS00001|         2005|         01BS00001|                1|           9|                      0|               18|                     6|                   2|                               0|                0|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            2|           74|                10|                -1|             -1|            -1|                -1|               -1|                    1|     E01001924|\n",
+      "| 200501BS00002|         2005|         01BS00002|                1|          11|                      0|                4|                     5|                   1|                               0|                3|                       0|                        0|                          0|                         0|                    4|                      1|                        1|            1|           42|                 7|              8268|              2|             3|                -1|               -1|                   -1|            -1|\n",
+      "| 200501BS00003|         2005|         01BS00003|                1|          11|                      0|               17|                     7|                   5|                               0|                0|                       0|                        4|                          0|                         0|                    4|                      1|                        1|            1|           35|                 6|              8300|              2|             5|                -1|               -1|                    1|     E01000638|\n",
+      "| 200501BS00003|         2005|         01BS00003|                2|           9|                      0|                2|                     0|                   0|                               0|                0|                       0|                        0|                          0|                         0|                    3|                      1|                       15|            1|           62|                 9|              1762|              1|             6|                -1|               -1|                    1|     E01000605|\n",
+      "| 200501BS00004|         2005|         01BS00004|                1|           9|                      0|               18|                     8|                   4|                               0|                0|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            2|           49|                 8|              1769|              1|             4|                -1|               -1|                    1|     E01002842|\n",
+      "| 200501BS00005|         2005|         01BS00005|                1|           3|                      0|               18|                     1|                   5|                               0|                0|                       1|                       10|                          0|                         0|                    1|                      1|                       15|            1|           49|                 8|                85|              1|            10|                -1|               -1|                   -1|            -1|\n",
+      "| 200501BS00006|         2005|         01BS00006|                1|           9|                      0|                5|                     4|                   8|                               0|                0|                       0|                        0|                          0|                         0|                    0|                      1|                       15|            1|           51|                 8|              2976|              1|             1|                -1|               -1|                    1|     E01002833|\n",
+      "| 200501BS00006|         2005|         01BS00006|                2|           3|                      0|               18|                     4|                   8|                               0|                0|                       0|                        0|                          0|                         0|                    0|                      1|                       15|            2|           30|                 6|               124|              1|             2|                -1|               -1|                    1|     E01004315|\n",
+      "| 200501BS00007|         2005|         01BS00007|                1|           3|                      0|               18|                     8|                   4|                               0|                1|                       0|                        4|                          0|                         0|                    1|                      1|                       15|            1|           31|                 6|                -1|             -1|            -1|                -1|               -1|                   -1|            -1|\n",
+      "| 200501BS00007|         2005|         01BS00007|                2|           9|                      0|                2|                     0|                   0|                               0|                1|                       0|                        0|                          0|                         0|                    2|                      1|                       15|            1|           41|                 7|              4266|              1|             4|                -1|               -1|                    1|     E01002875|\n",
+      "| 200501BS00009|         2005|         01BS00009|                1|           9|                      0|               18|                     3|                   7|                               0|                0|                       0|                        0|                          1|                         0|                    1|                      1|                       15|            1|           68|                10|              5343|              1|            16|                -1|               -1|                    1|     E01001871|\n",
+      "| 200501BS00010|         2005|         01BS00010|                1|           9|                      0|               18|                     7|                   3|                               0|                8|                       0|                        0|                          0|                         0|                    2|                      1|                       15|            1|           35|                 6|              1998|              1|            13|                -1|               -1|                    1|     E01003229|\n",
+      "| 200501BS00010|         2005|         01BS00010|                2|           9|                      0|                9|                     3|                   8|                               0|                8|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            2|           48|                 8|                -1|             -1|            -1|                -1|               -1|                    1|     E01002913|\n",
+      "| 200501BS00011|         2005|         01BS00011|                1|          11|                      0|                9|                     2|                   8|                               0|                8|                       0|                        0|                          0|                         0|                    0|                      1|                        1|            1|           42|                 7|              8268|              2|             2|                -1|               -1|                    1|     E01000861|\n",
+      "| 200501BS00011|         2005|         01BS00011|                2|          90|                      0|               18|                     4|                   8|                               0|                8|                       0|                        0|                          0|                         0|                    0|                      1|                       15|            3|           -1|                -1|                -1|             -1|            -1|                -1|               -1|                   -1|            -1|\n",
+      "| 200501BS00012|         2005|         01BS00012|                1|           9|                      0|               18|                     3|                   7|                               0|                8|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            1|           34|                 6|              1988|              1|             6|                -1|               -1|                    1|     E01003829|\n",
+      "| 200501BS00014|         2005|         01BS00014|                1|           9|                      0|                9|                     6|                   4|                               0|                8|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            2|           19|                 4|              1124|              1|             8|                -1|               -1|                    1|     E01001377|\n",
+      "| 200501BS00014|         2005|         01BS00014|                2|           3|                      0|               18|                     2|                   6|                               0|                8|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            2|           20|                 4|               124|              1|             1|                -1|               -1|                   -1|            -1|\n",
+      "| 200501BS00015|         2005|         01BS00015|                1|           9|                      0|                9|                     8|                   6|                               0|                3|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            2|           47|                 8|              1360|              1|             2|                -1|               -1|                    1|     E01002848|\n",
+      "| 200501BS00016|         2005|         01BS00016|                1|           9|                      0|               18|                     7|                   3|                               0|                8|                       0|                        7|                          7|                         1|                    1|                      1|                       15|            1|           34|                 6|               698|              1|             2|                -1|               -1|                    1|     E01024185|\n",
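+      "+--------------+-------------+------------------+-----------------+------------+-----------------------+-----------------+----------------------+--------------------+--------------------------------+-----------------+------------------------+-------------------------+---------------------------+--------------------------+---------------------+-----------------------+-------------------------+-------------+-------------+------------------+------------------+---------------+--------------+------------------+-----------------+---------------------+--------------+\n",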
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "\n",
+    "from pyspark.sql.functions import concat, col, lit\n",
+    "\n",
+    "# Location of the DfT vehicle-level road casualty CSV. It can be overridden\n",
+    "# through the ROAD_VEHICLE_CSV environment variable so the notebook is not tied\n",
+    "# to one machine; the default is the path this cell originally hard-coded.\n",
+    "VEHICLE_CSV = os.environ.get(\n",
+    "    'ROAD_VEHICLE_CSV',\n",
+    "    '/Users/Asfandyar/Downloads/dft-road-casualty-statistics-vehicle-1979-2021.csv',\n",
+    ")\n",
+    "\n",
+    "# Read the file with a header row and '\"' as the CSV escape character.\n",
+    "# No schema or inferSchema option is supplied to the reader.\n",
+    "vehicles_all_years = (\n",
+    "    spark.read.format('csv')\n",
+    "    .option('header', True)\n",
+    "    .option('escape', '\"')\n",
+    "    .load(VEHICLE_CSV)\n",
+    ")\n",
+    "\n",
+    "# Keep accident years strictly after 2004 and strictly before 2020 (2005-2019).\n",
+    "# NOTE(review): the name V20052014 suggests 2005-2014, but the filter keeps rows\n",
+    "# up to 2019; the name is left unchanged because later cells refer to it.\n",
+    "accident_year = col('accident_year')\n",
+    "V20052014 = vehicles_all_years.filter((accident_year > 2004) & (accident_year < 2020))\n",
+    "V20052014.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyspark.sql.functions import col, when\n",
+    "\n",
+    "# Lookup table: raw vehicle_type code -> coarse vehicle class used in this analysis.\n",
+    "# Several codes deliberately share one label (e.g. 2, 3, 4, 5, 22, 23 and 97 all\n",
+    "# become \"Motorcycle\"), which collapses the coded column into a few categories.\n",
+    "VEHICLE_TYPE_LABELS = {\n",
+    "    1: \"Pedal cycle\",\n",
+    "    2: \"Motorcycle\",\n",
+    "    3: \"Motorcycle\",\n",
+    "    4: \"Motorcycle\",\n",
+    "    5: \"Motorcycle\",\n",
+    "    8: \"Car\",\n",
+    "    9: \"Car\",\n",
+    "    10: \"Bus\",\n",
+    "    11: \"Bus\",\n",
+    "    16: \"Ridden horse\",\n",
+    "    17: \"Agricultural vehicle\",\n",
+    "    18: \"Bus\",\n",
+    "    19: \"Goods\",\n",
+    "    20: \"Goods\",\n",
+    "    21: \"Goods\",\n",
+    "    22: \"Motorcycle\",\n",
+    "    23: \"Motorcycle\",\n",
+    "    90: \"Other vehicle\",\n",
+    "    97: \"Motorcycle\",\n",
+    "    98: \"Goods\",\n",
+    "    -1: \"Data missing or out of range\",\n",
+    "}\n",
+    "\n",
+    "\n",
+    "def label_codes(column_name, labels):\n",
+    "    \"\"\"Build a Column that swaps coded values for readable labels.\n",
+    "\n",
+    "    Args:\n",
+    "        column_name: name of the column holding the codes.\n",
+    "        labels: dict mapping each code to its label, tested in insertion order.\n",
+    "\n",
+    "    Returns:\n",
+    "        A Column equal to the label of the first matching code, or to the\n",
+    "        original value when no code matches. An empty `labels` returns the\n",
+    "        column unchanged.\n",
+    "    \"\"\"\n",
+    "    source = col(column_name)\n",
+    "    labelled = None\n",
+    "    for code, label in labels.items():\n",
+    "        matches = source == code\n",
+    "        # The first branch starts the CASE expression; later ones extend it.\n",
+    "        labelled = when(matches, label) if labelled is None else labelled.when(matches, label)\n",
+    "    return source if labelled is None else labelled.otherwise(source)\n",
+    "\n",
+    "\n",
+    "V20052014 = V20052014.withColumn(\n",
+    "    \"vehicle_type\", label_codes(\"vehicle_type\", VEHICLE_TYPE_LABELS)\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+--------------+-----------------+\n",
+      "|accident_index|accident_severity|\n",
+      "+--------------+-----------------+\n",
+      "| 200501BS00001|                2|\n",
+      "| 200501BS00002|                3|\n",
+      "| 200501BS00003|                3|\n",
+      "| 200501BS00004|                3|\n",
+      "| 200501BS00005|                3|\n",
+      "| 200501BS00006|                3|\n",
+      "| 200501BS00007|                3|\n",
+      "| 200501BS00009|                3|\n",
+      "| 200501BS00010|                3|\n",
+      "| 200501BS00011|                3|\n",
+      "| 200501BS00012|                3|\n",
+      "| 200501BS00014|                3|\n",
+      "| 200501BS00015|                3|\n",
+      "| 200501BS00016|                3|\n",
+      "| 200501BS00017|                3|\n",
+      "| 200501BS00018|                3|\n",
+      "| 200501BS00019|                2|\n",
+      "| 200501BS00020|                3|\n",
+      "| 200501BS00021|                3|\n",
+      "| 200501BS00022|                2|\n",
+      "+--------------+-----------------+\n",
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Keep only the join key and the severity code from the accident table.\n",
+    "accidentindex = A2018.select(\n",
+    "    A2018['accident_index'],\n",
+    "    A2018['accident_severity'],\n",
+    ")\n",
+    "accidentindex.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyspark.sql.functions import col, when\n",
+    "\n",
+    "# NOTE: V20052014vech_dff is created by the join cell (vehicles joined with\n",
+    "# accident severity), which sits further down in this notebook; run it first.\n",
+    "\n",
+    "# Coarse driver age group -> the age_band_of_driver codes it absorbs.\n",
+    "# NOTE(review): rows shown elsewhere in this notebook pair band 7 with ages up\n",
+    "# to 45 and band 10 with ages up to 75, so the \"40Y\"/\"70Y\" boundaries in\n",
+    "# these labels are approximate rather than exact.\n",
+    "AGE_BAND_GROUPS = [\n",
+    "    (\"Upto 20Y\", (1, 2, 3, 4)),\n",
+    "    (\"20Y to 40Y\", (5, 6, 7)),\n",
+    "    (\"40Y to 70Y\", (8, 9, 10)),\n",
+    "    (\"Over 70\", (11,)),\n",
+    "    (\"Data missing or out of range\", (-1,)),\n",
+    "]\n",
+    "\n",
+    "age_band = col(\"age_band_of_driver\")\n",
+    "age_group = None\n",
+    "for group_label, band_codes in AGE_BAND_GROUPS:\n",
+    "    for band_code in band_codes:\n",
+    "        is_band = age_band == band_code\n",
+    "        # The first branch starts the CASE expression; later ones extend it.\n",
+    "        age_group = when(is_band, group_label) if age_group is None else age_group.when(is_band, group_label)\n",
+    "\n",
+    "# Values matching none of the listed codes are kept as they are.\n",
+    "V20052014vech_dff = V20052014vech_dff.withColumn(\n",
+    "    \"age_band_of_driver\", age_group.otherwise(age_band)\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
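+      "+--------------+-------------+------------------+-----------------+------------+-----------------------+-----------------+----------------------+--------------------+--------------------------------+-----------------+------------------------+-------------------------+---------------------------+--------------------------+---------------------+-----------------------+-------------------------+-------------+-------------+------------------+------------------+---------------+--------------+------------------+-----------------+---------------------+--------------+-----------------+\n",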
+      "|accident_index|accident_year|accident_reference|vehicle_reference|vehicle_type|towing_and_articulation|vehicle_manoeuvre|vehicle_direction_from|vehicle_direction_to|vehicle_location_restricted_lane|junction_location|skidding_and_overturning|hit_object_in_carriageway|vehicle_leaving_carriageway|hit_object_off_carriageway|first_point_of_impact|vehicle_left_hand_drive|journey_purpose_of_driver|sex_of_driver|age_of_driver|age_band_of_driver|engine_capacity_cc|propulsion_code|age_of_vehicle|generic_make_model|driver_imd_decile|driver_home_area_type|lsoa_of_driver|accident_severity|\n",
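+      "+--------------+-------------+------------------+-----------------+------------+-----------------------+-----------------+----------------------+--------------------+--------------------------------+-----------------+------------------------+-------------------------+---------------------------+--------------------------+---------------------+-----------------------+-------------------------+-------------+-------------+------------------+------------------+---------------+--------------+------------------+-----------------+---------------------+--------------+-----------------+\n",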
+      "| 200501BS70192|         2005|         01BS70192|                1|       Goods|                      0|               18|                     1|                   5|                               0|                0|                       0|                        0|                          0|                         0|                    3|                      1|                       15|            1|           29|                 6|              1896|              2|             8|                -1|               -1|                    1|     E01003039|                3|\n",
+      "| 200501BS70192|         2005|         01BS70192|                2|       Goods|                      0|                2|                     0|                   0|                               0|                0|                       0|                        0|                          0|                         0|                    0|                      1|                       15|            1|           51|                 8|              2299|              2|             7|                -1|               -1|                    2|     E01015586|                3|\n",
+      "| 200501BS70293|         2005|         01BS70293|                1|         Car|                      0|                9|                     6|                   4|                               0|                2|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            1|           45|                 7|              1999|              1|             0|                -1|               -1|                    1|     E01001623|                3|\n",
+      "| 200501BS70739|         2005|         01BS70739|                1|  Motorcycle|                      0|                4|                     6|                   2|                               0|                1|                       0|                        0|                          0|                         0|                    2|                      1|                       15|            1|           27|                 6|               645|              1|             0|                -1|               -1|                    1|     E01002535|                3|\n",
+      "| 200501BS70739|         2005|         01BS70739|                2|         Car|                      0|               18|                     6|                   2|                               0|                1|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            1|           -1|                -1|              1598|              1|             3|                -1|               -1|                    1|     E01004520|                3|\n",
+      "| 200501CP00077|         2005|         01CP00077|                1|         Car|                      0|               10|                     3|                   1|                               0|                1|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            1|           75|                10|              2946|              1|             6|                -1|               -1|                    1|     E01015889|                3|\n",
+      "| 200501CP00077|         2005|         01CP00077|                2| Pedal cycle|                      0|               10|                     3|                   1|                               0|                1|                       0|                        0|                          0|                         0|                    2|                      1|                       15|            1|           36|                 7|                -1|             -1|            -1|                -1|               -1|                   -1|            -1|                3|\n",
+      "| 200501CP00182|         2005|         01CP00182|                1|         Car|                      0|                9|                     1|                   7|                               0|                8|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            3|           -1|                -1|              1896|              2|             3|                -1|               -1|                   -1|            -1|                3|\n",
+      "| 200501CP00197|         2005|         01CP00197|                1|         Car|                      0|                6|                     7|                   7|                               0|                2|                       0|                        0|                          0|                         0|                    3|                      1|                       15|            1|           67|                10|              1240|              1|             1|                -1|               -1|                    1|     E01000206|                3|\n",
+      "| 200501CP00197|         2005|         01CP00197|                2|  Motorcycle|                      0|               14|                     7|                   3|                               0|                2|                       0|                        0|                          0|                         0|                    4|                      1|                       15|            1|           35|                 6|               498|              1|            10|                -1|               -1|                   -1|            -1|                3|\n",
+      "| 200501CP00212|         2005|         01CP00212|                1|         Car|                      0|                9|                     8|                   6|                               0|                8|                       0|                        0|                          0|                         0|                    4|                      1|                       15|            1|           52|                 8|              2664|              2|             5|                -1|               -1|                    1|     E01004416|                3|\n",
+      "| 200501CP00212|         2005|         01CP00212|                2| Pedal cycle|                      0|               18|                     4|                   8|                               0|                8|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            2|           30|                 6|                -1|             -1|            -1|                -1|               -1|                    1|     E01021326|                3|\n",
+      "| 200501CW10269|         2005|         01CW10269|                1|         Car|                      0|               17|                     8|                   5|                               0|                0|                       0|                        0|                          0|                         0|                    3|                      1|                        1|            1|           -1|                -1|                -1|             -1|            -1|                -1|               -1|                    1|     E01004290|                2|\n",
+      "| 200501CW10359|         2005|         01CW10359|                1|         Car|                      0|               18|                     5|                   1|                               0|                0|                       0|                        0|                          0|                         0|                    4|                      1|                       15|            3|           -1|                -1|                -1|             -1|            -1|                -1|               -1|                   -1|            -1|                3|\n",
+      "| 200501CW10368|         2005|         01CW10368|                1|         Car|                      0|                4|                     7|                   3|                               0|                1|                       0|                        0|                          0|                         0|                    2|                      1|                       15|            2|           23|                 5|              1149|              1|             2|                -1|               -1|                    1|     E01017959|                3|\n",
+      "| 200501CW10368|         2005|         01CW10368|                2|         Car|                      0|                4|                     7|                   3|                               0|                1|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            1|           51|                 8|              1994|              1|             9|                -1|               -1|                    1|     E01023819|                3|\n",
+      "| 200501CW10403|         2005|         01CW10403|                1|         Car|                      0|                9|                     8|                   6|                               0|                8|                       0|                        0|                          0|                         0|                    4|                      1|                       15|            1|           27|                 6|              1389|              1|            13|                -1|               -1|                    1|     E01023720|                3|\n",
+      "| 200501CW10403|         2005|         01CW10403|                2|         Car|                      0|               18|                     4|                   8|                               0|                8|                       0|                        0|                          0|                         0|                    1|                      1|                       15|            1|           36|                 7|                -1|             -1|            -1|                -1|               -1|                    1|     E01004673|                3|\n",
+      "| 200501CW10495|         2005|         01CW10495|                1|         Car|                      0|                6|                     2|                   2|                               0|                0|                       0|                        0|                          0|                         0|                    3|                      1|                       15|            1|           44|                 7|              1390|              1|             9|                -1|               -1|                    1|     E01000549|                3|\n",
+      "| 200501CW10495|         2005|         01CW10495|                2|         Car|                      0|               18|                     2|                   6|                               0|                0|                       0|                        0|                          0|                         0|                    1|                      2|                       15|            1|           28|                 6|                -1|             -1|            -1|                -1|               -1|                    1|     E01004650|                3|\n",
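+      "+--------------+-------------+------------------+-----------------+------------+-----------------------+-----------------+----------------------+--------------------+--------------------------------+-----------------+------------------------+-------------------------+---------------------------+--------------------------+---------------------+-----------------------+-------------------------+-------------+-------------+------------------+------------------+---------------+--------------+------------------+-----------------+---------------------+--------------+-----------------+\n",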
+      "only showing top 20 rows\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Attach accident_severity to every vehicle record. This is an inner join, so\n",
+    "# vehicle rows whose accident_index is absent from accidentindex are dropped.\n",
+    "# The key is spelled exactly as the column is named ('accident_index') so the\n",
+    "# join does not depend on Spark's case-insensitive column resolution.\n",
+    "V20052014vech_dff = V20052014.join(accidentindex, on='accident_index', how='inner')\n",
+    "\n",
+    "V20052014vech_dff.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+--------------------+-----------------+---------------+\n",
+      "|  age_band_of_driver|accident_severity|Total accidents|\n",
+      "+--------------------+-----------------+---------------+\n",
+      "|Data missing or o...|                1|           2463|\n",
+      "|             Over 70|                1|           2525|\n",
+      "|            Upto 20Y|                1|           4731|\n",
+      "|             Over 70|                2|          17507|\n",
+      "|          40Y to 70Y|                1|          17654|\n",
+      "|          20Y to 40Y|                1|          25536|\n",
+      "|Data missing or o...|                2|          48337|\n",
+      "|            Upto 20Y|                2|          60174|\n",
+      "|             Over 70|                3|          76450|\n",
+      "|          40Y to 70Y|                2|         166391|\n",
+      "|          20Y to 40Y|                2|         270225|\n",
+      "|            Upto 20Y|                3|         351830|\n",
+      "|Data missing or o...|                3|         425600|\n",
+      "|          40Y to 70Y|                3|         931310|\n",
+      "|          20Y to 40Y|                3|        1795753|\n",
+      "+--------------------+-----------------+---------------+\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Count the joined rows for every (driver age band, accident severity) pair and\n",
+    "# order the result from the smallest count to the largest.\n",
+    "# NOTE(review): the joined frame appears to hold one row per vehicle, so\n",
+    "# 'Total accidents' presumably counts vehicle records, not distinct accidents.\n",
+    "Age_df = (\n",
+    "    V20052014vech_dff\n",
+    "    .groupBy('age_band_of_driver', 'accident_severity')\n",
+    "    .agg(F.count(V20052014vech_dff['accident_index']).alias('Total accidents'))\n",
+    "    .orderBy('Total accidents')\n",
+    ")\n",
+    "Age_df.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Collect the aggregated Spark result to the driver as a pandas DataFrame.\n",
+    "Age_df_df = Age_df.toPandas()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"3\" halign=\"left\">Total accidents</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>accident_severity</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>age_band_of_driver</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>20Y to 40Y</th>\n",
+       "      <td>25536</td>\n",
+       "      <td>270225</td>\n",
+       "      <td>1795753</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40Y to 70Y</th>\n",
+       "      <td>17654</td>\n",
+       "      <td>166391</td>\n",
+       "      <td>931310</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Data missing or out of range</th>\n",
+       "      <td>2463</td>\n",
+       "      <td>48337</td>\n",
+       "      <td>425600</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Over 70</th>\n",
+       "      <td>2525</td>\n",
+       "      <td>17507</td>\n",
+       "      <td>76450</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Upto 20Y</th>\n",
+       "      <td>4731</td>\n",
+       "      <td>60174</td>\n",
+       "      <td>351830</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                             Total accidents                 \n",
+       "accident_severity                          1       2        3\n",
+       "age_band_of_driver                                           \n",
+       "20Y to 40Y                             25536  270225  1795753\n",
+       "40Y to 70Y                             17654  166391   931310\n",
+       "Data missing or out of range            2463   48337   425600\n",
+       "Over 70                                 2525   17507    76450\n",
+       "Upto 20Y                                4731   60174   351830"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Discard rows containing missing values, then reshape the counts to one row\n",
+    "# per driver age band and one column per accident severity.\n",
+    "Age_df_df = Age_df_df.dropna()\n",
+    "A2018_dfpiv = Age_df_df.pivot(\n",
+    "    index='age_band_of_driver',\n",
+    "    columns='accident_severity',\n",
+    ")\n",
+    "A2018_dfpiv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "size    1   2   3   4  5  6\n",
+      "sex                        \n",
+      "Male    1  98  24  28  4  2\n",
+      "Female  3  58  14   9  1  2\n"
+     ]
+    }
+   ],
+   "source": [
+    "import seaborn as sns\n",
+    "\n",
+    "# Scratch example that does not use the crash data: cross-tabulate seaborn's\n",
+    "# 'tips' example dataset, counting rows for each (sex, size) combination.\n",
+    "dataset = sns.load_dataset('tips')\n",
+    "dataset_table = pd.crosstab(index=dataset['sex'], columns=dataset['size'])\n",
+    "print(dataset_table)"
+   ]
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.9.7 64-bit",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.7"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}