diff --git a/notebooks/preprocessing_arpit.ipynb b/notebooks/preprocessing_arpit.ipynb
deleted file mode 100644
index 72940f947012ce5207091e86547dd2d32bf3e61a..0000000000000000000000000000000000000000
--- a/notebooks/preprocessing_arpit.ipynb
+++ /dev/null
@@ -1,567 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "7ce4daec",
-   "metadata": {},
-   "source": [
-    "### Loading the dataset "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 69,
-   "id": "0e4c74ab",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "# Load the raw dataset via a path relative to this notebook (assumes the kernel runs from the notebooks/ folder)\n",
-    "file_path = \"../data/raw/census_income_rawdata.csv\"\n",
-    "df = pd.read_csv(file_path)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 70,
-   "id": "d4182c53",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>age</th>\n",
-       "      <th>workclass</th>\n",
-       "      <th>fnlwgt</th>\n",
-       "      <th>education</th>\n",
-       "      <th>education-num</th>\n",
-       "      <th>marital-status</th>\n",
-       "      <th>occupation</th>\n",
-       "      <th>relationship</th>\n",
-       "      <th>race</th>\n",
-       "      <th>sex</th>\n",
-       "      <th>capital-gain</th>\n",
-       "      <th>capital-loss</th>\n",
-       "      <th>hours-per-week</th>\n",
-       "      <th>native-country</th>\n",
-       "      <th>income</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>39</td>\n",
-       "      <td>State-gov</td>\n",
-       "      <td>77516</td>\n",
-       "      <td>Bachelors</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Never-married</td>\n",
-       "      <td>Adm-clerical</td>\n",
-       "      <td>Not-in-family</td>\n",
-       "      <td>White</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>2174</td>\n",
-       "      <td>0</td>\n",
-       "      <td>40</td>\n",
-       "      <td>United-States</td>\n",
-       "      <td>&lt;=50K</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>50</td>\n",
-       "      <td>Self-emp-not-inc</td>\n",
-       "      <td>83311</td>\n",
-       "      <td>Bachelors</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Married-civ-spouse</td>\n",
-       "      <td>Exec-managerial</td>\n",
-       "      <td>Husband</td>\n",
-       "      <td>White</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>13</td>\n",
-       "      <td>United-States</td>\n",
-       "      <td>&lt;=50K</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>38</td>\n",
-       "      <td>Private</td>\n",
-       "      <td>215646</td>\n",
-       "      <td>HS-grad</td>\n",
-       "      <td>9</td>\n",
-       "      <td>Divorced</td>\n",
-       "      <td>Handlers-cleaners</td>\n",
-       "      <td>Not-in-family</td>\n",
-       "      <td>White</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>40</td>\n",
-       "      <td>United-States</td>\n",
-       "      <td>&lt;=50K</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>53</td>\n",
-       "      <td>Private</td>\n",
-       "      <td>234721</td>\n",
-       "      <td>11th</td>\n",
-       "      <td>7</td>\n",
-       "      <td>Married-civ-spouse</td>\n",
-       "      <td>Handlers-cleaners</td>\n",
-       "      <td>Husband</td>\n",
-       "      <td>Black</td>\n",
-       "      <td>Male</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>40</td>\n",
-       "      <td>United-States</td>\n",
-       "      <td>&lt;=50K</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>28</td>\n",
-       "      <td>Private</td>\n",
-       "      <td>338409</td>\n",
-       "      <td>Bachelors</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Married-civ-spouse</td>\n",
-       "      <td>Prof-specialty</td>\n",
-       "      <td>Wife</td>\n",
-       "      <td>Black</td>\n",
-       "      <td>Female</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>40</td>\n",
-       "      <td>Cuba</td>\n",
-       "      <td>&lt;=50K</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   age         workclass  fnlwgt  education  education-num  \\\n",
-       "0   39         State-gov   77516  Bachelors             13   \n",
-       "1   50  Self-emp-not-inc   83311  Bachelors             13   \n",
-       "2   38           Private  215646    HS-grad              9   \n",
-       "3   53           Private  234721       11th              7   \n",
-       "4   28           Private  338409  Bachelors             13   \n",
-       "\n",
-       "       marital-status         occupation   relationship   race     sex  \\\n",
-       "0       Never-married       Adm-clerical  Not-in-family  White    Male   \n",
-       "1  Married-civ-spouse    Exec-managerial        Husband  White    Male   \n",
-       "2            Divorced  Handlers-cleaners  Not-in-family  White    Male   \n",
-       "3  Married-civ-spouse  Handlers-cleaners        Husband  Black    Male   \n",
-       "4  Married-civ-spouse     Prof-specialty           Wife  Black  Female   \n",
-       "\n",
-       "   capital-gain  capital-loss  hours-per-week native-country income  \n",
-       "0          2174             0              40  United-States  <=50K  \n",
-       "1             0             0              13  United-States  <=50K  \n",
-       "2             0             0              40  United-States  <=50K  \n",
-       "3             0             0              40  United-States  <=50K  \n",
-       "4             0             0              40           Cuba  <=50K  "
-      ]
-     },
-     "execution_count": 70,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df.head(n=5)  # preview the first five rows"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0c24e28c",
-   "metadata": {},
-   "source": [
-    "### Displaying each column's data type "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 71,
-   "id": "62119ad4",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "age                int64\n",
-      "workclass         object\n",
-      "fnlwgt             int64\n",
-      "education         object\n",
-      "education-num      int64\n",
-      "marital-status    object\n",
-      "occupation        object\n",
-      "relationship      object\n",
-      "race              object\n",
-      "sex               object\n",
-      "capital-gain       int64\n",
-      "capital-loss       int64\n",
-      "hours-per-week     int64\n",
-      "native-country    object\n",
-      "income            object\n",
-      "dtype: object\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(df.dtypes)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d6816ab3",
-   "metadata": {},
-   "source": [
-    "### Checking for null values\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 72,
-   "id": "d486feaf",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "age                 0\n",
-       "workclass         963\n",
-       "fnlwgt              0\n",
-       "education           0\n",
-       "education-num       0\n",
-       "marital-status      0\n",
-       "occupation        966\n",
-       "relationship        0\n",
-       "race                0\n",
-       "sex                 0\n",
-       "capital-gain        0\n",
-       "capital-loss        0\n",
-       "hours-per-week      0\n",
-       "native-country    274\n",
-       "income              0\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 72,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df.isna().sum()  # per-column count of missing values"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "369da037",
-   "metadata": {},
-   "source": [
-    "### Handling the null values and replacing them with \"Unknown\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 73,
-   "id": "1a5df3f2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = df.fillna(\"Unknown\")  # every missing entry becomes the literal label \"Unknown\"\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "278efc8d",
-   "metadata": {},
-   "source": [
-    "### Handling \"?\" by replacing it with \"Unknown\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 74,
-   "id": "ca5f24f1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = df.replace(\"?\", \"Unknown\")  # cells that are exactly \"?\" get the same \"Unknown\" label\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b6d6e110",
-   "metadata": {},
-   "source": [
-    "### Discretizing 'age' into age groups "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 75,
-   "id": "2bf01c3d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def age_group(age):\n",
-    "    \"\"\"Label an age as 'young' (<30), 'middle' (30-55) or 'senior' (anything else).\"\"\"\n",
-    "    if age < 30:\n",
-    "        return \"young\"\n",
-    "    if age <= 55:\n",
-    "        return \"middle\"\n",
-    "    return \"senior\"\n",
-    "df[\"age\"] = df[\"age\"].apply(age_group)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f381014c",
-   "metadata": {},
-   "source": [
-    "### Discretizing 'hours-per-week' into hour groups"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 76,
-   "id": "18153522",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Bucket weekly working hours into three workload bands\n",
-    "def hours_group(hours):\n",
-    "    \"\"\"Label weekly hours as 'low' (<25), 'average' (25-40) or 'high' (anything else).\"\"\"\n",
-    "    if hours < 25:\n",
-    "        return \"low\"\n",
-    "    if hours <= 40:\n",
-    "        return \"average\"\n",
-    "    return \"high\"\n",
-    "df[\"hours-per-week\"] = df[\"hours-per-week\"].apply(hours_group)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "b54335a6",
-   "metadata": {},
-   "source": [
-    "### Displaying all the unique values and their frequency in the target variable "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 77,
-   "id": "04506643",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<=50K     24720\n",
-       "<=50K.    12435\n",
-       ">50K       7841\n",
-       ">50K.      3846\n",
-       "Name: income, dtype: int64"
-      ]
-     },
-     "execution_count": 77,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df.loc[:, \"income\"].value_counts()  # distinct target labels and how often each occurs\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "12a4e5ec",
-   "metadata": {},
-   "source": [
-    "### Removing the trailing \".\" from the target variable's values to fix inconsistent label formatting"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 78,
-   "id": "e93e201e",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<=50K    37155\n",
-       ">50K     11687\n",
-       "Name: income, dtype: int64"
-      ]
-     },
-     "execution_count": 78,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Trim surrounding whitespace first, then drop any trailing '.' characters\n",
-    "df[\"income\"] = df[\"income\"].str.strip()\n",
-    "df[\"income\"] = df[\"income\"].str.rstrip(\".\")\n",
-    "df[\"income\"].value_counts()\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "00c14391",
-   "metadata": {},
-   "source": [
-    "### Normalising the strings across the dataframe for cleaner data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 79,
-   "id": "19a7e465",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = df.applymap(lambda cell: cell if not isinstance(cell, str) else cell.strip().lower())  # trims and lower-cases every string cell, target labels included\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "82dd59c6",
-   "metadata": {},
-   "source": [
-    "### Encoding the Target variable "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 80,
-   "id": "2953bc1b",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df[\"income\"] = df[\"income\"].str.upper().map({\"<=50K\": 0, \">50K\": 1})  # upper-case first: the labels were lower-cased by the normalisation step, so the capital-K keys would otherwise match nothing and yield all NaN\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "15634166",
-   "metadata": {},
-   "source": [
-    "### Removing all the unnecessary columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 81,
-   "id": "39c881b2",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Keep the selected features plus the target; every other column is dropped\n",
-    "columns_to_keep = [\n",
-    "    \"age\", \"workclass\", \"education\", \"marital-status\", \"occupation\",\n",
-    "    \"relationship\", \"race\", \"sex\", \"hours-per-week\", \"native-country\",\n",
-    "    \"income\",\n",
-    "]\n",
-    "df = df.loc[:, columns_to_keep]\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "1f2b92e4",
-   "metadata": {},
-   "source": [
-    "### Saving the preprocessed data to a new CSV file"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 82,
-   "id": "2cfc0da4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.to_csv(\"../data/processed/preprocessed_cencus_income_data.csv\", index=False)  # relative to the notebooks/ folder (assumed working directory); file name left unchanged in case other code reads it"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "85cf84f3",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "785be949",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3f7919db",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}