From 36ea5ab9f05d3f35e7594a6c75d396365cbb6b70 Mon Sep 17 00:00:00 2001
From: Ritwik <rm02120@surrey.ac.uk>
Date: Mon, 14 Apr 2025 21:54:25 +0100
Subject: [PATCH] Remove unneeded value-counts cell from regression notebook

---
 .../regression/Regression_Preprocessed.ipynb  | 286 +-----------------
 1 file changed, 1 insertion(+), 285 deletions(-)

diff --git a/notebooks/regression/Regression_Preprocessed.ipynb b/notebooks/regression/Regression_Preprocessed.ipynb
index 72737ca..95b389b 100644
--- a/notebooks/regression/Regression_Preprocessed.ipynb
+++ b/notebooks/regression/Regression_Preprocessed.ipynb
@@ -357,291 +357,6 @@
     "df.info()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 264,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "🔹 Value counts for 'date':\n",
-      "3/11/2015    24\n",
-      "1/31/2015    24\n",
-      "1/11/2015    23\n",
-      "1/12/2015    23\n",
-      "3/10/2015    23\n",
-      "1/24/2015    23\n",
-      "1/8/2015     22\n",
-      "1/10/2015    22\n",
-      "3/9/2015     22\n",
-      "3/3/2015     22\n",
-      "1/13/2015    22\n",
-      "1/7/2015     22\n",
-      "1/5/2015     22\n",
-      "1/22/2015    22\n",
-      "3/8/2015     22\n",
-      "2/26/2015    21\n",
-      "3/4/2015     21\n",
-      "2/25/2015    21\n",
-      "1/25/2015    21\n",
-      "1/6/2015     21\n",
-      "1/3/2015     21\n",
-      "1/4/2015     21\n",
-      "2/28/2015    21\n",
-      "1/14/2015    21\n",
-      "1/28/2015    21\n",
-      "1/17/2015    21\n",
-      "2/18/2015    21\n",
-      "1/27/2015    21\n",
-      "3/1/2015     20\n",
-      "3/2/2015     20\n",
-      "2/17/2015    20\n",
-      "2/22/2015    20\n",
-      "2/19/2015    20\n",
-      "1/29/2015    20\n",
-      "3/5/2015     19\n",
-      "2/4/2015     19\n",
-      "2/8/2015     19\n",
-      "3/7/2015     19\n",
-      "1/19/2015    19\n",
-      "2/7/2015     19\n",
-      "1/21/2015    19\n",
-      "2/2/2015     19\n",
-      "2/23/2015    19\n",
-      "1/1/2015     19\n",
-      "2/11/2015    19\n",
-      "2/24/2015    19\n",
-      "1/18/2015    19\n",
-      "2/15/2015    19\n",
-      "2/10/2015    19\n",
-      "1/15/2015    19\n",
-      "2/3/2015     19\n",
-      "1/26/2015    19\n",
-      "2/1/2015     19\n",
-      "2/12/2015    19\n",
-      "2/5/2015     18\n",
-      "2/16/2015    18\n",
-      "2/9/2015     18\n",
-      "2/14/2015    17\n",
-      "1/20/2015    15\n",
-      "Name: date, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'quarter':\n",
-      "Quarter1    360\n",
-      "Quarter2    335\n",
-      "Quarter4    248\n",
-      "Quarter3    210\n",
-      "Quarter5     44\n",
-      "Name: quarter, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'department':\n",
-      "sweing        691\n",
-      "finishing     257\n",
-      "finishing     249\n",
-      "Name: department, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'day':\n",
-      "Wednesday    208\n",
-      "Sunday       203\n",
-      "Tuesday      201\n",
-      "Thursday     199\n",
-      "Monday       199\n",
-      "Saturday     187\n",
-      "Name: day, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'team':\n",
-      "8     109\n",
-      "2     109\n",
-      "4     105\n",
-      "1     105\n",
-      "9     104\n",
-      "10    100\n",
-      "12     99\n",
-      "7      96\n",
-      "3      95\n",
-      "6      94\n",
-      "5      93\n",
-      "11     88\n",
-      "Name: team, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'targeted_productivity':\n",
-      "0.80    540\n",
-      "0.70    242\n",
-      "0.75    216\n",
-      "0.65     63\n",
-      "0.60     57\n",
-      "0.50     49\n",
-      "0.35     27\n",
-      "0.40      2\n",
-      "0.07      1\n",
-      "Name: targeted_productivity, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'smv':\n",
-      "3.94     192\n",
-      "2.90     108\n",
-      "22.52    103\n",
-      "30.10     79\n",
-      "4.15      76\n",
-      "        ... \n",
-      "20.20      1\n",
-      "20.55      1\n",
-      "50.89      1\n",
-      "26.87      1\n",
-      "42.41      1\n",
-      "Name: smv, Length: 70, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'wip':\n",
-      "NaN       506\n",
-      "1039.0      5\n",
-      "1282.0      4\n",
-      "968.0       3\n",
-      "1144.0      3\n",
-      "         ... \n",
-      "1186.0      1\n",
-      "891.0       1\n",
-      "1498.0      1\n",
-      "551.0       1\n",
-      "1239.0      1\n",
-      "Name: wip, Length: 549, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'over_time':\n",
-      "960      129\n",
-      "1440     111\n",
-      "6960      61\n",
-      "6840      48\n",
-      "1200      39\n",
-      "        ... \n",
-      "5580       1\n",
-      "13800      1\n",
-      "8970       1\n",
-      "7680       1\n",
-      "2040       1\n",
-      "Name: over_time, Length: 143, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'incentive':\n",
-      "0       604\n",
-      "50      113\n",
-      "63       61\n",
-      "45       54\n",
-      "30       52\n",
-      "23       38\n",
-      "38       29\n",
-      "60       28\n",
-      "40       27\n",
-      "75       24\n",
-      "113      21\n",
-      "88       19\n",
-      "34       17\n",
-      "56       14\n",
-      "26        9\n",
-      "55        7\n",
-      "81        7\n",
-      "100       7\n",
-      "69        6\n",
-      "65        6\n",
-      "70        6\n",
-      "960       5\n",
-      "35        5\n",
-      "94        4\n",
-      "44        4\n",
-      "90        3\n",
-      "46        2\n",
-      "24        2\n",
-      "27        2\n",
-      "119       2\n",
-      "49        2\n",
-      "98        1\n",
-      "21        1\n",
-      "1200      1\n",
-      "62        1\n",
-      "25        1\n",
-      "1080      1\n",
-      "93        1\n",
-      "29        1\n",
-      "1440      1\n",
-      "32        1\n",
-      "33        1\n",
-      "2880      1\n",
-      "54        1\n",
-      "37        1\n",
-      "138       1\n",
-      "53        1\n",
-      "3600      1\n",
-      "Name: incentive, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'idle_time':\n",
-      "0.0      1179\n",
-      "3.5         3\n",
-      "4.0         2\n",
-      "4.5         2\n",
-      "8.0         2\n",
-      "5.0         2\n",
-      "2.0         2\n",
-      "270.0       1\n",
-      "150.0       1\n",
-      "6.5         1\n",
-      "300.0       1\n",
-      "90.0        1\n",
-      "Name: idle_time, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'idle_men':\n",
-      "0     1179\n",
-      "30       3\n",
-      "20       3\n",
-      "15       3\n",
-      "10       3\n",
-      "35       2\n",
-      "45       1\n",
-      "40       1\n",
-      "37       1\n",
-      "25       1\n",
-      "Name: idle_men, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'no_of_style_change':\n",
-      "0    1050\n",
-      "1     114\n",
-      "2      33\n",
-      "Name: no_of_style_change, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'no_of_workers':\n",
-      "8.0     262\n",
-      "58.0    114\n",
-      "57.0    109\n",
-      "59.0     75\n",
-      "10.0     60\n",
-      "       ... \n",
-      "29.5      1\n",
-      "11.0      1\n",
-      "21.0      1\n",
-      "6.0       1\n",
-      "33.5      1\n",
-      "Name: no_of_workers, Length: 61, dtype: int64\n",
-      "\n",
-      "🔹 Value counts for 'actual_productivity':\n",
-      "0.800402    24\n",
-      "0.971867    12\n",
-      "0.850137    12\n",
-      "0.750651    11\n",
-      "0.850502    11\n",
-      "            ..\n",
-      "0.460579     1\n",
-      "0.852794     1\n",
-      "0.585316     1\n",
-      "1.000457     1\n",
-      "0.700888     1\n",
-      "Name: actual_productivity, Length: 879, dtype: int64\n"
-     ]
-    }
-   ],
-   "source": [
-    "for col in df.columns:\n",
-    "    print(f\"\\n🔹 Value counts for '{col}':\")\n",
-    "    print(df[col].value_counts(dropna=False))  # This shows all values\n"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 265,
@@ -684,6 +399,7 @@
     }
    ],
    "source": [
+    "#checking for any existing null values\n",
     "df.isnull().sum()"
    ]
   },
-- 
GitLab