From 36ea5ab9f05d3f35e7594a6c75d396365cbb6b70 Mon Sep 17 00:00:00 2001 From: Ritwik <rm02120@surrey.ac.uk> Date: Mon, 14 Apr 2025 21:54:25 +0100 Subject: [PATCH] removed few lines unrequired --- .../regression/Regression_Preprocessed.ipynb | 286 +----------------- 1 file changed, 1 insertion(+), 285 deletions(-) diff --git a/notebooks/regression/Regression_Preprocessed.ipynb b/notebooks/regression/Regression_Preprocessed.ipynb index 72737ca..95b389b 100644 --- a/notebooks/regression/Regression_Preprocessed.ipynb +++ b/notebooks/regression/Regression_Preprocessed.ipynb @@ -357,291 +357,6 @@ "df.info()" ] }, - { - "cell_type": "code", - "execution_count": 264, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "🔹 Value counts for 'date':\n", - "3/11/2015 24\n", - "1/31/2015 24\n", - "1/11/2015 23\n", - "1/12/2015 23\n", - "3/10/2015 23\n", - "1/24/2015 23\n", - "1/8/2015 22\n", - "1/10/2015 22\n", - "3/9/2015 22\n", - "3/3/2015 22\n", - "1/13/2015 22\n", - "1/7/2015 22\n", - "1/5/2015 22\n", - "1/22/2015 22\n", - "3/8/2015 22\n", - "2/26/2015 21\n", - "3/4/2015 21\n", - "2/25/2015 21\n", - "1/25/2015 21\n", - "1/6/2015 21\n", - "1/3/2015 21\n", - "1/4/2015 21\n", - "2/28/2015 21\n", - "1/14/2015 21\n", - "1/28/2015 21\n", - "1/17/2015 21\n", - "2/18/2015 21\n", - "1/27/2015 21\n", - "3/1/2015 20\n", - "3/2/2015 20\n", - "2/17/2015 20\n", - "2/22/2015 20\n", - "2/19/2015 20\n", - "1/29/2015 20\n", - "3/5/2015 19\n", - "2/4/2015 19\n", - "2/8/2015 19\n", - "3/7/2015 19\n", - "1/19/2015 19\n", - "2/7/2015 19\n", - "1/21/2015 19\n", - "2/2/2015 19\n", - "2/23/2015 19\n", - "1/1/2015 19\n", - "2/11/2015 19\n", - "2/24/2015 19\n", - "1/18/2015 19\n", - "2/15/2015 19\n", - "2/10/2015 19\n", - "1/15/2015 19\n", - "2/3/2015 19\n", - "1/26/2015 19\n", - "2/1/2015 19\n", - "2/12/2015 19\n", - "2/5/2015 18\n", - "2/16/2015 18\n", - "2/9/2015 18\n", - "2/14/2015 17\n", - "1/20/2015 15\n", - "Name: date, dtype: int64\n", - "\n", - "🔹 Value counts for 'quarter':\n", - "Quarter1 360\n", - "Quarter2 335\n", - "Quarter4 248\n", - "Quarter3 210\n", - "Quarter5 44\n", - "Name: quarter, dtype: int64\n", - "\n", - "🔹 Value counts for 'department':\n", - "sweing 691\n", - "finishing 257\n", - "finishing 249\n", - "Name: department, dtype: int64\n", - "\n", - "🔹 Value counts for 'day':\n", - "Wednesday 208\n", - "Sunday 203\n", - "Tuesday 201\n", - "Thursday 199\n", - "Monday 199\n", - "Saturday 187\n", - "Name: day, dtype: int64\n", - "\n", - "🔹 Value counts for 'team':\n", - "8 109\n", - "2 109\n", - "4 105\n", - "1 105\n", - "9 104\n", - "10 100\n", - "12 99\n", - "7 96\n", - "3 95\n", - "6 94\n", - "5 93\n", - "11 88\n", - "Name: team, dtype: int64\n", - "\n", - "🔹 Value counts for 'targeted_productivity':\n", - "0.80 540\n", - "0.70 242\n", - "0.75 216\n", - "0.65 63\n", - "0.60 57\n", - "0.50 49\n", - "0.35 27\n", - "0.40 2\n", - "0.07 1\n", - "Name: targeted_productivity, dtype: int64\n", - "\n", - "🔹 Value counts for 'smv':\n", - "3.94 192\n", - "2.90 108\n", - "22.52 103\n", - "30.10 79\n", - "4.15 76\n", - " ... \n", - "20.20 1\n", - "20.55 1\n", - "50.89 1\n", - "26.87 1\n", - "42.41 1\n", - "Name: smv, Length: 70, dtype: int64\n", - "\n", - "🔹 Value counts for 'wip':\n", - "NaN 506\n", - "1039.0 5\n", - "1282.0 4\n", - "968.0 3\n", - "1144.0 3\n", - " ... \n", - "1186.0 1\n", - "891.0 1\n", - "1498.0 1\n", - "551.0 1\n", - "1239.0 1\n", - "Name: wip, Length: 549, dtype: int64\n", - "\n", - "🔹 Value counts for 'over_time':\n", - "960 129\n", - "1440 111\n", - "6960 61\n", - "6840 48\n", - "1200 39\n", - " ... \n", - "5580 1\n", - "13800 1\n", - "8970 1\n", - "7680 1\n", - "2040 1\n", - "Name: over_time, Length: 143, dtype: int64\n", - "\n", - "🔹 Value counts for 'incentive':\n", - "0 604\n", - "50 113\n", - "63 61\n", - "45 54\n", - "30 52\n", - "23 38\n", - "38 29\n", - "60 28\n", - "40 27\n", - "75 24\n", - "113 21\n", - "88 19\n", - "34 17\n", - "56 14\n", - "26 9\n", - "55 7\n", - "81 7\n", - "100 7\n", - "69 6\n", - "65 6\n", - "70 6\n", - "960 5\n", - "35 5\n", - "94 4\n", - "44 4\n", - "90 3\n", - "46 2\n", - "24 2\n", - "27 2\n", - "119 2\n", - "49 2\n", - "98 1\n", - "21 1\n", - "1200 1\n", - "62 1\n", - "25 1\n", - "1080 1\n", - "93 1\n", - "29 1\n", - "1440 1\n", - "32 1\n", - "33 1\n", - "2880 1\n", - "54 1\n", - "37 1\n", - "138 1\n", - "53 1\n", - "3600 1\n", - "Name: incentive, dtype: int64\n", - "\n", - "🔹 Value counts for 'idle_time':\n", - "0.0 1179\n", - "3.5 3\n", - "4.0 2\n", - "4.5 2\n", - "8.0 2\n", - "5.0 2\n", - "2.0 2\n", - "270.0 1\n", - "150.0 1\n", - "6.5 1\n", - "300.0 1\n", - "90.0 1\n", - "Name: idle_time, dtype: int64\n", - "\n", - "🔹 Value counts for 'idle_men':\n", - "0 1179\n", - "30 3\n", - "20 3\n", - "15 3\n", - "10 3\n", - "35 2\n", - "45 1\n", - "40 1\n", - "37 1\n", - "25 1\n", - "Name: idle_men, dtype: int64\n", - "\n", - "🔹 Value counts for 'no_of_style_change':\n", - "0 1050\n", - "1 114\n", - "2 33\n", - "Name: no_of_style_change, dtype: int64\n", - "\n", - "🔹 Value counts for 'no_of_workers':\n", - "8.0 262\n", - "58.0 114\n", - "57.0 109\n", - "59.0 75\n", - "10.0 60\n", - " ... \n", - "29.5 1\n", - "11.0 1\n", - "21.0 1\n", - "6.0 1\n", - "33.5 1\n", - "Name: no_of_workers, Length: 61, dtype: int64\n", - "\n", - "🔹 Value counts for 'actual_productivity':\n", - "0.800402 24\n", - "0.971867 12\n", - "0.850137 12\n", - "0.750651 11\n", - "0.850502 11\n", - " ..\n", - "0.460579 1\n", - "0.852794 1\n", - "0.585316 1\n", - "1.000457 1\n", - "0.700888 1\n", - "Name: actual_productivity, Length: 879, dtype: int64\n" - ] - } - ], - "source": [ - "for col in df.columns:\n", - " print(f\"\\n🔹 Value counts for '{col}':\")\n", - " print(df[col].value_counts(dropna=False)) # This shows all values\n" - ] - }, { "cell_type": "code", "execution_count": 265, @@ -684,6 +399,7 @@ } ], "source": [ + "#checking for any existing null values\n", "df.isnull().sum()" ] }, -- GitLab