diff --git a/Datasets/ALL32USERS15MIN_WITHTHRESHOLD.xlsx b/Datasets/ALL32USERS15MIN_WITHTHRESHOLD.xlsx
new file mode 100644
index 0000000..eb44793
Binary files /dev/null and b/Datasets/ALL32USERS15MIN_WITHTHRESHOLD.xlsx differ
diff --git a/Datasets/ALL32USERS1HR_WITHTHRESHOLD.xlsx b/Datasets/ALL32USERS1HR_WITHTHRESHOLD.xlsx
new file mode 100644
index 0000000..316a3a1
Binary files /dev/null and b/Datasets/ALL32USERS1HR_WITHTHRESHOLD.xlsx differ
diff --git a/Datasets/ALLUSERS32_15MIN_WITHOUTTHREHOLD .xlsx b/Datasets/ALLUSERS32_15MIN_WITHOUTTHREHOLD .xlsx
new file mode 100644
index 0000000..8e4d9ed
Binary files /dev/null and b/Datasets/ALLUSERS32_15MIN_WITHOUTTHREHOLD .xlsx differ
diff --git a/Datasets/ALLUSERS_32_1HR_WITHOUT_THRESHOLD.xlsx b/Datasets/ALLUSERS_32_1HR_WITHOUT_THRESHOLD.xlsx
new file mode 100644
index 0000000..74b6974
Binary files /dev/null and b/Datasets/ALLUSERS_32_1HR_WITHOUT_THRESHOLD.xlsx differ
diff --git a/MODIFICATIONSANDINSTRUCTIONS.pdf b/MODIFICATIONSANDINSTRUCTIONS.pdf
new file mode 100644
index 0000000..dd9ec12
Binary files /dev/null and b/MODIFICATIONSANDINSTRUCTIONS.pdf differ
diff --git a/final-32-automated-code-new(1).ipynb b/final-32-automated-code-new(1).ipynb
new file mode 100644
index 0000000..5db8cea
--- /dev/null
+++ b/final-32-automated-code-new(1).ipynb
@@ -0,0 +1,987 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
+    "trusted": true
+   },
+   "outputs": [],
+   "source": [
+    "# This Python 3 environment comes with many helpful analytics libraries installed\n",
+    "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
+    "# For example, here are several helpful packages to load\n",
+    "\n",
+    "import numpy as np # linear algebra\n",
+    "import pandas as pd # data processing, CSV file I/O (e.g. 
pd.read_csv)\n", + "\n", + "# Input data files are available in the read-only \"../input/\" directory\n", + "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", + "\n", + "import os\n", + "for dirname, _, filenames in os.walk('/kaggle/input'):\n", + " for filename in filenames:\n", + " print(os.path.join(dirname, filename))\n", + "\n", + "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n", + "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true, + "execution": { + "iopub.execute_input": "2025-05-02T07:51:57.538752Z", + "iopub.status.busy": "2025-05-02T07:51:57.538555Z", + "iopub.status.idle": "2025-05-02T08:46:51.909800Z", + "shell.execute_reply": "2025-05-02T08:46:51.909147Z", + "shell.execute_reply.started": "2025-05-02T07:51:57.538734Z" + }, + "jupyter": { + "outputs_hidden": true + }, + "trusted": true + }, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'sklearn'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 11\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkeras_tuner\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkt\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkeras_tuner\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m RandomSearch\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m accuracy_score\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# === Clean previous tuning directory ===\u001b[39;00m\n\u001b[1;32m 14\u001b[0m shutil\u001b[38;5;241m.\u001b[39mrmtree(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/kaggle/working/my_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m, ignore_errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sklearn'" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import shutil\n", + "import os\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import LSTM, Dense, Dropout,GRU,Bidirectional\n", + "from tensorflow.keras.optimizers import Adam\n", + "from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping\n", + "import keras_tuner as kt\n", + "from keras_tuner import RandomSearch\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "# === Clean previous tuning directory ===\n", + "shutil.rmtree(\"/kaggle/working/my_dir\", ignore_errors=True)\n", + "\n", + "# === Load dataset ===\n", + "file_path = '/kaggle/input/32usrs/ALLUSERS32_15MIN_WITHOUTTHREHOLD.xlsx' \n", + "\n", + "df = pd.read_excel(file_path)\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "# === Helper functions for scenario selection ===\n", + "def get_user_input_for_scenario(scenario_type):\n", + " print(f\"\\nPlease define your custom {scenario_type} scenario:\")\n", + " years_input = input(f\"Enter {scenario_type} years (comma-separated, e.g., 2017,2018): \").strip()\n", + " years = list(map(int, years_input.split(',')))\n", + " years_months = []\n", + " for year in years:\n", + " months_input = input(f\"Enter months for year {year} (comma-separated, e.g., 1,2,3): \").strip()\n", + " months = list(map(int, months_input.split(',')))\n", + " years_months.append((year, months))\n", + " return years_months\n", + "\n", + "def display_warning_about_2020_data():\n", + " print(\"\\n⚠️ Warning: 2020 data after February is excluded due to COVID-19.\")\n", + " print(\"✅ Only Jan and Feb 2020 are used for testing. Do not use them in training/validation.\")\n", + "\n", + "def display_warnings_for_scenarios(scenario_type):\n", + " if scenario_type == \"training\":\n", + " print(\"\\n⚠️ Predefined Training Scenarios (for reference only):\")\n", + " for name, scenario in predefined_training_scenarios.items():\n", + " parts = [f\"{year}-{months}\" for year, months in scenario['years_months']]\n", + " print(f\" {name}: {', '.join(parts)}\")\n", + " elif scenario_type == \"validation\":\n", + " print(\"\\n⚠️ Predefined Validation Scenario:\")\n", + " for name, scenario in predefined_validation_scenarios.items():\n", + " parts = [f\"{year}-{months}\" for year, months in scenario['years_months']]\n", + " print(f\" {name}: {', '.join(parts)}\")\n", + " print(\" - This uses Oct, Nov, Dec of 2019\")\n", + "\n", + "predefined_training_scenarios = {\n", + " \"Scenario 1\": {\"years_months\": [(2018, list(range(1, 13))), (2019, list(range(1, 10)))]},\n", + " \"Scenario 2\": {\"years_months\": [(2017, list(range(1, 13))), (2018, list(range(1, 13))), (2019, list(range(1, 10)))]}\n", + "}\n", + "predefined_validation_scenarios = {\n", + " \"Scenario A\": {\"years_months\": [(2019, [10, 11, 12])]}\n", + "}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "# === Get user-defined training and validation scenarios ===\n", + "print(\"=== Training Scenario Setup ===\")\n", + "display_warning_about_2020_data()\n", + "display_warnings_for_scenarios(\"training\")\n", + "training_scenario = get_user_input_for_scenario(\"training\")\n", + "\n", + "print(\"\\n=== Validation Scenario Setup ===\")\n", + "display_warning_about_2020_data()\n", + "display_warnings_for_scenarios(\"validation\")\n", + "validation_scenario = get_user_input_for_scenario(\"validation\")\n", + "\n", + "# === Filter and preprocess data ===\n", + "def filter_data(df, scenario):\n", + " filtered = pd.DataFrame()\n", + " for year, months in scenario:\n", + " filtered = pd.concat([filtered, df[(df['Year'] == year) & (df['Month'].isin(months))]])\n", + " return filtered.drop(columns=['Month', 'Year', 'date', 'DayOfWeek']) \n", + "\n", + "data = filter_data(df, training_scenario)\n", + "data_val = filter_data(df, validation_scenario)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "\n", + "\n", + "# === Organize by user ===\n", + 
"df_sorted = data.sort_values(by='user').reset_index(drop=True)\n", + "df_sorted_val = data_val.sort_values(by='user').reset_index(drop=True)\n", + "users = df_sorted['user'].unique()\n", + "users_val = df_sorted_val['user'].unique()\n", + "\n", + "user_data = {user: df_sorted[df_sorted['user'] == user] for user in users}\n", + "user_data_val = {user: df_sorted_val[df_sorted_val['user'] == user] for user in users_val}\n", + "\n", + "# === Callbacks ===\n", + "early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)\n", + "lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "trusted": true + }, + "outputs": [], + "source": [ + "# === Model tuning and training loop ===\n", + "best_models = {}\n", + "\n", + "for sequence_length in range(20, 30, 5):\n", + " print(f\"\\n=== Training for Sequence Length: {sequence_length} ===\")\n", + "\n", + " # Training data\n", + " X, y = [], []\n", + " for user, data in user_data.items():\n", + " features = data.drop('user', axis=1).values\n", + " labels = data['user'].values\n", + " for i in range(len(features) - sequence_length):\n", + " X.append(features[i:i + sequence_length])\n", + " y.append(labels[i + sequence_length])\n", + " X = np.array(X)\n", + " y = np.array(y)\n", + "\n", + " # Validation data\n", + " X_val, y_val = [], []\n", + " for user, data in user_data_val.items():\n", + " features = data.drop('user', axis=1).values\n", + " labels = data['user'].values\n", + " for i in range(len(features) - sequence_length):\n", + " X_val.append(features[i:i + sequence_length])\n", + " y_val.append(labels[i + sequence_length])\n", + " X_val = np.array(X_val)\n", + " y_val = np.array(y_val)\n", + "\n", + " if X.shape[0] == 0 or X_val.shape[0] == 0:\n", + " print(f\"⚠️ Skipped sequence length {sequence_length} due to insufficient data.\")\n", + " continue\n", + "\n", + " n_features = X.shape[2]\n", + "\n", + " def build_model(hp):\n", + " model = Sequential()\n", + " model.add(Bidirectional(LSTM(units=hp.Int('units', 32, 256, step=2),\n", + " input_shape=(sequence_length, n_features))))\n", + " model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.5, step=0.1)))\n", + " model.add(Dense(len(users), activation='softmax'))\n", + " model.compile(\n", + " optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),\n", + " loss='sparse_categorical_crossentropy',\n", + " metrics=['accuracy']\n", + " )\n", + " return model\n", + "\n", + " tuner = RandomSearch(\n", + " build_model,\n", + " objective='val_loss',\n", + " max_trials=30,\n", + " executions_per_trial=2,\n", + " directory='/kaggle/working/my_dir',\n", + " project_name=f'lstm_seq_{sequence_length}'\n", + " )\n", + "\n", + " tuner.search(X, y, epochs=30, validation_data=(X_val, y_val),\n", + " callbacks=[early_stopping, lr_scheduler], verbose=1)\n", + "\n", + " best_hps = tuner.get_best_hyperparameters(1)[0]\n", + " best_model = tuner.hypermodel.build(best_hps)\n", + " best_model.fit(X, y, epochs=30, validation_data=(X_val, y_val),\n", + " callbacks=[early_stopping, lr_scheduler], verbose=0)\n", + "\n", + " best_models[sequence_length] = {\n", + " 'model': best_model,\n", + " 'best_hyperparameters': {\n", + " 'units': best_hps.get('units'),\n", + " 'dropout_rate': best_hps.get('dropout_rate'),\n", + " 'learning_rate': best_hps.get('learning_rate')\n", + " }\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { 
+ "trusted": true + }, + "outputs": [], + "source": [ + "\n", + "\n", + "# === Get test scenario input ===\n", + "def get_user_input_for_test():\n", + " print(\"\\n=== Testing Scenario Setup ===\")\n", + " print(\"⚠️ Only January and February of 2020 were used for testing in predefined setup.\")\n", + " print(\"⚠️ Avoid using 2020 data after February due to COVID-19 impact.\\n\")\n", + " years_input = input(\"Enter test years (comma-separated, e.g., 2020): \").strip()\n", + " years = list(map(int, years_input.split(',')))\n", + " years_months = []\n", + " for year in years:\n", + " months_input = input(f\"Enter months for year {year} (comma-separated, e.g., 1,2): \").strip()\n", + " months = list(map(int, months_input.split(',')))\n", + " years_months.append((year, months))\n", + " return years_months\n", + "\n", + "def filter_test_data(df, scenario):\n", + " data_parts = []\n", + " for year, months in scenario:\n", + " part = df[(df['Year'] == year) & (df['Month'].isin(months))]\n", + " data_parts.append(part)\n", + " return pd.concat(data_parts, ignore_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2025-05-02T08:53:17.334789Z", + "iopub.status.busy": "2025-05-02T08:53:17.334489Z", + "iopub.status.idle": "2025-05-02T08:53:17.344855Z", + "shell.execute_reply": "2025-05-02T08:53:17.344176Z", + "shell.execute_reply.started": "2025-05-02T08:53:17.334766Z" + }, + "trusted": true + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "\n", + "def evaluate_model_on_test_data(model, test_df, sequence_length, excel_writer):\n", + " print(\"\\n🧪 Evaluating on Test Data...\")\n", + " test_df = test_df.drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])\n", + " test_df = test_df.sort_values(by='user').reset_index(drop=True)\n", + "\n", + " users = test_df['user'].unique()\n", + " results = []\n", + " accuracy_above_50 = 0\n", + "\n", + " for user in users:\n", + " user_df = test_df[test_df['user'] == user]\n", + " X, y_true = [], []\n", + " user_features = user_df.drop(columns=['user']).values\n", + " user_labels = user_df['user'].values\n", + "\n", + " if len(user_df) <= sequence_length:\n", + " print(f\"Skipping User {user} (not enough data for sequence length {sequence_length})\")\n", + " continue\n", + "\n", + " for i in range(len(user_df) - sequence_length):\n", + " seq_x = user_features[i:i + sequence_length]\n", + " seq_y = user_labels[i + sequence_length]\n", + " X.append(seq_x)\n", + " y_true.append(seq_y)\n", + "\n", + " X = np.array(X)\n", + " y_true = np.array(y_true)\n", + "\n", + " if len(X) == 0:\n", + " continue\n", + "\n", + " y_pred = model.predict(X, verbose=0)\n", + " y_pred_classes = np.argmax(y_pred, axis=1)\n", + "\n", + " unique_pred, counts_pred = np.unique(y_pred_classes, return_counts=True)\n", + " label_counts_pred = dict(zip(unique_pred, counts_pred))\n", + "\n", + " unique_true, counts_true = np.unique(y_true, return_counts=True)\n", + " label_counts_true = dict(zip(unique_true, counts_true))\n", + "\n", + " acc = accuracy_score(y_true, y_pred_classes)\n", + " if acc > 0.5:\n", + " accuracy_above_50 += 1\n", + "\n", + " # Append result to list\n", + " results.append({\n", + " 'User': user,\n", + " 'Accuracy (%)': acc * 100,\n", + " 'Predicted Class Distribution': str(label_counts_pred),\n", + " 'Actual Class Distribution': str(label_counts_true)\n", + " })\n", + "\n", + " print(f\"\\n=== User {user} ===\")\n", + " print(f\"✅ Accuracy: {acc * 100:.2f}%\")\n", + 
" print(\"📊 Predicted Class Distribution:\", label_counts_pred)\n", + " print(\"📌 Actual Class Distribution: \", label_counts_true)\n", + "\n", + " final_accuracy_percent = (accuracy_above_50 / 32) * 100\n", + " print(f\"\\n🟩 Final Evaluation Summary for Sequence Length {sequence_length}:\")\n", + " print(f\"Users with >50% Accuracy: {accuracy_above_50} / 32\")\n", + " print(f\"✅ Final Success Rate: {final_accuracy_percent:.2f}%\")\n", + "\n", + " # Append overall stats as a new row\n", + " results.append({\n", + " 'User': 'TOTAL',\n", + " 'Accuracy (%)': '',\n", + " 'Predicted Class Distribution': f'Users >50% Acc: {accuracy_above_50}/32',\n", + " 'Actual Class Distribution': f'Success Rate: {final_accuracy_percent:.2f}%'\n", + " })\n", + "\n", + " # Save results to Excel sheet\n", + " df_results = pd.DataFrame(results)\n", + " df_results.to_excel(excel_writer, sheet_name=f\"SeqLen_{sequence_length}\", index=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": true, + "execution": { + "iopub.execute_input": "2025-05-02T08:56:14.082755Z", + "iopub.status.busy": "2025-05-02T08:56:14.082010Z", + "iopub.status.idle": "2025-05-02T08:56:28.518300Z", + "shell.execute_reply": "2025-05-02T08:56:28.517562Z", + "shell.execute_reply.started": "2025-05-02T08:56:14.082721Z" + }, + "jupyter": { + "outputs_hidden": true + }, + "trusted": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "=== Testing Scenario Setup ===\n", + "⚠️ Only January and February of 2020 were used for testing in predefined setup.\n", + "⚠️ Avoid using 2020 data after February due to COVID-19 impact.\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter test years (comma-separated, e.g., 2020): 2020\n", + "Enter months for year 2020 (comma-separated, e.g., 1,2): 1,2\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "🔍 Testing Model for Sequence Length: 20\n", + "\n", + "🧪 Evaluating on Test Data...\n", + "\n", + "=== User 0 ===\n", + "✅ Accuracy: 47.50%\n", + "📊 Predicted Class Distribution: {0: 19, 18: 9, 24: 7, 26: 1, 30: 3, 31: 1}\n", + "📌 Actual Class Distribution: {0: 40}\n", + "\n", + "=== User 1 ===\n", + "✅ Accuracy: 82.50%\n", + "📊 Predicted Class Distribution: {1: 33, 31: 7}\n", + "📌 Actual Class Distribution: {1: 40}\n", + "\n", + "=== User 2 ===\n", + "✅ Accuracy: 0.00%\n", + "📊 Predicted Class Distribution: {6: 2, 12: 12, 17: 13, 30: 12, 31: 1}\n", + "📌 Actual Class Distribution: {2: 40}\n", + "\n", + "=== User 3 ===\n", + "✅ Accuracy: 41.03%\n", + "📊 Predicted Class Distribution: {3: 16, 6: 1, 12: 8, 29: 13, 30: 1}\n", + "📌 Actual Class Distribution: {3: 39}\n", + "\n", + "=== User 4 ===\n", + "✅ Accuracy: 2.50%\n", + "📊 Predicted Class Distribution: {2: 1, 4: 1, 8: 2, 9: 3, 18: 11, 23: 3, 26: 16, 29: 1, 30: 1, 31: 1}\n", + "📌 Actual Class Distribution: {4: 40}\n", + "\n", + "=== User 5 ===\n", + "✅ Accuracy: 57.50%\n", + "📊 Predicted Class Distribution: {2: 5, 5: 23, 23: 2, 29: 6, 30: 3, 31: 1}\n", + "📌 Actual Class Distribution: {5: 40}\n", + "\n", + "=== User 6 ===\n", + "✅ Accuracy: 25.00%\n", + "📊 Predicted Class Distribution: {6: 10, 17: 1, 30: 5, 31: 24}\n", + "📌 Actual Class Distribution: {6: 40}\n", + "\n", + "=== User 7 ===\n", + "✅ Accuracy: 52.50%\n", + "📊 Predicted Class Distribution: {7: 21, 10: 3, 11: 14, 18: 2}\n", + "📌 Actual Class Distribution: {7: 40}\n", + "\n", + "=== User 8 ===\n", + "✅ Accuracy: 62.50%\n", + "📊 Predicted Class 
Distribution: {8: 25, 23: 1, 29: 8, 30: 6}\n", + "📌 Actual Class Distribution: {8: 40}\n", + "\n", + "=== User 9 ===\n", + "✅ Accuracy: 100.00%\n", + "📊 Predicted Class Distribution: {9: 40}\n", + "📌 Actual Class Distribution: {9: 40}\n", + "\n", + "=== User 10 ===\n", + "✅ Accuracy: 57.50%\n", + "📊 Predicted Class Distribution: {10: 23, 11: 15, 30: 2}\n", + "📌 Actual Class Distribution: {10: 40}\n", + "\n", + "=== User 11 ===\n", + "✅ Accuracy: 35.00%\n", + "📊 Predicted Class Distribution: {1: 1, 10: 15, 11: 14, 12: 1, 14: 4, 15: 2, 16: 2, 25: 1}\n", + "📌 Actual Class Distribution: {11: 40}\n", + "\n", + "=== User 12 ===\n", + "✅ Accuracy: 62.50%\n", + "📊 Predicted Class Distribution: {3: 1, 12: 25, 26: 14}\n", + "📌 Actual Class Distribution: {12: 40}\n", + "\n", + "=== User 13 ===\n", + "✅ Accuracy: 55.00%\n", + "📊 Predicted Class Distribution: {10: 3, 11: 3, 12: 2, 13: 22, 16: 1, 21: 9}\n", + "📌 Actual Class Distribution: {13: 40}\n", + "\n", + "=== User 14 ===\n", + "✅ Accuracy: 70.00%\n", + "📊 Predicted Class Distribution: {0: 1, 14: 28, 16: 2, 18: 7, 25: 2}\n", + "📌 Actual Class Distribution: {14: 40}\n", + "\n", + "=== User 15 ===\n", + "✅ Accuracy: 100.00%\n", + "📊 Predicted Class Distribution: {15: 40}\n", + "📌 Actual Class Distribution: {15: 40}\n", + "\n", + "=== User 16 ===\n", + "✅ Accuracy: 17.50%\n", + "📊 Predicted Class Distribution: {15: 20, 16: 7, 18: 13}\n", + "📌 Actual Class Distribution: {16: 40}\n", + "\n", + "=== User 17 ===\n", + "✅ Accuracy: 40.00%\n", + "📊 Predicted Class Distribution: {0: 2, 16: 6, 17: 16, 18: 1, 28: 1, 31: 14}\n", + "📌 Actual Class Distribution: {17: 40}\n", + "\n", + "=== User 18 ===\n", + "✅ Accuracy: 97.50%\n", + "📊 Predicted Class Distribution: {0: 1, 18: 39}\n", + "📌 Actual Class Distribution: {18: 40}\n", + "\n", + "=== User 19 ===\n", + "✅ Accuracy: 72.50%\n", + "📊 Predicted Class Distribution: {1: 3, 6: 7, 19: 29, 22: 1}\n", + "📌 Actual Class Distribution: {19: 40}\n", + "\n", + "=== User 20 ===\n", + "✅ Accuracy: 77.50%\n", + "📊 Predicted Class Distribution: {2: 8, 20: 31, 26: 1}\n", + "📌 Actual Class Distribution: {20: 40}\n", + "\n", + "=== User 21 ===\n", + "✅ Accuracy: 92.50%\n", + "📊 Predicted Class Distribution: {21: 37, 24: 3}\n", + "📌 Actual Class Distribution: {21: 40}\n", + "\n", + "=== User 22 ===\n", + "✅ Accuracy: 0.00%\n", + "📊 Predicted Class Distribution: {8: 4, 9: 2, 23: 1, 29: 27, 30: 1}\n", + "📌 Actual Class Distribution: {22: 35}\n", + "\n", + "=== User 23 ===\n", + "✅ Accuracy: 77.50%\n", + "📊 Predicted Class Distribution: {3: 9, 23: 31}\n", + "📌 Actual Class Distribution: {23: 40}\n", + "\n", + "=== User 24 ===\n", + "✅ Accuracy: 92.50%\n", + "📊 Predicted Class Distribution: {21: 3, 24: 37}\n", + "📌 Actual Class Distribution: {24: 40}\n", + "\n", + "=== User 25 ===\n", + "✅ Accuracy: 2.50%\n", + "📊 Predicted Class Distribution: {2: 14, 12: 11, 23: 1, 25: 1, 29: 4, 30: 9}\n", + "📌 Actual Class Distribution: {25: 40}\n", + "\n", + "=== User 26 ===\n", + "✅ Accuracy: 0.00%\n", + "📊 Predicted Class Distribution: {12: 18, 18: 3, 21: 13, 24: 6}\n", + "📌 Actual Class Distribution: {26: 40}\n", + "\n", + "=== User 27 ===\n", + "✅ Accuracy: 0.00%\n", + "📊 Predicted Class Distribution: {12: 38, 21: 1, 24: 1}\n", + "📌 Actual Class Distribution: {27: 40}\n", + "\n", + "=== User 28 ===\n", + "✅ Accuracy: 100.00%\n", + "📊 Predicted Class Distribution: {28: 40}\n", + "📌 Actual Class Distribution: {28: 40}\n", + "\n", + "=== User 29 ===\n", + "✅ Accuracy: 40.00%\n", + "📊 Predicted Class Distribution: {12: 12, 26: 1, 29: 16, 30: 
11}\n", + "📌 Actual Class Distribution: {29: 40}\n", + "\n", + "=== User 30 ===\n", + "✅ Accuracy: 35.00%\n", + "📊 Predicted Class Distribution: {12: 1, 18: 9, 23: 5, 25: 3, 26: 3, 29: 2, 30: 14, 31: 3}\n", + "📌 Actual Class Distribution: {30: 40}\n", + "\n", + "=== User 31 ===\n", + "✅ Accuracy: 50.00%\n", + "📊 Predicted Class Distribution: {12: 2, 18: 18, 31: 20}\n", + "📌 Actual Class Distribution: {31: 40}\n", + "\n", + "🟩 Final Evaluation Summary for Sequence Length 20:\n", + "Users with >50% Accuracy: 17 / 32\n", + "✅ Final Success Rate: 53.12%\n", + "\n", + "🔍 Testing Model for Sequence Length: 25\n", + "\n", + "🧪 Evaluating on Test Data...\n", + "\n", + "=== User 0 ===\n", + "✅ Accuracy: 17.14%\n", + "📊 Predicted Class Distribution: {0: 6, 18: 2, 24: 3, 25: 2, 26: 14, 30: 7, 31: 1}\n", + "📌 Actual Class Distribution: {0: 35}\n", + "\n", + "=== User 1 ===\n", + "✅ Accuracy: 8.57%\n", + "📊 Predicted Class Distribution: {1: 3, 31: 32}\n", + "📌 Actual Class Distribution: {1: 35}\n", + "\n", + "=== User 2 ===\n", + "✅ Accuracy: 5.71%\n", + "📊 Predicted Class Distribution: {2: 2, 12: 5, 17: 11, 21: 1, 30: 3, 31: 13}\n", + "📌 Actual Class Distribution: {2: 35}\n", + "\n", + "=== User 3 ===\n", + "✅ Accuracy: 14.71%\n", + "📊 Predicted Class Distribution: {3: 5, 12: 1, 29: 5, 30: 16, 31: 7}\n", + "📌 Actual Class Distribution: {3: 34}\n", + "\n", + "=== User 4 ===\n", + "✅ Accuracy: 0.00%\n", + "📊 Predicted Class Distribution: {2: 4, 9: 4, 10: 1, 25: 7, 26: 5, 27: 1, 30: 12, 31: 1}\n", + "📌 Actual Class Distribution: {4: 35}\n", + "\n", + "=== User 5 ===\n", + "✅ Accuracy: 100.00%\n", + "📊 Predicted Class Distribution: {5: 35}\n", + "📌 Actual Class Distribution: {5: 35}\n", + "\n", + "=== User 6 ===\n", + "✅ Accuracy: 31.43%\n", + "📊 Predicted Class Distribution: {6: 11, 31: 24}\n", + "📌 Actual Class Distribution: {6: 35}\n", + "\n", + "=== User 7 ===\n", + "✅ Accuracy: 65.71%\n", + "📊 Predicted Class Distribution: {7: 23, 10: 3, 13: 9}\n", + "📌 Actual Class Distribution: {7: 35}\n", + "\n", + "=== User 8 ===\n", + "✅ Accuracy: 82.86%\n", + "📊 Predicted Class Distribution: {4: 2, 8: 29, 22: 2, 30: 2}\n", + "📌 Actual Class Distribution: {8: 35}\n", + "\n", + "=== User 9 ===\n", + "✅ Accuracy: 97.14%\n", + "📊 Predicted Class Distribution: {4: 1, 9: 34}\n", + "📌 Actual Class Distribution: {9: 35}\n", + "\n", + "=== User 10 ===\n", + "✅ Accuracy: 40.00%\n", + "📊 Predicted Class Distribution: {10: 14, 13: 6, 23: 3, 25: 2, 30: 10}\n", + "📌 Actual Class Distribution: {10: 35}\n", + "\n", + "=== User 11 ===\n", + "✅ Accuracy: 31.43%\n", + "📊 Predicted Class Distribution: {10: 22, 11: 11, 12: 1, 19: 1}\n", + "📌 Actual Class Distribution: {11: 35}\n", + "\n", + "=== User 12 ===\n", + "✅ Accuracy: 57.14%\n", + "📊 Predicted Class Distribution: {12: 20, 29: 15}\n", + "📌 Actual Class Distribution: {12: 35}\n", + "\n", + "=== User 13 ===\n", + "✅ Accuracy: 57.14%\n", + "📊 Predicted Class Distribution: {12: 1, 13: 20, 21: 14}\n", + "📌 Actual Class Distribution: {13: 35}\n", + "\n", + "=== User 14 ===\n", + "✅ Accuracy: 62.86%\n", + "📊 Predicted Class Distribution: {0: 4, 14: 22, 15: 2, 18: 7}\n", + "📌 Actual Class Distribution: {14: 35}\n", + "\n", + "=== User 15 ===\n", + "✅ Accuracy: 100.00%\n", + "📊 Predicted Class Distribution: {15: 35}\n", + "📌 Actual Class Distribution: {15: 35}\n", + "\n", + "=== User 16 ===\n", + "✅ Accuracy: 40.00%\n", + "📊 Predicted Class Distribution: {7: 2, 15: 13, 16: 14, 18: 6}\n", + "📌 Actual Class Distribution: {16: 35}\n", + "\n", + "=== User 17 ===\n", + "✅ Accuracy: 
65.71%\n", + "📊 Predicted Class Distribution: {0: 1, 16: 11, 17: 23}\n", + "📌 Actual Class Distribution: {17: 35}\n", + "\n", + "=== User 18 ===\n", + "✅ Accuracy: 82.86%\n", + "📊 Predicted Class Distribution: {0: 6, 18: 29}\n", + "📌 Actual Class Distribution: {18: 35}\n", + "\n", + "=== User 19 ===\n", + "✅ Accuracy: 60.00%\n", + "📊 Predicted Class Distribution: {6: 13, 19: 21, 22: 1}\n", + "📌 Actual Class Distribution: {19: 35}\n", + "\n", + "=== User 20 ===\n", + "✅ Accuracy: 5.71%\n", + "📊 Predicted Class Distribution: {2: 33, 20: 2}\n", + "📌 Actual Class Distribution: {20: 35}\n", + "\n", + "=== User 21 ===\n", + "✅ Accuracy: 100.00%\n", + "📊 Predicted Class Distribution: {21: 35}\n", + "📌 Actual Class Distribution: {21: 35}\n", + "\n", + "=== User 22 ===\n", + "✅ Accuracy: 0.00%\n", + "📊 Predicted Class Distribution: {8: 2, 9: 2, 29: 26}\n", + "📌 Actual Class Distribution: {22: 30}\n", + "\n", + "=== User 23 ===\n", + "✅ Accuracy: 65.71%\n", + "📊 Predicted Class Distribution: {3: 4, 23: 23, 30: 8}\n", + "📌 Actual Class Distribution: {23: 35}\n", + "\n", + "=== User 24 ===\n", + "✅ Accuracy: 100.00%\n", + "📊 Predicted Class Distribution: {24: 35}\n", + "📌 Actual Class Distribution: {24: 35}\n", + "\n", + "=== User 25 ===\n", + "✅ Accuracy: 0.00%\n", + "📊 Predicted Class Distribution: {2: 33, 12: 1, 30: 1}\n", + "📌 Actual Class Distribution: {25: 35}\n", + "\n", + "=== User 26 ===\n", + "✅ Accuracy: 0.00%\n", + "📊 Predicted Class Distribution: {12: 29, 21: 6}\n", + "📌 Actual Class Distribution: {26: 35}\n", + "\n", + "=== User 27 ===\n", + "✅ Accuracy: 0.00%\n", + "📊 Predicted Class Distribution: {12: 35}\n", + "📌 Actual Class Distribution: {27: 35}\n", + "\n", + "=== User 28 ===\n", + "✅ Accuracy: 100.00%\n", + "📊 Predicted Class Distribution: {28: 35}\n", + "📌 Actual Class Distribution: {28: 35}\n", + "\n", + "=== User 29 ===\n", + "✅ Accuracy: 28.57%\n", + "📊 Predicted Class Distribution: {2: 1, 12: 2, 26: 8, 29: 10, 30: 14}\n", + "📌 Actual Class Distribution: {29: 35}\n", + "\n", + "=== User 30 ===\n", + "✅ Accuracy: 34.29%\n", + "📊 Predicted Class Distribution: {2: 4, 26: 2, 27: 4, 29: 13, 30: 12}\n", + "📌 Actual Class Distribution: {30: 35}\n", + "\n", + "=== User 31 ===\n", + "✅ Accuracy: 60.00%\n", + "📊 Predicted Class Distribution: {12: 1, 16: 1, 18: 12, 31: 21}\n", + "📌 Actual Class Distribution: {31: 35}\n", + "\n", + "🟩 Final Evaluation Summary for Sequence Length 25:\n", + "Users with >50% Accuracy: 16 / 32\n", + "✅ Final Success Rate: 50.00%\n", + "\n", + "✅ All evaluations completed. Results saved to: /kaggle/working/evaluation_results.xlsx\n" + ] + } + ], + "source": [ + "from pandas import ExcelWriter\n", + "\n", + "# === Run evaluation for each trained sequence length ===\n", + "test_scenario = get_user_input_for_test()\n", + "test_data = filter_test_data(df, test_scenario)\n", + "\n", + "output_excel_path = \"/kaggle/working/evaluation_results.xlsx\"\n", + "\n", + "with ExcelWriter(output_excel_path) as writer:\n", + " for sequence_length, result in best_models.items():\n", + " print(f\"\\n🔍 Testing Model for Sequence Length: {sequence_length}\")\n", + " evaluate_model_on_test_data(\n", + " result['model'],\n", + " test_data.copy(),\n", + " sequence_length,\n", + " writer # 👈 pass the writer\n", + " )\n", + "\n", + "print(f\"\\n✅ All evaluations completed. 
Results saved to: {output_excel_path}\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kaggle": {
+   "accelerator": "nvidiaTeslaT4",
+   "dataSources": [
+    {
+     "datasetId": 5775075,
+     "sourceId": 9494285,
+     "sourceType": "datasetVersion"
+    }
+   ],
+   "dockerImageVersionId": 31011,
+   "isGpuEnabled": true,
+   "isInternetEnabled": true,
+   "language": "python",
+   "sourceType": "notebook"
+  },
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}