Added the dataset files as well as the original Jupyter notebook and the provided instructions.
master
6 changed files with 987 additions and 0 deletions
BIN   Datasets/ALL32USERS15MIN_WITHTHRESHOLD.xlsx
BIN   Datasets/ALL32USERS1HR_WITHTHRESHOLD.xlsx
BIN   Datasets/ALLUSERS32_15MIN_WITHOUTTHREHOLD .xlsx
BIN   Datasets/ALLUSERS_32_1HR_WITHOUT_THRESHOLD.xlsx
BIN   MODIFICATIONSANDINSTRUCTIONS.pdf
+987  final-32-automated-code-new(1).ipynb
@@ -0,0 +1,987 @@ |
|||
{ |
|||
"cells": [ |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": 4, |
|||
"metadata": { |
|||
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", |
|||
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [ |
|||
"# This Python 3 environment comes with many helpful analytics libraries installed\n", |
|||
"# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n", |
|||
"# For example, here's several helpful packages to load\n", |
|||
"\n", |
|||
"import numpy as np # linear algebra\n", |
|||
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", |
|||
"\n", |
|||
"# Input data files are available in the read-only \"../input/\" directory\n", |
|||
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n", |
|||
"\n", |
|||
"import os\n", |
|||
"for dirname, _, filenames in os.walk('/kaggle/input'):\n", |
|||
" for filename in filenames:\n", |
|||
" print(os.path.join(dirname, filename))\n", |
|||
"\n", |
|||
"# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n", |
|||
"# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session" |
|||
] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": 6, |
|||
"metadata": { |
|||
"collapsed": true, |
|||
"execution": { |
|||
"iopub.execute_input": "2025-05-02T07:51:57.538752Z", |
|||
"iopub.status.busy": "2025-05-02T07:51:57.538555Z", |
|||
"iopub.status.idle": "2025-05-02T08:46:51.909800Z", |
|||
"shell.execute_reply": "2025-05-02T08:46:51.909147Z", |
|||
"shell.execute_reply.started": "2025-05-02T07:51:57.538734Z" |
|||
}, |
|||
"jupyter": { |
|||
"outputs_hidden": true |
|||
}, |
|||
"trusted": true |
|||
}, |
|||
"outputs": [ |
|||
{ |
|||
"ename": "ModuleNotFoundError", |
|||
"evalue": "No module named 'sklearn'", |
|||
"output_type": "error", |
|||
"traceback": [ |
|||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
|||
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", |
|||
"Cell \u001b[0;32mIn[6], line 11\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkeras_tuner\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkt\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkeras_tuner\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m RandomSearch\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m accuracy_score\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# === Clean previous tuning directory ===\u001b[39;00m\n\u001b[1;32m 14\u001b[0m shutil\u001b[38;5;241m.\u001b[39mrmtree(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/kaggle/working/my_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m, ignore_errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", |
|||
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sklearn'" |
|||
] |
|||
} |
|||
], |
|||
"source": [ |
|||
"import numpy as np\n", |
|||
"import pandas as pd\n", |
|||
"import shutil\n", |
|||
"import os\n", |
|||
"from tensorflow.keras.models import Sequential\n", |
|||
"from tensorflow.keras.layers import LSTM, Dense, Dropout,GRU,Bidirectional\n", |
|||
"from tensorflow.keras.optimizers import Adam\n", |
|||
"from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping\n", |
|||
"import keras_tuner as kt\n", |
|||
"from keras_tuner import RandomSearch\n", |
|||
"from sklearn.metrics import accuracy_score\n", |
|||
"\n", |
|||
"# === Clean previous tuning directory ===\n", |
|||
"shutil.rmtree(\"/kaggle/working/my_dir\", ignore_errors=True)\n", |
|||
"\n", |
|||
"# === Load dataset ===\n", |
|||
"file_path = '/kaggle/input/32usrs/ALLUSERS32_15MIN_WITHOUTTHREHOLD.xlsx' \n", |
|||
"\n", |
|||
"df = pd.read_excel(file_path)\n", |
|||
"\n", |
|||
"\n", |
|||
"\n", |
|||
"\n", |
|||
"\n", |
|||
"\n" |
|||
] |
|||
}, |
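The traceback above shows the kernel is missing scikit-learn even though TensorFlow and keras_tuner import cleanly. A minimal sketch of guarding that import, assuming the notebook runs in a kernel with pip and network access (both assumptions about the environment, not something the notebook states):

import importlib.util
import subprocess
import sys

# Install scikit-learn into the interpreter running this notebook only if the
# module cannot be found, then import the metric used later for evaluation.
if importlib.util.find_spec("sklearn") is None:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "scikit-learn"])

from sklearn.metrics import accuracy_score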
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [ |
|||
"# === Helper functions for scenario selection ===\n", |
|||
"def get_user_input_for_scenario(scenario_type):\n", |
|||
" print(f\"\\nPlease define your custom {scenario_type} scenario:\")\n", |
|||
" years_input = input(f\"Enter {scenario_type} years (comma-separated, e.g., 2017,2018): \").strip()\n", |
|||
" years = list(map(int, years_input.split(',')))\n", |
|||
" years_months = []\n", |
|||
" for year in years:\n", |
|||
" months_input = input(f\"Enter months for year {year} (comma-separated, e.g., 1,2,3): \").strip()\n", |
|||
" months = list(map(int, months_input.split(',')))\n", |
|||
" years_months.append((year, months))\n", |
|||
" return years_months\n", |
|||
"\n", |
|||
"def display_warning_about_2020_data():\n", |
|||
" print(\"\\n⚠️ Warning: 2020 data after February is excluded due to COVID-19.\")\n", |
|||
" print(\"✅ Only Jan and Feb 2020 are used for testing. Do not use them in training/validation.\")\n", |
|||
"\n", |
|||
"def display_warnings_for_scenarios(scenario_type):\n", |
|||
" if scenario_type == \"training\":\n", |
|||
" print(\"\\n⚠️ Predefined Training Scenarios (for reference only):\")\n", |
|||
" for name, scenario in predefined_training_scenarios.items():\n", |
|||
" parts = [f\"{year}-{months}\" for year, months in scenario['years_months']]\n", |
|||
" print(f\" {name}: {', '.join(parts)}\")\n", |
|||
" elif scenario_type == \"validation\":\n", |
|||
" print(\"\\n⚠️ Predefined Validation Scenario:\")\n", |
|||
" for name, scenario in predefined_validation_scenarios.items():\n", |
|||
" parts = [f\"{year}-{months}\" for year, months in scenario['years_months']]\n", |
|||
" print(f\" {name}: {', '.join(parts)}\")\n", |
|||
" print(\" - This uses Oct, Nov, Dec of 2019\")\n", |
|||
"\n", |
|||
"predefined_training_scenarios = {\n", |
|||
" \"Scenario 1\": {\"years_months\": [(2018, list(range(1, 13))), (2019, list(range(1, 10)))]},\n", |
|||
" \"Scenario 2\": {\"years_months\": [(2017, list(range(1, 13))), (2018, list(range(1, 13))), (2019, list(range(1, 10)))]}\n", |
|||
"}\n", |
|||
"predefined_validation_scenarios = {\n", |
|||
" \"Scenario A\": {\"years_months\": [(2019, [10, 11, 12])]}\n", |
|||
"}\n" |
|||
] |
|||
}, |
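The helper above relies on input() prompts, which block non-interactive runs such as Kaggle's "Save & Run All". A minimal sketch of a fallback that reuses the predefined scenarios from this cell when prompting is not wanted; the INTERACTIVE flag and choose_scenario name are hypothetical additions:

# Fall back to the predefined scenarios instead of prompting, so the notebook
# can also run end-to-end without a console attached.
INTERACTIVE = False  # assumption: set True to keep the input() prompts

def choose_scenario(scenario_type):
    if INTERACTIVE:
        return get_user_input_for_scenario(scenario_type)
    if scenario_type == "training":
        return predefined_training_scenarios["Scenario 1"]["years_months"]
    return predefined_validation_scenarios["Scenario A"]["years_months"]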
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [ |
|||
"# === Get user-defined training and validation scenarios ===\n", |
|||
"print(\"=== Training Scenario Setup ===\")\n", |
|||
"display_warning_about_2020_data()\n", |
|||
"display_warnings_for_scenarios(\"training\")\n", |
|||
"training_scenario = get_user_input_for_scenario(\"training\")\n", |
|||
"\n", |
|||
"print(\"\\n=== Validation Scenario Setup ===\")\n", |
|||
"display_warning_about_2020_data()\n", |
|||
"display_warnings_for_scenarios(\"validation\")\n", |
|||
"validation_scenario = get_user_input_for_scenario(\"validation\")\n", |
|||
"\n", |
|||
"# === Filter and preprocess data ===\n", |
|||
"def filter_data(df, scenario):\n", |
|||
" filtered = pd.DataFrame()\n", |
|||
" for year, months in scenario:\n", |
|||
" filtered = pd.concat([filtered, df[(df['Year'] == year) & (df['Month'].isin(months))]])\n", |
|||
" return filtered.drop(columns=['Month', 'Year', 'date', 'DayOfWeek']) \n", |
|||
"\n", |
|||
"data = filter_data(df, training_scenario)\n", |
|||
"data_val = filter_data(df, validation_scenario)" |
|||
] |
|||
}, |
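filter_data above grows its result with repeated pd.concat calls inside the loop; an equivalent single boolean mask avoids that. A sketch under the same column assumptions (Year, Month, date, DayOfWeek), not verified against the actual spreadsheet:

import pandas as pd

def filter_data_mask(df, scenario):
    # Build one mask covering every (year, months) pair, then drop the
    # time-bookkeeping columns exactly as filter_data does.
    mask = pd.Series(False, index=df.index)
    for year, months in scenario:
        mask |= (df['Year'] == year) & (df['Month'].isin(months))
    return df.loc[mask].drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])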
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [ |
|||
"\n", |
|||
"\n", |
|||
"# === Organize by user ===\n", |
|||
"df_sorted = data.sort_values(by='user').reset_index(drop=True)\n", |
|||
"df_sorted_val = data_val.sort_values(by='user').reset_index(drop=True)\n", |
|||
"users = df_sorted['user'].unique()\n", |
|||
"users_val = df_sorted_val['user'].unique()\n", |
|||
"\n", |
|||
"user_data = {user: df_sorted[df_sorted['user'] == user] for user in users}\n", |
|||
"user_data_val = {user: df_sorted_val[df_sorted_val['user'] == user] for user in users_val}\n", |
|||
"\n", |
|||
"# === Callbacks ===\n", |
|||
"early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)\n", |
|||
"lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)" |
|||
] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [ |
|||
"# === Model tuning and training loop ===\n", |
|||
"best_models = {}\n", |
|||
"\n", |
|||
"for sequence_length in range(20, 30, 5):\n", |
|||
" print(f\"\\n=== Training for Sequence Length: {sequence_length} ===\")\n", |
|||
"\n", |
|||
" # Training data\n", |
|||
" X, y = [], []\n", |
|||
" for user, data in user_data.items():\n", |
|||
" features = data.drop('user', axis=1).values\n", |
|||
" labels = data['user'].values\n", |
|||
" for i in range(len(features) - sequence_length):\n", |
|||
" X.append(features[i:i + sequence_length])\n", |
|||
" y.append(labels[i + sequence_length])\n", |
|||
" X = np.array(X)\n", |
|||
" y = np.array(y)\n", |
|||
"\n", |
|||
" # Validation data\n", |
|||
" X_val, y_val = [], []\n", |
|||
" for user, data in user_data_val.items():\n", |
|||
" features = data.drop('user', axis=1).values\n", |
|||
" labels = data['user'].values\n", |
|||
" for i in range(len(features) - sequence_length):\n", |
|||
" X_val.append(features[i:i + sequence_length])\n", |
|||
" y_val.append(labels[i + sequence_length])\n", |
|||
" X_val = np.array(X_val)\n", |
|||
" y_val = np.array(y_val)\n", |
|||
"\n", |
|||
" if X.shape[0] == 0 or X_val.shape[0] == 0:\n", |
|||
" print(f\"⚠️ Skipped sequence length {sequence_length} due to insufficient data.\")\n", |
|||
" continue\n", |
|||
"\n", |
|||
" n_features = X.shape[2]\n", |
|||
"\n", |
|||
" def build_model(hp):\n", |
|||
" model = Sequential()\n", |
|||
" model.add(Bidirectional(LSTM(units=hp.Int('units', 32, 256, step=2),\n", |
|||
" input_shape=(sequence_length, n_features))))\n", |
|||
" model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.5, step=0.1)))\n", |
|||
" model.add(Dense(len(users), activation='softmax'))\n", |
|||
" model.compile(\n", |
|||
" optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),\n", |
|||
" loss='sparse_categorical_crossentropy',\n", |
|||
" metrics=['accuracy']\n", |
|||
" )\n", |
|||
" return model\n", |
|||
"\n", |
|||
" tuner = RandomSearch(\n", |
|||
" build_model,\n", |
|||
" objective='val_loss',\n", |
|||
" max_trials=30,\n", |
|||
" executions_per_trial=2,\n", |
|||
" directory='/kaggle/working/my_dir',\n", |
|||
" project_name=f'lstm_seq_{sequence_length}'\n", |
|||
" )\n", |
|||
"\n", |
|||
" tuner.search(X, y, epochs=30, validation_data=(X_val, y_val),\n", |
|||
" callbacks=[early_stopping, lr_scheduler], verbose=1)\n", |
|||
"\n", |
|||
" best_hps = tuner.get_best_hyperparameters(1)[0]\n", |
|||
" best_model = tuner.hypermodel.build(best_hps)\n", |
|||
" best_model.fit(X, y, epochs=30, validation_data=(X_val, y_val),\n", |
|||
" callbacks=[early_stopping, lr_scheduler], verbose=0)\n", |
|||
"\n", |
|||
" best_models[sequence_length] = {\n", |
|||
" 'model': best_model,\n", |
|||
" 'best_hyperparameters': {\n", |
|||
" 'units': best_hps.get('units'),\n", |
|||
" 'dropout_rate': best_hps.get('dropout_rate'),\n", |
|||
" 'learning_rate': best_hps.get('learning_rate')\n", |
|||
" }\n", |
|||
" }" |
|||
] |
|||
}, |
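The training loop above builds sequences with per-row Python appends for every user; numpy's sliding_window_view does the same windowing in one shot. A minimal sketch assuming a 2-D per-user feature array and 1-D label array shaped like the values extracted above (requires numpy >= 1.20):

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def make_sequences(features, labels, sequence_length):
    # features: (n_samples, n_features) for one user; labels: (n_samples,).
    # Mirrors the loop above: window i covers rows [i, i + sequence_length)
    # and is labelled with the user id at row i + sequence_length.
    if len(features) <= sequence_length:
        return (np.empty((0, sequence_length, features.shape[1])),
                np.empty((0,), dtype=labels.dtype))
    windows = sliding_window_view(features, sequence_length, axis=0)  # (n-L+1, n_features, L)
    X = windows.transpose(0, 2, 1)[:-1]  # drop the last window, which has no following label
    y = labels[sequence_length:]
    return X, y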
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [ |
|||
"\n", |
|||
"\n", |
|||
"# === Get test scenario input ===\n", |
|||
"def get_user_input_for_test():\n", |
|||
" print(\"\\n=== Testing Scenario Setup ===\")\n", |
|||
" print(\"⚠️ Only January and February of 2020 were used for testing in predefined setup.\")\n", |
|||
" print(\"⚠️ Avoid using 2020 data after February due to COVID-19 impact.\\n\")\n", |
|||
" years_input = input(\"Enter test years (comma-separated, e.g., 2020): \").strip()\n", |
|||
" years = list(map(int, years_input.split(',')))\n", |
|||
" years_months = []\n", |
|||
" for year in years:\n", |
|||
" months_input = input(f\"Enter months for year {year} (comma-separated, e.g., 1,2): \").strip()\n", |
|||
" months = list(map(int, months_input.split(',')))\n", |
|||
" years_months.append((year, months))\n", |
|||
" return years_months\n", |
|||
"\n", |
|||
"def filter_test_data(df, scenario):\n", |
|||
" data_parts = []\n", |
|||
" for year, months in scenario:\n", |
|||
" part = df[(df['Year'] == year) & (df['Month'].isin(months))]\n", |
|||
" data_parts.append(part)\n", |
|||
" return pd.concat(data_parts, ignore_index=True)" |
|||
] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": 3, |
|||
"metadata": { |
|||
"execution": { |
|||
"iopub.execute_input": "2025-05-02T08:53:17.334789Z", |
|||
"iopub.status.busy": "2025-05-02T08:53:17.334489Z", |
|||
"iopub.status.idle": "2025-05-02T08:53:17.344855Z", |
|||
"shell.execute_reply": "2025-05-02T08:53:17.344176Z", |
|||
"shell.execute_reply.started": "2025-05-02T08:53:17.334766Z" |
|||
}, |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [ |
|||
"import pandas as pd\n", |
|||
"import os\n", |
|||
"\n", |
|||
"def evaluate_model_on_test_data(model, test_df, sequence_length, excel_writer):\n", |
|||
" print(\"\\n🧪 Evaluating on Test Data...\")\n", |
|||
" test_df = test_df.drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])\n", |
|||
" test_df = test_df.sort_values(by='user').reset_index(drop=True)\n", |
|||
"\n", |
|||
" users = test_df['user'].unique()\n", |
|||
" results = []\n", |
|||
" accuracy_above_50 = 0\n", |
|||
"\n", |
|||
" for user in users:\n", |
|||
" user_df = test_df[test_df['user'] == user]\n", |
|||
" X, y_true = [], []\n", |
|||
" user_features = user_df.drop(columns=['user']).values\n", |
|||
" user_labels = user_df['user'].values\n", |
|||
"\n", |
|||
" if len(user_df) <= sequence_length:\n", |
|||
" print(f\"Skipping User {user} (not enough data for sequence length {sequence_length})\")\n", |
|||
" continue\n", |
|||
"\n", |
|||
" for i in range(len(user_df) - sequence_length):\n", |
|||
" seq_x = user_features[i:i + sequence_length]\n", |
|||
" seq_y = user_labels[i + sequence_length]\n", |
|||
" X.append(seq_x)\n", |
|||
" y_true.append(seq_y)\n", |
|||
"\n", |
|||
" X = np.array(X)\n", |
|||
" y_true = np.array(y_true)\n", |
|||
"\n", |
|||
" if len(X) == 0:\n", |
|||
" continue\n", |
|||
"\n", |
|||
" y_pred = model.predict(X, verbose=0)\n", |
|||
" y_pred_classes = np.argmax(y_pred, axis=1)\n", |
|||
"\n", |
|||
" unique_pred, counts_pred = np.unique(y_pred_classes, return_counts=True)\n", |
|||
" label_counts_pred = dict(zip(unique_pred, counts_pred))\n", |
|||
"\n", |
|||
" unique_true, counts_true = np.unique(y_true, return_counts=True)\n", |
|||
" label_counts_true = dict(zip(unique_true, counts_true))\n", |
|||
"\n", |
|||
" acc = accuracy_score(y_true, y_pred_classes)\n", |
|||
" if acc > 0.5:\n", |
|||
" accuracy_above_50 += 1\n", |
|||
"\n", |
|||
" # Append result to list\n", |
|||
" results.append({\n", |
|||
" 'User': user,\n", |
|||
" 'Accuracy (%)': acc * 100,\n", |
|||
" 'Predicted Class Distribution': str(label_counts_pred),\n", |
|||
" 'Actual Class Distribution': str(label_counts_true)\n", |
|||
" })\n", |
|||
"\n", |
|||
" print(f\"\\n=== User {user} ===\")\n", |
|||
" print(f\"✅ Accuracy: {acc * 100:.2f}%\")\n", |
|||
" print(\"📊 Predicted Class Distribution:\", label_counts_pred)\n", |
|||
" print(\"📌 Actual Class Distribution: \", label_counts_true)\n", |
|||
"\n", |
|||
" final_accuracy_percent = (accuracy_above_50 / 32) * 100\n", |
|||
" print(f\"\\n🟩 Final Evaluation Summary for Sequence Length {sequence_length}:\")\n", |
|||
" print(f\"Users with >50% Accuracy: {accuracy_above_50} / 32\")\n", |
|||
" print(f\"✅ Final Success Rate: {final_accuracy_percent:.2f}%\")\n", |
|||
"\n", |
|||
" # Append overall stats as a new row\n", |
|||
" results.append({\n", |
|||
" 'User': 'TOTAL',\n", |
|||
" 'Accuracy (%)': '',\n", |
|||
" 'Predicted Class Distribution': f'Users >50% Acc: {accuracy_above_50}/32',\n", |
|||
" 'Actual Class Distribution': f'Success Rate: {final_accuracy_percent:.2f}%'\n", |
|||
" })\n", |
|||
"\n", |
|||
" # Save results to Excel sheet\n", |
|||
" df_results = pd.DataFrame(results)\n", |
|||
" df_results.to_excel(excel_writer, sheet_name=f\"SeqLen_{sequence_length}\", index=False)\n" |
|||
] |
|||
}, |
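evaluate_model_on_test_data divides by a hard-coded 32 even when users are skipped for having fewer rows than the sequence length, so the reported success rate can undercount. A minimal sketch of the same summary computed over the users that were actually evaluated; summarize_success_rate is a hypothetical helper, not part of the notebook:

def summarize_success_rate(per_user_accuracy, threshold=0.5):
    # per_user_accuracy: dict of user id -> accuracy in [0, 1] for users that
    # produced at least one test sequence (skipped users are simply absent).
    if not per_user_accuracy:
        return 0.0
    above = sum(1 for acc in per_user_accuracy.values() if acc > threshold)
    return above / len(per_user_accuracy) * 100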
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": 6, |
|||
"metadata": { |
|||
"collapsed": true, |
|||
"execution": { |
|||
"iopub.execute_input": "2025-05-02T08:56:14.082755Z", |
|||
"iopub.status.busy": "2025-05-02T08:56:14.082010Z", |
|||
"iopub.status.idle": "2025-05-02T08:56:28.518300Z", |
|||
"shell.execute_reply": "2025-05-02T08:56:28.517562Z", |
|||
"shell.execute_reply.started": "2025-05-02T08:56:14.082721Z" |
|||
}, |
|||
"jupyter": { |
|||
"outputs_hidden": true |
|||
}, |
|||
"trusted": true |
|||
}, |
|||
"outputs": [ |
|||
{ |
|||
"name": "stdout", |
|||
"output_type": "stream", |
|||
"text": [ |
|||
"\n", |
|||
"=== Testing Scenario Setup ===\n", |
|||
"⚠️ Only January and February of 2020 were used for testing in predefined setup.\n", |
|||
"⚠️ Avoid using 2020 data after February due to COVID-19 impact.\n", |
|||
"\n" |
|||
] |
|||
}, |
|||
{ |
|||
"name": "stdout", |
|||
"output_type": "stream", |
|||
"text": [ |
|||
"Enter test years (comma-separated, e.g., 2020): 2020\n", |
|||
"Enter months for year 2020 (comma-separated, e.g., 1,2): 1,2\n" |
|||
] |
|||
}, |
|||
{ |
|||
"name": "stdout", |
|||
"output_type": "stream", |
|||
"text": [ |
|||
"\n", |
|||
"🔍 Testing Model for Sequence Length: 20\n", |
|||
"\n", |
|||
"🧪 Evaluating on Test Data...\n", |
|||
"\n", |
|||
"=== User 0 ===\n", |
|||
"✅ Accuracy: 47.50%\n", |
|||
"📊 Predicted Class Distribution: {0: 19, 18: 9, 24: 7, 26: 1, 30: 3, 31: 1}\n", |
|||
"📌 Actual Class Distribution: {0: 40}\n", |
|||
"\n", |
|||
"=== User 1 ===\n", |
|||
"✅ Accuracy: 82.50%\n", |
|||
"📊 Predicted Class Distribution: {1: 33, 31: 7}\n", |
|||
"📌 Actual Class Distribution: {1: 40}\n", |
|||
"\n", |
|||
"=== User 2 ===\n", |
|||
"✅ Accuracy: 0.00%\n", |
|||
"📊 Predicted Class Distribution: {6: 2, 12: 12, 17: 13, 30: 12, 31: 1}\n", |
|||
"📌 Actual Class Distribution: {2: 40}\n", |
|||
"\n", |
|||
"=== User 3 ===\n", |
|||
"✅ Accuracy: 41.03%\n", |
|||
"📊 Predicted Class Distribution: {3: 16, 6: 1, 12: 8, 29: 13, 30: 1}\n", |
|||
"📌 Actual Class Distribution: {3: 39}\n", |
|||
"\n", |
|||
"=== User 4 ===\n", |
|||
"✅ Accuracy: 2.50%\n", |
|||
"📊 Predicted Class Distribution: {2: 1, 4: 1, 8: 2, 9: 3, 18: 11, 23: 3, 26: 16, 29: 1, 30: 1, 31: 1}\n", |
|||
"📌 Actual Class Distribution: {4: 40}\n", |
|||
"\n", |
|||
"=== User 5 ===\n", |
|||
"✅ Accuracy: 57.50%\n", |
|||
"📊 Predicted Class Distribution: {2: 5, 5: 23, 23: 2, 29: 6, 30: 3, 31: 1}\n", |
|||
"📌 Actual Class Distribution: {5: 40}\n", |
|||
"\n", |
|||
"=== User 6 ===\n", |
|||
"✅ Accuracy: 25.00%\n", |
|||
"📊 Predicted Class Distribution: {6: 10, 17: 1, 30: 5, 31: 24}\n", |
|||
"📌 Actual Class Distribution: {6: 40}\n", |
|||
"\n", |
|||
"=== User 7 ===\n", |
|||
"✅ Accuracy: 52.50%\n", |
|||
"📊 Predicted Class Distribution: {7: 21, 10: 3, 11: 14, 18: 2}\n", |
|||
"📌 Actual Class Distribution: {7: 40}\n", |
|||
"\n", |
|||
"=== User 8 ===\n", |
|||
"✅ Accuracy: 62.50%\n", |
|||
"📊 Predicted Class Distribution: {8: 25, 23: 1, 29: 8, 30: 6}\n", |
|||
"📌 Actual Class Distribution: {8: 40}\n", |
|||
"\n", |
|||
"=== User 9 ===\n", |
|||
"✅ Accuracy: 100.00%\n", |
|||
"📊 Predicted Class Distribution: {9: 40}\n", |
|||
"📌 Actual Class Distribution: {9: 40}\n", |
|||
"\n", |
|||
"=== User 10 ===\n", |
|||
"✅ Accuracy: 57.50%\n", |
|||
"📊 Predicted Class Distribution: {10: 23, 11: 15, 30: 2}\n", |
|||
"📌 Actual Class Distribution: {10: 40}\n", |
|||
"\n", |
|||
"=== User 11 ===\n", |
|||
"✅ Accuracy: 35.00%\n", |
|||
"📊 Predicted Class Distribution: {1: 1, 10: 15, 11: 14, 12: 1, 14: 4, 15: 2, 16: 2, 25: 1}\n", |
|||
"📌 Actual Class Distribution: {11: 40}\n", |
|||
"\n", |
|||
"=== User 12 ===\n", |
|||
"✅ Accuracy: 62.50%\n", |
|||
"📊 Predicted Class Distribution: {3: 1, 12: 25, 26: 14}\n", |
|||
"📌 Actual Class Distribution: {12: 40}\n", |
|||
"\n", |
|||
"=== User 13 ===\n", |
|||
"✅ Accuracy: 55.00%\n", |
|||
"📊 Predicted Class Distribution: {10: 3, 11: 3, 12: 2, 13: 22, 16: 1, 21: 9}\n", |
|||
"📌 Actual Class Distribution: {13: 40}\n", |
|||
"\n", |
|||
"=== User 14 ===\n", |
|||
"✅ Accuracy: 70.00%\n", |
|||
"📊 Predicted Class Distribution: {0: 1, 14: 28, 16: 2, 18: 7, 25: 2}\n", |
|||
"📌 Actual Class Distribution: {14: 40}\n", |
|||
"\n", |
|||
"=== User 15 ===\n", |
|||
"✅ Accuracy: 100.00%\n", |
|||
"📊 Predicted Class Distribution: {15: 40}\n", |
|||
"📌 Actual Class Distribution: {15: 40}\n", |
|||
"\n", |
|||
"=== User 16 ===\n", |
|||
"✅ Accuracy: 17.50%\n", |
|||
"📊 Predicted Class Distribution: {15: 20, 16: 7, 18: 13}\n", |
|||
"📌 Actual Class Distribution: {16: 40}\n", |
|||
"\n", |
|||
"=== User 17 ===\n", |
|||
"✅ Accuracy: 40.00%\n", |
|||
"📊 Predicted Class Distribution: {0: 2, 16: 6, 17: 16, 18: 1, 28: 1, 31: 14}\n", |
|||
"📌 Actual Class Distribution: {17: 40}\n", |
|||
"\n", |
|||
"=== User 18 ===\n", |
|||
"✅ Accuracy: 97.50%\n", |
|||
"📊 Predicted Class Distribution: {0: 1, 18: 39}\n", |
|||
"📌 Actual Class Distribution: {18: 40}\n", |
|||
"\n", |
|||
"=== User 19 ===\n", |
|||
"✅ Accuracy: 72.50%\n", |
|||
"📊 Predicted Class Distribution: {1: 3, 6: 7, 19: 29, 22: 1}\n", |
|||
"📌 Actual Class Distribution: {19: 40}\n", |
|||
"\n", |
|||
"=== User 20 ===\n", |
|||
"✅ Accuracy: 77.50%\n", |
|||
"📊 Predicted Class Distribution: {2: 8, 20: 31, 26: 1}\n", |
|||
"📌 Actual Class Distribution: {20: 40}\n", |
|||
"\n", |
|||
"=== User 21 ===\n", |
|||
"✅ Accuracy: 92.50%\n", |
|||
"📊 Predicted Class Distribution: {21: 37, 24: 3}\n", |
|||
"📌 Actual Class Distribution: {21: 40}\n", |
|||
"\n", |
|||
"=== User 22 ===\n", |
|||
"✅ Accuracy: 0.00%\n", |
|||
"📊 Predicted Class Distribution: {8: 4, 9: 2, 23: 1, 29: 27, 30: 1}\n", |
|||
"📌 Actual Class Distribution: {22: 35}\n", |
|||
"\n", |
|||
"=== User 23 ===\n", |
|||
"✅ Accuracy: 77.50%\n", |
|||
"📊 Predicted Class Distribution: {3: 9, 23: 31}\n", |
|||
"📌 Actual Class Distribution: {23: 40}\n", |
|||
"\n", |
|||
"=== User 24 ===\n", |
|||
"✅ Accuracy: 92.50%\n", |
|||
"📊 Predicted Class Distribution: {21: 3, 24: 37}\n", |
|||
"📌 Actual Class Distribution: {24: 40}\n", |
|||
"\n", |
|||
"=== User 25 ===\n", |
|||
"✅ Accuracy: 2.50%\n", |
|||
"📊 Predicted Class Distribution: {2: 14, 12: 11, 23: 1, 25: 1, 29: 4, 30: 9}\n", |
|||
"📌 Actual Class Distribution: {25: 40}\n", |
|||
"\n", |
|||
"=== User 26 ===\n", |
|||
"✅ Accuracy: 0.00%\n", |
|||
"📊 Predicted Class Distribution: {12: 18, 18: 3, 21: 13, 24: 6}\n", |
|||
"📌 Actual Class Distribution: {26: 40}\n", |
|||
"\n", |
|||
"=== User 27 ===\n", |
|||
"✅ Accuracy: 0.00%\n", |
|||
"📊 Predicted Class Distribution: {12: 38, 21: 1, 24: 1}\n", |
|||
"📌 Actual Class Distribution: {27: 40}\n", |
|||
"\n", |
|||
"=== User 28 ===\n", |
|||
"✅ Accuracy: 100.00%\n", |
|||
"📊 Predicted Class Distribution: {28: 40}\n", |
|||
"📌 Actual Class Distribution: {28: 40}\n", |
|||
"\n", |
|||
"=== User 29 ===\n", |
|||
"✅ Accuracy: 40.00%\n", |
|||
"📊 Predicted Class Distribution: {12: 12, 26: 1, 29: 16, 30: 11}\n", |
|||
"📌 Actual Class Distribution: {29: 40}\n", |
|||
"\n", |
|||
"=== User 30 ===\n", |
|||
"✅ Accuracy: 35.00%\n", |
|||
"📊 Predicted Class Distribution: {12: 1, 18: 9, 23: 5, 25: 3, 26: 3, 29: 2, 30: 14, 31: 3}\n", |
|||
"📌 Actual Class Distribution: {30: 40}\n", |
|||
"\n", |
|||
"=== User 31 ===\n", |
|||
"✅ Accuracy: 50.00%\n", |
|||
"📊 Predicted Class Distribution: {12: 2, 18: 18, 31: 20}\n", |
|||
"📌 Actual Class Distribution: {31: 40}\n", |
|||
"\n", |
|||
"🟩 Final Evaluation Summary for Sequence Length 20:\n", |
|||
"Users with >50% Accuracy: 17 / 32\n", |
|||
"✅ Final Success Rate: 53.12%\n", |
|||
"\n", |
|||
"🔍 Testing Model for Sequence Length: 25\n", |
|||
"\n", |
|||
"🧪 Evaluating on Test Data...\n", |
|||
"\n", |
|||
"=== User 0 ===\n", |
|||
"✅ Accuracy: 17.14%\n", |
|||
"📊 Predicted Class Distribution: {0: 6, 18: 2, 24: 3, 25: 2, 26: 14, 30: 7, 31: 1}\n", |
|||
"📌 Actual Class Distribution: {0: 35}\n", |
|||
"\n", |
|||
"=== User 1 ===\n", |
|||
"✅ Accuracy: 8.57%\n", |
|||
"📊 Predicted Class Distribution: {1: 3, 31: 32}\n", |
|||
"📌 Actual Class Distribution: {1: 35}\n", |
|||
"\n", |
|||
"=== User 2 ===\n", |
|||
"✅ Accuracy: 5.71%\n", |
|||
"📊 Predicted Class Distribution: {2: 2, 12: 5, 17: 11, 21: 1, 30: 3, 31: 13}\n", |
|||
"📌 Actual Class Distribution: {2: 35}\n", |
|||
"\n", |
|||
"=== User 3 ===\n", |
|||
"✅ Accuracy: 14.71%\n", |
|||
"📊 Predicted Class Distribution: {3: 5, 12: 1, 29: 5, 30: 16, 31: 7}\n", |
|||
"📌 Actual Class Distribution: {3: 34}\n", |
|||
"\n", |
|||
"=== User 4 ===\n", |
|||
"✅ Accuracy: 0.00%\n", |
|||
"📊 Predicted Class Distribution: {2: 4, 9: 4, 10: 1, 25: 7, 26: 5, 27: 1, 30: 12, 31: 1}\n", |
|||
"📌 Actual Class Distribution: {4: 35}\n", |
|||
"\n", |
|||
"=== User 5 ===\n", |
|||
"✅ Accuracy: 100.00%\n", |
|||
"📊 Predicted Class Distribution: {5: 35}\n", |
|||
"📌 Actual Class Distribution: {5: 35}\n", |
|||
"\n", |
|||
"=== User 6 ===\n", |
|||
"✅ Accuracy: 31.43%\n", |
|||
"📊 Predicted Class Distribution: {6: 11, 31: 24}\n", |
|||
"📌 Actual Class Distribution: {6: 35}\n", |
|||
"\n", |
|||
"=== User 7 ===\n", |
|||
"✅ Accuracy: 65.71%\n", |
|||
"📊 Predicted Class Distribution: {7: 23, 10: 3, 13: 9}\n", |
|||
"📌 Actual Class Distribution: {7: 35}\n", |
|||
"\n", |
|||
"=== User 8 ===\n", |
|||
"✅ Accuracy: 82.86%\n", |
|||
"📊 Predicted Class Distribution: {4: 2, 8: 29, 22: 2, 30: 2}\n", |
|||
"📌 Actual Class Distribution: {8: 35}\n", |
|||
"\n", |
|||
"=== User 9 ===\n", |
|||
"✅ Accuracy: 97.14%\n", |
|||
"📊 Predicted Class Distribution: {4: 1, 9: 34}\n", |
|||
"📌 Actual Class Distribution: {9: 35}\n", |
|||
"\n", |
|||
"=== User 10 ===\n", |
|||
"✅ Accuracy: 40.00%\n", |
|||
"📊 Predicted Class Distribution: {10: 14, 13: 6, 23: 3, 25: 2, 30: 10}\n", |
|||
"📌 Actual Class Distribution: {10: 35}\n", |
|||
"\n", |
|||
"=== User 11 ===\n", |
|||
"✅ Accuracy: 31.43%\n", |
|||
"📊 Predicted Class Distribution: {10: 22, 11: 11, 12: 1, 19: 1}\n", |
|||
"📌 Actual Class Distribution: {11: 35}\n", |
|||
"\n", |
|||
"=== User 12 ===\n", |
|||
"✅ Accuracy: 57.14%\n", |
|||
"📊 Predicted Class Distribution: {12: 20, 29: 15}\n", |
|||
"📌 Actual Class Distribution: {12: 35}\n", |
|||
"\n", |
|||
"=== User 13 ===\n", |
|||
"✅ Accuracy: 57.14%\n", |
|||
"📊 Predicted Class Distribution: {12: 1, 13: 20, 21: 14}\n", |
|||
"📌 Actual Class Distribution: {13: 35}\n", |
|||
"\n", |
|||
"=== User 14 ===\n", |
|||
"✅ Accuracy: 62.86%\n", |
|||
"📊 Predicted Class Distribution: {0: 4, 14: 22, 15: 2, 18: 7}\n", |
|||
"📌 Actual Class Distribution: {14: 35}\n", |
|||
"\n", |
|||
"=== User 15 ===\n", |
|||
"✅ Accuracy: 100.00%\n", |
|||
"📊 Predicted Class Distribution: {15: 35}\n", |
|||
"📌 Actual Class Distribution: {15: 35}\n", |
|||
"\n", |
|||
"=== User 16 ===\n", |
|||
"✅ Accuracy: 40.00%\n", |
|||
"📊 Predicted Class Distribution: {7: 2, 15: 13, 16: 14, 18: 6}\n", |
|||
"📌 Actual Class Distribution: {16: 35}\n", |
|||
"\n", |
|||
"=== User 17 ===\n", |
|||
"✅ Accuracy: 65.71%\n", |
|||
"📊 Predicted Class Distribution: {0: 1, 16: 11, 17: 23}\n", |
|||
"📌 Actual Class Distribution: {17: 35}\n", |
|||
"\n", |
|||
"=== User 18 ===\n", |
|||
"✅ Accuracy: 82.86%\n", |
|||
"📊 Predicted Class Distribution: {0: 6, 18: 29}\n", |
|||
"📌 Actual Class Distribution: {18: 35}\n", |
|||
"\n", |
|||
"=== User 19 ===\n", |
|||
"✅ Accuracy: 60.00%\n", |
|||
"📊 Predicted Class Distribution: {6: 13, 19: 21, 22: 1}\n", |
|||
"📌 Actual Class Distribution: {19: 35}\n", |
|||
"\n", |
|||
"=== User 20 ===\n", |
|||
"✅ Accuracy: 5.71%\n", |
|||
"📊 Predicted Class Distribution: {2: 33, 20: 2}\n", |
|||
"📌 Actual Class Distribution: {20: 35}\n", |
|||
"\n", |
|||
"=== User 21 ===\n", |
|||
"✅ Accuracy: 100.00%\n", |
|||
"📊 Predicted Class Distribution: {21: 35}\n", |
|||
"📌 Actual Class Distribution: {21: 35}\n", |
|||
"\n", |
|||
"=== User 22 ===\n", |
|||
"✅ Accuracy: 0.00%\n", |
|||
"📊 Predicted Class Distribution: {8: 2, 9: 2, 29: 26}\n", |
|||
"📌 Actual Class Distribution: {22: 30}\n", |
|||
"\n", |
|||
"=== User 23 ===\n", |
|||
"✅ Accuracy: 65.71%\n", |
|||
"📊 Predicted Class Distribution: {3: 4, 23: 23, 30: 8}\n", |
|||
"📌 Actual Class Distribution: {23: 35}\n", |
|||
"\n", |
|||
"=== User 24 ===\n", |
|||
"✅ Accuracy: 100.00%\n", |
|||
"📊 Predicted Class Distribution: {24: 35}\n", |
|||
"📌 Actual Class Distribution: {24: 35}\n", |
|||
"\n", |
|||
"=== User 25 ===\n", |
|||
"✅ Accuracy: 0.00%\n", |
|||
"📊 Predicted Class Distribution: {2: 33, 12: 1, 30: 1}\n", |
|||
"📌 Actual Class Distribution: {25: 35}\n", |
|||
"\n", |
|||
"=== User 26 ===\n", |
|||
"✅ Accuracy: 0.00%\n", |
|||
"📊 Predicted Class Distribution: {12: 29, 21: 6}\n", |
|||
"📌 Actual Class Distribution: {26: 35}\n", |
|||
"\n", |
|||
"=== User 27 ===\n", |
|||
"✅ Accuracy: 0.00%\n", |
|||
"📊 Predicted Class Distribution: {12: 35}\n", |
|||
"📌 Actual Class Distribution: {27: 35}\n", |
|||
"\n", |
|||
"=== User 28 ===\n", |
|||
"✅ Accuracy: 100.00%\n", |
|||
"📊 Predicted Class Distribution: {28: 35}\n", |
|||
"📌 Actual Class Distribution: {28: 35}\n", |
|||
"\n", |
|||
"=== User 29 ===\n", |
|||
"✅ Accuracy: 28.57%\n", |
|||
"📊 Predicted Class Distribution: {2: 1, 12: 2, 26: 8, 29: 10, 30: 14}\n", |
|||
"📌 Actual Class Distribution: {29: 35}\n", |
|||
"\n", |
|||
"=== User 30 ===\n", |
|||
"✅ Accuracy: 34.29%\n", |
|||
"📊 Predicted Class Distribution: {2: 4, 26: 2, 27: 4, 29: 13, 30: 12}\n", |
|||
"📌 Actual Class Distribution: {30: 35}\n", |
|||
"\n", |
|||
"=== User 31 ===\n", |
|||
"✅ Accuracy: 60.00%\n", |
|||
"📊 Predicted Class Distribution: {12: 1, 16: 1, 18: 12, 31: 21}\n", |
|||
"📌 Actual Class Distribution: {31: 35}\n", |
|||
"\n", |
|||
"🟩 Final Evaluation Summary for Sequence Length 25:\n", |
|||
"Users with >50% Accuracy: 16 / 32\n", |
|||
"✅ Final Success Rate: 50.00%\n", |
|||
"\n", |
|||
"✅ All evaluations completed. Results saved to: /kaggle/working/evaluation_results.xlsx\n" |
|||
] |
|||
} |
|||
], |
|||
"source": [ |
|||
"from pandas import ExcelWriter\n", |
|||
"\n", |
|||
"# === Run evaluation for each trained sequence length ===\n", |
|||
"test_scenario = get_user_input_for_test()\n", |
|||
"test_data = filter_test_data(df, test_scenario)\n", |
|||
"\n", |
|||
"output_excel_path = \"/kaggle/working/evaluation_results.xlsx\"\n", |
|||
"\n", |
|||
"with ExcelWriter(output_excel_path) as writer:\n", |
|||
" for sequence_length, result in best_models.items():\n", |
|||
" print(f\"\\n🔍 Testing Model for Sequence Length: {sequence_length}\")\n", |
|||
" evaluate_model_on_test_data(\n", |
|||
" result['model'],\n", |
|||
" test_data.copy(),\n", |
|||
" sequence_length,\n", |
|||
" writer # 👈 pass the writer\n", |
|||
" )\n", |
|||
"\n", |
|||
"print(f\"\\n✅ All evaluations completed. Results saved to: {output_excel_path}\")\n" |
|||
] |
|||
}, |
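The cell above writes one sheet per sequence length through pandas' ExcelWriter, using sheet names of the form SeqLen_{sequence_length}. A quick read-back check of the exported workbook, assuming the path printed above and that a sequence length of 20 was actually trained:

import pandas as pd

# Load one results sheet to confirm the export; the final TOTAL row carries the
# per-sequence-length summary written by evaluate_model_on_test_data.
results_20 = pd.read_excel("/kaggle/working/evaluation_results.xlsx",
                           sheet_name="SeqLen_20")
print(results_20.tail())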
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [ |
|||
"\n", |
|||
"\n", |
|||
"\n", |
|||
"\n" |
|||
] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"execution_count": null, |
|||
"metadata": { |
|||
"trusted": true |
|||
}, |
|||
"outputs": [], |
|||
"source": [ |
|||
"\n", |
|||
"# # === Evaluation function (your version) ===\n", |
|||
"# def evaluate_model_on_test_data(model, test_df, sequence_length):\n", |
|||
"# print(\"\\n🧪 Evaluating on Test Data...\")\n", |
|||
"# test_df = test_df.drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])\n", |
|||
"# test_df = test_df.sort_values(by='user').reset_index(drop=True)\n", |
|||
"\n", |
|||
"# users = test_df['user'].unique()\n", |
|||
"# results = {}\n", |
|||
"# accuracy_above_50 = 0\n", |
|||
"\n", |
|||
"# for user in users:\n", |
|||
"# user_df = test_df[test_df['user'] == user]\n", |
|||
"# X, y_true = [], []\n", |
|||
"# user_features = user_df.drop(columns=['user']).values\n", |
|||
"# user_labels = user_df['user'].values\n", |
|||
"\n", |
|||
"# if len(user_df) <= sequence_length:\n", |
|||
"# print(f\"Skipping User {user} (not enough data for sequence length {sequence_length})\")\n", |
|||
"# continue\n", |
|||
"\n", |
|||
"# for i in range(len(user_df) - sequence_length):\n", |
|||
"# seq_x = user_features[i:i + sequence_length]\n", |
|||
"# seq_y = user_labels[i + sequence_length]\n", |
|||
"# X.append(seq_x)\n", |
|||
"# y_true.append(seq_y)\n", |
|||
"\n", |
|||
"# X = np.array(X)\n", |
|||
"# y_true = np.array(y_true)\n", |
|||
"\n", |
|||
"# if len(X) == 0:\n", |
|||
"# continue\n", |
|||
"\n", |
|||
"# y_pred = model.predict(X, verbose=0)\n", |
|||
"# y_pred_classes = np.argmax(y_pred, axis=1)\n", |
|||
"\n", |
|||
"# unique_pred, counts_pred = np.unique(y_pred_classes, return_counts=True)\n", |
|||
"# label_counts_pred = dict(zip(unique_pred, counts_pred))\n", |
|||
"\n", |
|||
"# unique_true, counts_true = np.unique(y_true, return_counts=True)\n", |
|||
"# label_counts_true = dict(zip(unique_true, counts_true))\n", |
|||
"\n", |
|||
"# acc = accuracy_score(y_true, y_pred_classes)\n", |
|||
"# if acc > 0.5:\n", |
|||
"# accuracy_above_50 += 1\n", |
|||
"\n", |
|||
"# results[user] = {\n", |
|||
"# 'accuracy': acc,\n", |
|||
"# 'predicted_counts': label_counts_pred,\n", |
|||
"# 'actual_counts': label_counts_true\n", |
|||
"# }\n", |
|||
"\n", |
|||
"# print(f\"\\n=== User {user} ===\")\n", |
|||
"# print(f\"✅ Accuracy: {acc * 100:.2f}%\")\n", |
|||
"# print(\"📊 Predicted Class Distribution:\", label_counts_pred)\n", |
|||
"# print(\"📌 Actual Class Distribution: \", label_counts_true)\n", |
|||
"\n", |
|||
"# final_accuracy_percent = (accuracy_above_50 / 32) * 100\n", |
|||
"# print(f\"\\n🟩 Final Evaluation Summary for Sequence Length {sequence_length}:\")\n", |
|||
"# print(f\"Users with >50% Accuracy: {accuracy_above_50} / 32\")\n", |
|||
"# print(f\"✅ Final Success Rate: {final_accuracy_percent:.2f}%\")\n", |
|||
"\n", |
|||
"# # === Run evaluation for each trained sequence length ===\n", |
|||
"# test_scenario = get_user_input_for_test()\n", |
|||
"# test_data = filter_test_data(df, test_scenario)\n", |
|||
"\n", |
|||
"# for sequence_length, result in best_models.items():\n", |
|||
"# print(f\"\\n🔍 Testing Model for Sequence Length: {sequence_length}\")\n", |
|||
"# evaluate_model_on_test_data(result['model'], test_data.copy(), sequence_length)\n", |
|||
"\n", |
|||
"# print(\"\\n✅ All evaluations completed.\")\n" |
|||
] |
|||
} |
|||
], |
|||
"metadata": { |
|||
"kaggle": { |
|||
"accelerator": "nvidiaTeslaT4", |
|||
"dataSources": [ |
|||
{ |
|||
"datasetId": 5775075, |
|||
"sourceId": 9494285, |
|||
"sourceType": "datasetVersion" |
|||
} |
|||
], |
|||
"dockerImageVersionId": 31011, |
|||
"isGpuEnabled": true, |
|||
"isInternetEnabled": true, |
|||
"language": "python", |
|||
"sourceType": "notebook" |
|||
}, |
|||
"kernelspec": { |
|||
"display_name": ".venv", |
|||
"language": "python", |
|||
"name": "python3" |
|||
}, |
|||
"language_info": { |
|||
"codemirror_mode": { |
|||
"name": "ipython", |
|||
"version": 3 |
|||
}, |
|||
"file_extension": ".py", |
|||
"mimetype": "text/x-python", |
|||
"name": "python", |
|||
"nbconvert_exporter": "python", |
|||
"pygments_lexer": "ipython3", |
|||
"version": "3.10.18" |
|||
} |
|||
}, |
|||
"nbformat": 4, |
|||
"nbformat_minor": 4 |
|||
} |