Step_Data_Project_India/final-32-automated-code-new...


								{

								 "cells": [

								  {

								   "cell_type": "code",

								   "execution_count": 4,

								   "metadata": {

								    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",

								    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",

								    "trusted": true

								   },

								   "outputs": [],

								   "source": [

								    "# This Python 3 environment comes with many helpful analytics libraries installed\n",

								    "# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",

								    "# For example, here are several helpful packages to load\n",

								    "\n",

								    "import numpy as np # linear algebra\n",

								    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",

								    "\n",

								    "# Input data files are available in the read-only \"../input/\" directory\n",

								    "# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",

								    "\n",

								    "import os\n",

								    "# Print the full path of every file found under the Kaggle input mount.\n",
								    "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
								    "    for filename in filenames:\n",
								    "        full_path = os.path.join(dirname, filename)\n",
								    "        print(full_path)\n",

								    "\n",

								    "# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",

								    "# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 6,

								   "metadata": {

								    "collapsed": true,

								    "execution": {

								     "iopub.execute_input": "2025-05-02T07:51:57.538752Z",

								     "iopub.status.busy": "2025-05-02T07:51:57.538555Z",

								     "iopub.status.idle": "2025-05-02T08:46:51.909800Z",

								     "shell.execute_reply": "2025-05-02T08:46:51.909147Z",

								     "shell.execute_reply.started": "2025-05-02T07:51:57.538734Z"

								    },

								    "jupyter": {

								     "outputs_hidden": true

								    },

								    "trusted": true

								   },

								   "outputs": [

								    {

								     "ename": "ModuleNotFoundError",

								     "evalue": "No module named 'sklearn'",

								     "output_type": "error",

								     "traceback": [

								      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",

								      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",

								      "Cell \u001b[0;32mIn[6], line 11\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkeras_tuner\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkt\u001b[39;00m\n\u001b[1;32m     10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkeras_tuner\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m RandomSearch\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m accuracy_score\n\u001b[1;32m     13\u001b[0m \u001b[38;5;66;03m# === Clean previous tuning directory ===\u001b[39;00m\n\u001b[1;32m     14\u001b[0m shutil\u001b[38;5;241m.\u001b[39mrmtree(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/kaggle/working/my_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m, ignore_errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",

								      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sklearn'"

								     ]

								    }

								   ],

								   "source": [

								    "import numpy as np\n",

								    "import pandas as pd\n",

								    "import shutil\n",

								    "import os\n",

								    "from tensorflow.keras.models import Sequential\n",

								    "from tensorflow.keras.layers import LSTM, Dense, Dropout,GRU,Bidirectional\n",

								    "from tensorflow.keras.optimizers import Adam\n",

								    "from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping\n",

								    "import keras_tuner as kt\n",

								    "from keras_tuner import RandomSearch\n",

								    "from sklearn.metrics import accuracy_score\n",

								    "\n",

								    "# === Clean previous tuning directory ===\n",
								    "# The tuner writes its trials to this folder; wipe leftovers from earlier runs.\n",
								    "shutil.rmtree(\"/kaggle/working/my_dir\", ignore_errors=True)\n",
								    "\n",
								    "# === Load dataset ===\n",
								    "file_path = '/kaggle/input/32usrs/ALLUSERS32_15MIN_WITHOUTTHREHOLD.xlsx'\n",
								    "df = pd.read_excel(file_path)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": []

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": [

								    "# === Helper functions for scenario selection ===\n",

								    "def get_user_input_for_scenario(scenario_type):\n",
								    "    \"\"\"Interactively collect the (year, months) pairs that define a scenario.\n",
								    "\n",
								    "    Args:\n",
								    "        scenario_type: Label shown in the prompts (e.g. \"training\", \"validation\").\n",
								    "\n",
								    "    Returns:\n",
								    "        A list of ``(year, [month, ...])`` tuples in the order the years were entered.\n",
								    "\n",
								    "    Raises:\n",
								    "        ValueError: If no year is entered, a year gets no months, or an entry is not an integer.\n",
								    "    \"\"\"\n",
								    "    def _parse_int_list(raw, what):\n",
								    "        # Skip empty tokens so a trailing or doubled comma (\"1,2,\") does not reach int('').\n",
								    "        values = [int(token) for token in raw.split(',') if token.strip()]\n",
								    "        if not values:\n",
								    "            raise ValueError(f\"No {what} entered for the {scenario_type} scenario.\")\n",
								    "        return values\n",
								    "\n",
								    "    print(f\"\\nPlease define your custom {scenario_type} scenario:\")\n",
								    "    years_input = input(f\"Enter {scenario_type} years (comma-separated, e.g., 2017,2018): \").strip()\n",
								    "    years = _parse_int_list(years_input, \"years\")\n",
								    "    years_months = []\n",
								    "    for year in years:\n",
								    "        months_input = input(f\"Enter months for year {year} (comma-separated, e.g., 1,2,3): \").strip()\n",
								    "        years_months.append((year, _parse_int_list(months_input, f\"months for year {year}\")))\n",
								    "    return years_months\n",

								    "\n",

								    "def display_warning_about_2020_data():\n",
								    "    \"\"\"Print the standing reminder about how 2020 data may be used.\"\"\"\n",
								    "    notices = (\n",
								    "        \"\\n⚠️ Warning: 2020 data after February is excluded due to COVID-19.\",\n",
								    "        \"✅ Only Jan and Feb 2020 are used for testing. Do not use them in training/validation.\",\n",
								    "    )\n",
								    "    for notice in notices:\n",
								    "        print(notice)\n",

								    "\n",

								    "def display_warnings_for_scenarios(scenario_type):\n",
								    "    \"\"\"Print the predefined scenarios for reference before the user types a custom one.\n",
								    "\n",
								    "    Only \"training\" and \"validation\" produce output; any other value prints nothing.\n",
								    "    \"\"\"\n",
								    "    is_training = scenario_type == \"training\"\n",
								    "    if not is_training and scenario_type != \"validation\":\n",
								    "        return\n",
								    "\n",
								    "    if is_training:\n",
								    "        print(\"\\n⚠️ Predefined Training Scenarios (for reference only):\")\n",
								    "        catalogue = predefined_training_scenarios\n",
								    "    else:\n",
								    "        print(\"\\n⚠️ Predefined Validation Scenario:\")\n",
								    "        catalogue = predefined_validation_scenarios\n",
								    "\n",
								    "    # One line per scenario, formatted as \"<year>-<month list>\" joined by commas.\n",
								    "    for label, entry in catalogue.items():\n",
								    "        described = ', '.join(f\"{yr}-{mths}\" for yr, mths in entry['years_months'])\n",
								    "        print(f\"  {label}: {described}\")\n",
								    "\n",
								    "    if not is_training:\n",
								    "        print(\"  - This uses Oct, Nov, Dec of 2019\")\n",

								    "\n",

								    "# Reference scenarios shown to the user; each entry maps to a list of (year, months) pairs.\n",
								    "predefined_training_scenarios = {\n",
								    "    \"Scenario 1\": {\n",
								    "        \"years_months\": [\n",
								    "            (2018, [*range(1, 13)]),\n",
								    "            (2019, [*range(1, 10)]),\n",
								    "        ]\n",
								    "    },\n",
								    "    \"Scenario 2\": {\n",
								    "        \"years_months\": [\n",
								    "            (2017, [*range(1, 13)]),\n",
								    "            (2018, [*range(1, 13)]),\n",
								    "            (2019, [*range(1, 10)]),\n",
								    "        ]\n",
								    "    },\n",
								    "}\n",
								    "predefined_validation_scenarios = {\n",
								    "    \"Scenario A\": {\"years_months\": [(2019, [10, 11, 12])]},\n",
								    "}\n"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": [

								    "# === Get user-defined training and validation scenarios ===\n",
								    "def _collect_scenario(kind, banner):\n",
								    "    \"\"\"Print the banner and reference warnings, then prompt for one scenario.\"\"\"\n",
								    "    print(banner)\n",
								    "    display_warning_about_2020_data()\n",
								    "    display_warnings_for_scenarios(kind)\n",
								    "    return get_user_input_for_scenario(kind)\n",
								    "\n",
								    "training_scenario = _collect_scenario(\"training\", \"=== Training Scenario Setup ===\")\n",
								    "validation_scenario = _collect_scenario(\"validation\", \"\\n=== Validation Scenario Setup ===\")\n",

								    "\n",

								    "# === Filter and preprocess data ===\n",

								    "def filter_data(df, scenario):\n",
								    "    \"\"\"Select the rows belonging to a scenario and drop the calendar columns.\n",
								    "\n",
								    "    Args:\n",
								    "        df: Source frame; must contain 'Year', 'Month', 'date' and 'DayOfWeek' columns.\n",
								    "        scenario: Iterable of ``(year, months)`` pairs, ``months`` being a list of month numbers.\n",
								    "\n",
								    "    Returns:\n",
								    "        The matching rows, in scenario order with their original index, without the\n",
								    "        'Month', 'Year', 'date' and 'DayOfWeek' columns. Empty when nothing matches.\n",
								    "    \"\"\"\n",
								    "    # Collect the slices first and concatenate once; growing a frame inside the loop\n",
								    "    # re-copies everything gathered so far on every pass.\n",
								    "    parts = [df[(df['Year'] == year) & (df['Month'].isin(months))] for year, months in scenario]\n",
								    "    # A zero-row slice keeps the column layout, so an empty scenario returns an empty\n",
								    "    # frame instead of failing in drop() on a frame that has no columns at all.\n",
								    "    filtered = pd.concat(parts) if parts else df.iloc[0:0]\n",
								    "    return filtered.drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])\n",

								    "\n",

								    "# Materialise the training and validation subsets for the scenarios chosen above.\n",
								    "data = filter_data(df=df, scenario=training_scenario)\n",
								    "data_val = filter_data(df=df, scenario=validation_scenario)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": [

								    "\n",

								    "\n",

								    "# === Organize by user ===\n",
								    "# Use a stable sort: the sliding windows built later read consecutive rows of each user,\n",
								    "# so rows sharing a user id must keep their incoming order (presumably chronological -\n",
								    "# confirm against the source file). The default quicksort does not guarantee that.\n",
								    "df_sorted = data.sort_values(by='user', kind='mergesort').reset_index(drop=True)\n",
								    "df_sorted_val = data_val.sort_values(by='user', kind='mergesort').reset_index(drop=True)\n",
								    "users = df_sorted['user'].unique()\n",
								    "users_val = df_sorted_val['user'].unique()\n",
								    "\n",
								    "# One sub-frame per user id, for the training and validation subsets respectively.\n",
								    "user_data = {user: df_sorted[df_sorted['user'] == user] for user in users}\n",
								    "user_data_val = {user: df_sorted_val[df_sorted_val['user'] == user] for user in users_val}\n",

								    "\n",

								    "# === Callbacks ===\n",
								    "# Both callbacks watch the validation loss.\n",
								    "# NOTE(review): both use patience=5, so the LR reduction will often fire on the same\n",
								    "# epoch that early stopping ends training - consider a smaller scheduler patience.\n",
								    "early_stopping = EarlyStopping(\n",
								    "    monitor='val_loss',\n",
								    "    patience=5,\n",
								    "    restore_best_weights=True,\n",
								    ")\n",
								    "lr_scheduler = ReduceLROnPlateau(\n",
								    "    monitor='val_loss',\n",
								    "    factor=0.5,\n",
								    "    patience=5,\n",
								    "    verbose=1,\n",
								    ")"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": [

								    "# === Model tuning and training loop ===\n",
								    "def _build_sequences(frames_by_user, window):\n",
								    "    \"\"\"Turn each user's rows into sliding windows.\n",
								    "\n",
								    "    Returns ``(X, y)`` as NumPy arrays: every X entry stacks ``window`` consecutive\n",
								    "    feature rows of one user, and the matching y entry is the 'user' value of the\n",
								    "    row that follows that window.\n",
								    "    \"\"\"\n",
								    "    windows, targets = [], []\n",
								    "    for frame in frames_by_user.values():\n",
								    "        features = frame.drop('user', axis=1).values\n",
								    "        labels = frame['user'].values\n",
								    "        for start in range(len(features) - window):\n",
								    "            windows.append(features[start:start + window])\n",
								    "            targets.append(labels[start + window])\n",
								    "    return np.array(windows), np.array(targets)\n",
								    "\n",
								    "\n",
								    "best_models = {}\n",
								    "\n",
								    "for sequence_length in range(20, 30, 5):\n",
								    "    print(f\"\\n=== Training for Sequence Length: {sequence_length} ===\")\n",
								    "\n",
								    "    # Windows are built inside a helper so its temporaries stay local and cannot\n",
								    "    # overwrite notebook-level names such as the `data` training frame.\n",
								    "    X, y = _build_sequences(user_data, sequence_length)\n",
								    "    X_val, y_val = _build_sequences(user_data_val, sequence_length)\n",
								    "\n",
								    "    if X.shape[0] == 0 or X_val.shape[0] == 0:\n",
								    "        print(f\"⚠️ Skipped sequence length {sequence_length} due to insufficient data.\")\n",
								    "        continue\n",
								    "\n",
								    "    n_features = X.shape[2]\n",
								    "\n",
								    "    def build_model(hp):\n",
								    "        \"\"\"Build and compile a Bidirectional-LSTM classifier from the tuner's hyperparameters.\"\"\"\n",
								    "        model = Sequential()\n",
								    "        model.add(Bidirectional(LSTM(units=hp.Int('units', 32, 256, step=2),\n",
								    "                                     input_shape=(sequence_length, n_features))))\n",
								    "        model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.5, step=0.1)))\n",
								    "        # One softmax output per training user; labels are used as class indices.\n",
								    "        model.add(Dense(len(users), activation='softmax'))\n",
								    "        model.compile(\n",
								    "            optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),\n",
								    "            loss='sparse_categorical_crossentropy',\n",
								    "            metrics=['accuracy']\n",
								    "        )\n",
								    "        return model\n",
								    "\n",
								    "    tuner = RandomSearch(\n",
								    "        build_model,\n",
								    "        objective='val_loss',\n",
								    "        max_trials=30,\n",
								    "        executions_per_trial=2,\n",
								    "        directory='/kaggle/working/my_dir',\n",
								    "        project_name=f'lstm_seq_{sequence_length}'\n",
								    "    )\n",
								    "\n",
								    "    tuner.search(X, y, epochs=30, validation_data=(X_val, y_val),\n",
								    "                 callbacks=[early_stopping, lr_scheduler], verbose=1)\n",
								    "\n",
								    "    # Retrain a fresh model with the winning hyperparameters and keep it per window size.\n",
								    "    best_hps = tuner.get_best_hyperparameters(1)[0]\n",
								    "    best_model = tuner.hypermodel.build(best_hps)\n",
								    "    best_model.fit(X, y, epochs=30, validation_data=(X_val, y_val),\n",
								    "                   callbacks=[early_stopping, lr_scheduler], verbose=0)\n",
								    "\n",
								    "    best_models[sequence_length] = {\n",
								    "        'model': best_model,\n",
								    "        'best_hyperparameters': {\n",
								    "            'units': best_hps.get('units'),\n",
								    "            'dropout_rate': best_hps.get('dropout_rate'),\n",
								    "            'learning_rate': best_hps.get('learning_rate')\n",
								    "        }\n",
								    "    }"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": [

								    "\n",

								    "\n",

								    "# === Get test scenario input ===\n",

								    "def get_user_input_for_test():\n",
								    "    \"\"\"Interactively collect the (year, months) pairs that define the test scenario.\n",
								    "\n",
								    "    Returns:\n",
								    "        A list of ``(year, [month, ...])`` tuples in the order the years were entered.\n",
								    "\n",
								    "    Raises:\n",
								    "        ValueError: If no year is entered, a year gets no months, or an entry is not an integer.\n",
								    "    \"\"\"\n",
								    "    def _parse_int_list(raw, what):\n",
								    "        # Skip empty tokens so a trailing or doubled comma (\"1,2,\") does not reach int('').\n",
								    "        values = [int(token) for token in raw.split(',') if token.strip()]\n",
								    "        if not values:\n",
								    "            raise ValueError(f\"No {what} entered for the test scenario.\")\n",
								    "        return values\n",
								    "\n",
								    "    print(\"\\n=== Testing Scenario Setup ===\")\n",
								    "    print(\"⚠️ Only January and February of 2020 were used for testing in predefined setup.\")\n",
								    "    print(\"⚠️ Avoid using 2020 data after February due to COVID-19 impact.\\n\")\n",
								    "    years_input = input(\"Enter test years (comma-separated, e.g., 2020): \").strip()\n",
								    "    years = _parse_int_list(years_input, \"test years\")\n",
								    "    years_months = []\n",
								    "    for year in years:\n",
								    "        months_input = input(f\"Enter months for year {year} (comma-separated, e.g., 1,2): \").strip()\n",
								    "        years_months.append((year, _parse_int_list(months_input, f\"months for year {year}\")))\n",
								    "    return years_months\n",

								    "\n",

								    "def filter_test_data(df, scenario):\n",
								    "    \"\"\"Return the rows of ``df`` that fall in the given test scenario.\n",
								    "\n",
								    "    Args:\n",
								    "        df: Source frame with 'Year' and 'Month' columns.\n",
								    "        scenario: Iterable of ``(year, months)`` pairs, ``months`` being a list of month numbers.\n",
								    "\n",
								    "    Returns:\n",
								    "        The matching rows in scenario order with a fresh 0..n-1 index; all columns are kept.\n",
								    "        An empty scenario yields an empty frame with the same columns.\n",
								    "    \"\"\"\n",
								    "    data_parts = [df[(df['Year'] == year) & (df['Month'].isin(months))] for year, months in scenario]\n",
								    "    if not data_parts:\n",
								    "        # pd.concat([]) raises \"No objects to concatenate\"; hand back a zero-row frame instead.\n",
								    "        return df.iloc[0:0].reset_index(drop=True)\n",
								    "    return pd.concat(data_parts, ignore_index=True)"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 3,

								   "metadata": {

								    "execution": {

								     "iopub.execute_input": "2025-05-02T08:53:17.334789Z",

								     "iopub.status.busy": "2025-05-02T08:53:17.334489Z",

								     "iopub.status.idle": "2025-05-02T08:53:17.344855Z",

								     "shell.execute_reply": "2025-05-02T08:53:17.344176Z",

								     "shell.execute_reply.started": "2025-05-02T08:53:17.334766Z"

								    },

								    "trusted": true

								   },

								   "outputs": [],

								   "source": [

								    "import pandas as pd\n",

								    "import os\n",

								    "\n",

								    "def evaluate_model_on_test_data(model, test_df, sequence_length, excel_writer, total_users=32):\n",
								    "    \"\"\"Score a trained model per user on the test set and export the results to Excel.\n",
								    "\n",
								    "    Args:\n",
								    "        model: Object exposing ``predict(X, verbose=0)`` that returns per-class scores.\n",
								    "        test_df: Test rows including 'Month', 'Year', 'date', 'DayOfWeek' and 'user' columns.\n",
								    "        sequence_length: Number of consecutive rows in each input window.\n",
								    "        excel_writer: Target handed to ``DataFrame.to_excel``.\n",
								    "        total_users: Denominator of the success rate (defaults to 32, the value that was\n",
								    "            previously hard-coded).\n",
								    "\n",
								    "    Returns:\n",
								    "        None. Per-user results are printed and written to sheet ``SeqLen_<sequence_length>``.\n",
								    "    \"\"\"\n",
								    "    import numpy as np  # local import: this cell itself only imports pandas and os\n",
								    "\n",
								    "    print(\"\\n🧪 Evaluating on Test Data...\")\n",
								    "    test_df = test_df.drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])\n",
								    "    # Stable sort: windows read consecutive rows of a user, so rows sharing a user id\n",
								    "    # must keep their incoming order; the default quicksort does not guarantee that.\n",
								    "    test_df = test_df.sort_values(by='user', kind='mergesort').reset_index(drop=True)\n",
								    "\n",
								    "    results = []\n",
								    "    accuracy_above_50 = 0\n",
								    "\n",
								    "    for user in test_df['user'].unique():\n",
								    "        user_df = test_df[test_df['user'] == user]\n",
								    "        if len(user_df) <= sequence_length:\n",
								    "            print(f\"Skipping User {user} (not enough data for sequence length {sequence_length})\")\n",
								    "            continue\n",
								    "\n",
								    "        user_features = user_df.drop(columns=['user']).values\n",
								    "        user_labels = user_df['user'].values\n",
								    "\n",
								    "        # Window i covers rows [i, i + sequence_length); its target is the label of the next row.\n",
								    "        n_windows = len(user_df) - sequence_length\n",
								    "        X = np.array([user_features[i:i + sequence_length] for i in range(n_windows)])\n",
								    "        y_true = user_labels[sequence_length:]\n",
								    "\n",
								    "        y_pred = model.predict(X, verbose=0)\n",
								    "        y_pred_classes = np.argmax(y_pred, axis=1)\n",
								    "\n",
								    "        # .tolist() yields plain Python scalars so the dicts print as {0: 19, ...}\n",
								    "        # regardless of the installed NumPy version.\n",
								    "        unique_pred, counts_pred = np.unique(y_pred_classes, return_counts=True)\n",
								    "        label_counts_pred = dict(zip(unique_pred.tolist(), counts_pred.tolist()))\n",
								    "\n",
								    "        unique_true, counts_true = np.unique(y_true, return_counts=True)\n",
								    "        label_counts_true = dict(zip(unique_true.tolist(), counts_true.tolist()))\n",
								    "\n",
								    "        # Fraction of windows classified correctly; computed with NumPy so the function\n",
								    "        # does not depend on scikit-learn being importable.\n",
								    "        acc = float(np.mean(y_true == y_pred_classes))\n",
								    "        if acc > 0.5:\n",
								    "            accuracy_above_50 += 1\n",
								    "\n",
								    "        results.append({\n",
								    "            'User': user,\n",
								    "            'Accuracy (%)': acc * 100,\n",
								    "            'Predicted Class Distribution': str(label_counts_pred),\n",
								    "            'Actual Class Distribution': str(label_counts_true)\n",
								    "        })\n",
								    "\n",
								    "        print(f\"\\n=== User {user} ===\")\n",
								    "        print(f\"✅ Accuracy: {acc * 100:.2f}%\")\n",
								    "        print(\"📊 Predicted Class Distribution:\", label_counts_pred)\n",
								    "        print(\"📌 Actual Class Distribution:   \", label_counts_true)\n",
								    "\n",
								    "    # Guard the ratio so total_users=0 reports 0% instead of raising ZeroDivisionError.\n",
								    "    final_accuracy_percent = (accuracy_above_50 / total_users) * 100 if total_users else 0.0\n",
								    "    print(f\"\\n🟩 Final Evaluation Summary for Sequence Length {sequence_length}:\")\n",
								    "    print(f\"Users with >50% Accuracy: {accuracy_above_50} / {total_users}\")\n",
								    "    print(f\"✅ Final Success Rate: {final_accuracy_percent:.2f}%\")\n",
								    "\n",
								    "    # Append overall stats as a final summary row.\n",
								    "    results.append({\n",
								    "        'User': 'TOTAL',\n",
								    "        'Accuracy (%)': '',\n",
								    "        'Predicted Class Distribution': f'Users >50% Acc: {accuracy_above_50}/{total_users}',\n",
								    "        'Actual Class Distribution': f'Success Rate: {final_accuracy_percent:.2f}%'\n",
								    "    })\n",
								    "\n",
								    "    # Save results to an Excel sheet named after the sequence length.\n",
								    "    df_results = pd.DataFrame(results)\n",
								    "    df_results.to_excel(excel_writer, sheet_name=f\"SeqLen_{sequence_length}\", index=False)\n"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": 6,

								   "metadata": {

								    "collapsed": true,

								    "execution": {

								     "iopub.execute_input": "2025-05-02T08:56:14.082755Z",

								     "iopub.status.busy": "2025-05-02T08:56:14.082010Z",

								     "iopub.status.idle": "2025-05-02T08:56:28.518300Z",

								     "shell.execute_reply": "2025-05-02T08:56:28.517562Z",

								     "shell.execute_reply.started": "2025-05-02T08:56:14.082721Z"

								    },

								    "jupyter": {

								     "outputs_hidden": true

								    },

								    "trusted": true

								   },

								   "outputs": [

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "\n",

								      "=== Testing Scenario Setup ===\n",

								      "⚠️ Only January and February of 2020 were used for testing in predefined setup.\n",

								      "⚠️ Avoid using 2020 data after February due to COVID-19 impact.\n",

								      "\n"

								     ]

								    },

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "Enter test years (comma-separated, e.g., 2020):  2020\n",

								      "Enter months for year 2020 (comma-separated, e.g., 1,2):  1,2\n"

								     ]

								    },

								    {

								     "name": "stdout",

								     "output_type": "stream",

								     "text": [

								      "\n",

								      "🔍 Testing Model for Sequence Length: 20\n",

								      "\n",

								      "🧪 Evaluating on Test Data...\n",

								      "\n",

								      "=== User 0 ===\n",

								      "✅ Accuracy: 47.50%\n",

								      "📊 Predicted Class Distribution: {0: 19, 18: 9, 24: 7, 26: 1, 30: 3, 31: 1}\n",

								      "📌 Actual Class Distribution:    {0: 40}\n",

								      "\n",

								      "=== User 1 ===\n",

								      "✅ Accuracy: 82.50%\n",

								      "📊 Predicted Class Distribution: {1: 33, 31: 7}\n",

								      "📌 Actual Class Distribution:    {1: 40}\n",

								      "\n",

								      "=== User 2 ===\n",

								      "✅ Accuracy: 0.00%\n",

								      "📊 Predicted Class Distribution: {6: 2, 12: 12, 17: 13, 30: 12, 31: 1}\n",

								      "📌 Actual Class Distribution:    {2: 40}\n",

								      "\n",

								      "=== User 3 ===\n",

								      "✅ Accuracy: 41.03%\n",

								      "📊 Predicted Class Distribution: {3: 16, 6: 1, 12: 8, 29: 13, 30: 1}\n",

								      "📌 Actual Class Distribution:    {3: 39}\n",

								      "\n",

								      "=== User 4 ===\n",

								      "✅ Accuracy: 2.50%\n",

								      "📊 Predicted Class Distribution: {2: 1, 4: 1, 8: 2, 9: 3, 18: 11, 23: 3, 26: 16, 29: 1, 30: 1, 31: 1}\n",

								      "📌 Actual Class Distribution:    {4: 40}\n",

								      "\n",

								      "=== User 5 ===\n",

								      "✅ Accuracy: 57.50%\n",

								      "📊 Predicted Class Distribution: {2: 5, 5: 23, 23: 2, 29: 6, 30: 3, 31: 1}\n",

								      "📌 Actual Class Distribution:    {5: 40}\n",

								      "\n",

								      "=== User 6 ===\n",

								      "✅ Accuracy: 25.00%\n",

								      "📊 Predicted Class Distribution: {6: 10, 17: 1, 30: 5, 31: 24}\n",

								      "📌 Actual Class Distribution:    {6: 40}\n",

								      "\n",

								      "=== User 7 ===\n",

								      "✅ Accuracy: 52.50%\n",

								      "📊 Predicted Class Distribution: {7: 21, 10: 3, 11: 14, 18: 2}\n",

								      "📌 Actual Class Distribution:    {7: 40}\n",

								      "\n",

								      "=== User 8 ===\n",

								      "✅ Accuracy: 62.50%\n",

								      "📊 Predicted Class Distribution: {8: 25, 23: 1, 29: 8, 30: 6}\n",

								      "📌 Actual Class Distribution:    {8: 40}\n",

								      "\n",

								      "=== User 9 ===\n",

								      "✅ Accuracy: 100.00%\n",

								      "📊 Predicted Class Distribution: {9: 40}\n",

								      "📌 Actual Class Distribution:    {9: 40}\n",

								      "\n",

								      "=== User 10 ===\n",

								      "✅ Accuracy: 57.50%\n",

								      "📊 Predicted Class Distribution: {10: 23, 11: 15, 30: 2}\n",

								      "📌 Actual Class Distribution:    {10: 40}\n",

								      "\n",

								      "=== User 11 ===\n",

								      "✅ Accuracy: 35.00%\n",

								      "📊 Predicted Class Distribution: {1: 1, 10: 15, 11: 14, 12: 1, 14: 4, 15: 2, 16: 2, 25: 1}\n",

								      "📌 Actual Class Distribution:    {11: 40}\n",

								      "\n",

								      "=== User 12 ===\n",

								      "✅ Accuracy: 62.50%\n",

								      "📊 Predicted Class Distribution: {3: 1, 12: 25, 26: 14}\n",

								      "📌 Actual Class Distribution:    {12: 40}\n",

								      "\n",

								      "=== User 13 ===\n",

								      "✅ Accuracy: 55.00%\n",

								      "📊 Predicted Class Distribution: {10: 3, 11: 3, 12: 2, 13: 22, 16: 1, 21: 9}\n",

								      "📌 Actual Class Distribution:    {13: 40}\n",

								      "\n",

								      "=== User 14 ===\n",

								      "✅ Accuracy: 70.00%\n",

								      "📊 Predicted Class Distribution: {0: 1, 14: 28, 16: 2, 18: 7, 25: 2}\n",

								      "📌 Actual Class Distribution:    {14: 40}\n",

								      "\n",

								      "=== User 15 ===\n",

								      "✅ Accuracy: 100.00%\n",

								      "📊 Predicted Class Distribution: {15: 40}\n",

								      "📌 Actual Class Distribution:    {15: 40}\n",

								      "\n",

								      "=== User 16 ===\n",

								      "✅ Accuracy: 17.50%\n",

								      "📊 Predicted Class Distribution: {15: 20, 16: 7, 18: 13}\n",

								      "📌 Actual Class Distribution:    {16: 40}\n",

								      "\n",

								      "=== User 17 ===\n",

								      "✅ Accuracy: 40.00%\n",

								      "📊 Predicted Class Distribution: {0: 2, 16: 6, 17: 16, 18: 1, 28: 1, 31: 14}\n",

								      "📌 Actual Class Distribution:    {17: 40}\n",

								      "\n",

								      "=== User 18 ===\n",

								      "✅ Accuracy: 97.50%\n",

								      "📊 Predicted Class Distribution: {0: 1, 18: 39}\n",

								      "📌 Actual Class Distribution:    {18: 40}\n",

								      "\n",

								      "=== User 19 ===\n",

								      "✅ Accuracy: 72.50%\n",

								      "📊 Predicted Class Distribution: {1: 3, 6: 7, 19: 29, 22: 1}\n",

								      "📌 Actual Class Distribution:    {19: 40}\n",

								      "\n",

								      "=== User 20 ===\n",

								      "✅ Accuracy: 77.50%\n",

								      "📊 Predicted Class Distribution: {2: 8, 20: 31, 26: 1}\n",

								      "📌 Actual Class Distribution:    {20: 40}\n",

								      "\n",

								      "=== User 21 ===\n",

								      "✅ Accuracy: 92.50%\n",

								      "📊 Predicted Class Distribution: {21: 37, 24: 3}\n",

								      "📌 Actual Class Distribution:    {21: 40}\n",

								      "\n",

								      "=== User 22 ===\n",

								      "✅ Accuracy: 0.00%\n",

								      "📊 Predicted Class Distribution: {8: 4, 9: 2, 23: 1, 29: 27, 30: 1}\n",

								      "📌 Actual Class Distribution:    {22: 35}\n",

								      "\n",

								      "=== User 23 ===\n",

								      "✅ Accuracy: 77.50%\n",

								      "📊 Predicted Class Distribution: {3: 9, 23: 31}\n",

								      "📌 Actual Class Distribution:    {23: 40}\n",

								      "\n",

								      "=== User 24 ===\n",

								      "✅ Accuracy: 92.50%\n",

								      "📊 Predicted Class Distribution: {21: 3, 24: 37}\n",

								      "📌 Actual Class Distribution:    {24: 40}\n",

								      "\n",

								      "=== User 25 ===\n",

								      "✅ Accuracy: 2.50%\n",

								      "📊 Predicted Class Distribution: {2: 14, 12: 11, 23: 1, 25: 1, 29: 4, 30: 9}\n",

								      "📌 Actual Class Distribution:    {25: 40}\n",

								      "\n",

								      "=== User 26 ===\n",

								      "✅ Accuracy: 0.00%\n",

								      "📊 Predicted Class Distribution: {12: 18, 18: 3, 21: 13, 24: 6}\n",

								      "📌 Actual Class Distribution:    {26: 40}\n",

								      "\n",

								      "=== User 27 ===\n",

								      "✅ Accuracy: 0.00%\n",

								      "📊 Predicted Class Distribution: {12: 38, 21: 1, 24: 1}\n",

								      "📌 Actual Class Distribution:    {27: 40}\n",

								      "\n",

								      "=== User 28 ===\n",

								      "✅ Accuracy: 100.00%\n",

								      "📊 Predicted Class Distribution: {28: 40}\n",

								      "📌 Actual Class Distribution:    {28: 40}\n",

								      "\n",

								      "=== User 29 ===\n",

								      "✅ Accuracy: 40.00%\n",

								      "📊 Predicted Class Distribution: {12: 12, 26: 1, 29: 16, 30: 11}\n",

								      "📌 Actual Class Distribution:    {29: 40}\n",

								      "\n",

								      "=== User 30 ===\n",

								      "✅ Accuracy: 35.00%\n",

								      "📊 Predicted Class Distribution: {12: 1, 18: 9, 23: 5, 25: 3, 26: 3, 29: 2, 30: 14, 31: 3}\n",

								      "📌 Actual Class Distribution:    {30: 40}\n",

								      "\n",

								      "=== User 31 ===\n",

								      "✅ Accuracy: 50.00%\n",

								      "📊 Predicted Class Distribution: {12: 2, 18: 18, 31: 20}\n",

								      "📌 Actual Class Distribution:    {31: 40}\n",

								      "\n",

								      "🟩 Final Evaluation Summary for Sequence Length 20:\n",

								      "Users with >50% Accuracy: 17 / 32\n",

								      "✅ Final Success Rate: 53.12%\n",

								      "\n",

								      "🔍 Testing Model for Sequence Length: 25\n",

								      "\n",

								      "🧪 Evaluating on Test Data...\n",

								      "\n",

								      "=== User 0 ===\n",

								      "✅ Accuracy: 17.14%\n",

								      "📊 Predicted Class Distribution: {0: 6, 18: 2, 24: 3, 25: 2, 26: 14, 30: 7, 31: 1}\n",

								      "📌 Actual Class Distribution:    {0: 35}\n",

								      "\n",

								      "=== User 1 ===\n",

								      "✅ Accuracy: 8.57%\n",

								      "📊 Predicted Class Distribution: {1: 3, 31: 32}\n",

								      "📌 Actual Class Distribution:    {1: 35}\n",

								      "\n",

								      "=== User 2 ===\n",

								      "✅ Accuracy: 5.71%\n",

								      "📊 Predicted Class Distribution: {2: 2, 12: 5, 17: 11, 21: 1, 30: 3, 31: 13}\n",

								      "📌 Actual Class Distribution:    {2: 35}\n",

								      "\n",

								      "=== User 3 ===\n",

								      "✅ Accuracy: 14.71%\n",

								      "📊 Predicted Class Distribution: {3: 5, 12: 1, 29: 5, 30: 16, 31: 7}\n",

								      "📌 Actual Class Distribution:    {3: 34}\n",

								      "\n",

								      "=== User 4 ===\n",

								      "✅ Accuracy: 0.00%\n",

								      "📊 Predicted Class Distribution: {2: 4, 9: 4, 10: 1, 25: 7, 26: 5, 27: 1, 30: 12, 31: 1}\n",

								      "📌 Actual Class Distribution:    {4: 35}\n",

								      "\n",

								      "=== User 5 ===\n",

								      "✅ Accuracy: 100.00%\n",

								      "📊 Predicted Class Distribution: {5: 35}\n",

								      "📌 Actual Class Distribution:    {5: 35}\n",

								      "\n",

								      "=== User 6 ===\n",

								      "✅ Accuracy: 31.43%\n",

								      "📊 Predicted Class Distribution: {6: 11, 31: 24}\n",

								      "📌 Actual Class Distribution:    {6: 35}\n",

								      "\n",

								      "=== User 7 ===\n",

								      "✅ Accuracy: 65.71%\n",

								      "📊 Predicted Class Distribution: {7: 23, 10: 3, 13: 9}\n",

								      "📌 Actual Class Distribution:    {7: 35}\n",

								      "\n",

								      "=== User 8 ===\n",

								      "✅ Accuracy: 82.86%\n",

								      "📊 Predicted Class Distribution: {4: 2, 8: 29, 22: 2, 30: 2}\n",

								      "📌 Actual Class Distribution:    {8: 35}\n",

								      "\n",

								      "=== User 9 ===\n",

								      "✅ Accuracy: 97.14%\n",

								      "📊 Predicted Class Distribution: {4: 1, 9: 34}\n",

								      "📌 Actual Class Distribution:    {9: 35}\n",

								      "\n",

								      "=== User 10 ===\n",

								      "✅ Accuracy: 40.00%\n",

								      "📊 Predicted Class Distribution: {10: 14, 13: 6, 23: 3, 25: 2, 30: 10}\n",

								      "📌 Actual Class Distribution:    {10: 35}\n",

								      "\n",

								      "=== User 11 ===\n",

								      "✅ Accuracy: 31.43%\n",

								      "📊 Predicted Class Distribution: {10: 22, 11: 11, 12: 1, 19: 1}\n",

								      "📌 Actual Class Distribution:    {11: 35}\n",

								      "\n",

								      "=== User 12 ===\n",

								      "✅ Accuracy: 57.14%\n",

								      "📊 Predicted Class Distribution: {12: 20, 29: 15}\n",

								      "📌 Actual Class Distribution:    {12: 35}\n",

								      "\n",

								      "=== User 13 ===\n",

								      "✅ Accuracy: 57.14%\n",

								      "📊 Predicted Class Distribution: {12: 1, 13: 20, 21: 14}\n",

								      "📌 Actual Class Distribution:    {13: 35}\n",

								      "\n",

								      "=== User 14 ===\n",

								      "✅ Accuracy: 62.86%\n",

								      "📊 Predicted Class Distribution: {0: 4, 14: 22, 15: 2, 18: 7}\n",

								      "📌 Actual Class Distribution:    {14: 35}\n",

								      "\n",

								      "=== User 15 ===\n",

								      "✅ Accuracy: 100.00%\n",

								      "📊 Predicted Class Distribution: {15: 35}\n",

								      "📌 Actual Class Distribution:    {15: 35}\n",

								      "\n",

								      "=== User 16 ===\n",

								      "✅ Accuracy: 40.00%\n",

								      "📊 Predicted Class Distribution: {7: 2, 15: 13, 16: 14, 18: 6}\n",

								      "📌 Actual Class Distribution:    {16: 35}\n",

								      "\n",

								      "=== User 17 ===\n",

								      "✅ Accuracy: 65.71%\n",

								      "📊 Predicted Class Distribution: {0: 1, 16: 11, 17: 23}\n",

								      "📌 Actual Class Distribution:    {17: 35}\n",

								      "\n",

								      "=== User 18 ===\n",

								      "✅ Accuracy: 82.86%\n",

								      "📊 Predicted Class Distribution: {0: 6, 18: 29}\n",

								      "📌 Actual Class Distribution:    {18: 35}\n",

								      "\n",

								      "=== User 19 ===\n",

								      "✅ Accuracy: 60.00%\n",

								      "📊 Predicted Class Distribution: {6: 13, 19: 21, 22: 1}\n",

								      "📌 Actual Class Distribution:    {19: 35}\n",

								      "\n",

								      "=== User 20 ===\n",

								      "✅ Accuracy: 5.71%\n",

								      "📊 Predicted Class Distribution: {2: 33, 20: 2}\n",

								      "📌 Actual Class Distribution:    {20: 35}\n",

								      "\n",

								      "=== User 21 ===\n",

								      "✅ Accuracy: 100.00%\n",

								      "📊 Predicted Class Distribution: {21: 35}\n",

								      "📌 Actual Class Distribution:    {21: 35}\n",

								      "\n",

								      "=== User 22 ===\n",

								      "✅ Accuracy: 0.00%\n",

								      "📊 Predicted Class Distribution: {8: 2, 9: 2, 29: 26}\n",

								      "📌 Actual Class Distribution:    {22: 30}\n",

								      "\n",

								      "=== User 23 ===\n",

								      "✅ Accuracy: 65.71%\n",

								      "📊 Predicted Class Distribution: {3: 4, 23: 23, 30: 8}\n",

								      "📌 Actual Class Distribution:    {23: 35}\n",

								      "\n",

								      "=== User 24 ===\n",

								      "✅ Accuracy: 100.00%\n",

								      "📊 Predicted Class Distribution: {24: 35}\n",

								      "📌 Actual Class Distribution:    {24: 35}\n",

								      "\n",

								      "=== User 25 ===\n",

								      "✅ Accuracy: 0.00%\n",

								      "📊 Predicted Class Distribution: {2: 33, 12: 1, 30: 1}\n",

								      "📌 Actual Class Distribution:    {25: 35}\n",

								      "\n",

								      "=== User 26 ===\n",

								      "✅ Accuracy: 0.00%\n",

								      "📊 Predicted Class Distribution: {12: 29, 21: 6}\n",

								      "📌 Actual Class Distribution:    {26: 35}\n",

								      "\n",

								      "=== User 27 ===\n",

								      "✅ Accuracy: 0.00%\n",

								      "📊 Predicted Class Distribution: {12: 35}\n",

								      "📌 Actual Class Distribution:    {27: 35}\n",

								      "\n",

								      "=== User 28 ===\n",

								      "✅ Accuracy: 100.00%\n",

								      "📊 Predicted Class Distribution: {28: 35}\n",

								      "📌 Actual Class Distribution:    {28: 35}\n",

								      "\n",

								      "=== User 29 ===\n",

								      "✅ Accuracy: 28.57%\n",

								      "📊 Predicted Class Distribution: {2: 1, 12: 2, 26: 8, 29: 10, 30: 14}\n",

								      "📌 Actual Class Distribution:    {29: 35}\n",

								      "\n",

								      "=== User 30 ===\n",

								      "✅ Accuracy: 34.29%\n",

								      "📊 Predicted Class Distribution: {2: 4, 26: 2, 27: 4, 29: 13, 30: 12}\n",

								      "📌 Actual Class Distribution:    {30: 35}\n",

								      "\n",

								      "=== User 31 ===\n",

								      "✅ Accuracy: 60.00%\n",

								      "📊 Predicted Class Distribution: {12: 1, 16: 1, 18: 12, 31: 21}\n",

								      "📌 Actual Class Distribution:    {31: 35}\n",

								      "\n",

								      "🟩 Final Evaluation Summary for Sequence Length 25:\n",

								      "Users with >50% Accuracy: 16 / 32\n",

								      "✅ Final Success Rate: 50.00%\n",

								      "\n",

								      "✅ All evaluations completed. Results saved to: /kaggle/working/evaluation_results.xlsx\n"

								     ]

								    }

								   ],

								   "source": [

								    "from pandas import ExcelWriter\n",

								    "\n",

								    "# === Run evaluation for each trained sequence length ===\n",

								    "# NOTE(review): get_user_input_for_test, filter_test_data, df, best_models and\n",

								    "# evaluate_model_on_test_data are not defined in this cell; they must already\n",

								    "# exist from earlier cells before this one is run.\n",

								    "test_scenario = get_user_input_for_test()\n",

								    "test_data = filter_test_data(df, test_scenario)\n",

								    "\n",

								    "# Save under /kaggle/working when that directory exists (same path as before on\n",

								    "# Kaggle); otherwise fall back to the current directory so a local run does not\n",

								    "# fail on a missing folder. `os` is imported in the notebook's first cell.\n",

								    "output_dir = \"/kaggle/working\" if os.path.isdir(\"/kaggle/working\") else os.getcwd()\n",

								    "output_excel_path = os.path.join(output_dir, \"evaluation_results.xlsx\")\n",

								    "\n",

								    "if not best_models:\n",

								    "    # Nothing to evaluate: skip creating a workbook and do not report success.\n",

								    "    print(\"\\n⚠️ No trained models found in best_models; nothing to evaluate.\")\n",

								    "else:\n",

								    "    # The same writer is handed to every call, so all evaluations share one file.\n",

								    "    with ExcelWriter(output_excel_path) as writer:\n",

								    "        for sequence_length, result in best_models.items():\n",

								    "            print(f\"\\n🔍 Testing Model for Sequence Length: {sequence_length}\")\n",

								    "            evaluate_model_on_test_data(\n",

								    "                result['model'],\n",

								    "                test_data.copy(),  # each evaluation gets its own copy of the test frame\n",

								    "                sequence_length,\n",

								    "                writer,\n",

								    "            )\n",

								    "\n",

								    "    print(f\"\\n✅ All evaluations completed. Results saved to: {output_excel_path}\")\n"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": [

								    "\n",

								    "\n",

								    "\n",

								    "\n"

								   ]

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": []

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": []

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": []

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": []

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": []

								  },

								  {

								   "cell_type": "code",

								   "execution_count": null,

								   "metadata": {

								    "trusted": true

								   },

								   "outputs": [],

								   "source": [

								    "\n",

								    "# # === Older evaluation function (takes no Excel writer argument), kept commented out for reference ===\n",

								    "# def evaluate_model_on_test_data(model, test_df, sequence_length):\n",

								    "#     print(\"\\n🧪 Evaluating on Test Data...\")\n",

								    "#     test_df = test_df.drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])\n",

								    "#     test_df = test_df.sort_values(by='user').reset_index(drop=True)\n",

								    "\n",

								    "#     users = test_df['user'].unique()\n",

								    "#     results = {}\n",

								    "#     accuracy_above_50 = 0\n",

								    "\n",

								    "#     for user in users:\n",

								    "#         user_df = test_df[test_df['user'] == user]\n",

								    "#         X, y_true = [], []\n",

								    "#         user_features = user_df.drop(columns=['user']).values\n",

								    "#         user_labels = user_df['user'].values\n",

								    "\n",

								    "#         if len(user_df) <= sequence_length:\n",

								    "#             print(f\"Skipping User {user} (not enough data for sequence length {sequence_length})\")\n",

								    "#             continue\n",

								    "\n",

								    "#         for i in range(len(user_df) - sequence_length):\n",

								    "#             seq_x = user_features[i:i + sequence_length]\n",

								    "#             seq_y = user_labels[i + sequence_length]\n",

								    "#             X.append(seq_x)\n",

								    "#             y_true.append(seq_y)\n",

								    "\n",

								    "#         X = np.array(X)\n",

								    "#         y_true = np.array(y_true)\n",

								    "\n",

								    "#         if len(X) == 0:\n",

								    "#             continue\n",

								    "\n",

								    "#         y_pred = model.predict(X, verbose=0)\n",

								    "#         y_pred_classes = np.argmax(y_pred, axis=1)\n",

								    "\n",

								    "#         unique_pred, counts_pred = np.unique(y_pred_classes, return_counts=True)\n",

								    "#         label_counts_pred = dict(zip(unique_pred, counts_pred))\n",

								    "\n",

								    "#         unique_true, counts_true = np.unique(y_true, return_counts=True)\n",

								    "#         label_counts_true = dict(zip(unique_true, counts_true))\n",

								    "\n",

								    "#         acc = accuracy_score(y_true, y_pred_classes)\n",

								    "#         if acc > 0.5:\n",

								    "#             accuracy_above_50 += 1\n",

								    "\n",

								    "#         results[user] = {\n",

								    "#             'accuracy': acc,\n",

								    "#             'predicted_counts': label_counts_pred,\n",

								    "#             'actual_counts': label_counts_true\n",

								    "#         }\n",

								    "\n",

								    "#         print(f\"\\n=== User {user} ===\")\n",

								    "#         print(f\"✅ Accuracy: {acc * 100:.2f}%\")\n",

								    "#         print(\"📊 Predicted Class Distribution:\", label_counts_pred)\n",

								    "#         print(\"📌 Actual Class Distribution:   \", label_counts_true)\n",

								    "\n",

								    "#     final_accuracy_percent = (accuracy_above_50 / 32) * 100\n",

								    "#     print(f\"\\n🟩 Final Evaluation Summary for Sequence Length {sequence_length}:\")\n",

								    "#     print(f\"Users with >50% Accuracy: {accuracy_above_50} / 32\")\n",

								    "#     print(f\"✅ Final Success Rate: {final_accuracy_percent:.2f}%\")\n",

								    "\n",

								    "# # === Run evaluation for each trained sequence length ===\n",

								    "# test_scenario = get_user_input_for_test()\n",

								    "# test_data = filter_test_data(df, test_scenario)\n",

								    "\n",

								    "# for sequence_length, result in best_models.items():\n",

								    "#     print(f\"\\n🔍 Testing Model for Sequence Length: {sequence_length}\")\n",

								    "#     evaluate_model_on_test_data(result['model'], test_data.copy(), sequence_length)\n",

								    "\n",

								    "# print(\"\\n✅ All evaluations completed.\")\n"

								   ]

								  }

								 ],

								 "metadata": {

								  "kaggle": {

								   "accelerator": "nvidiaTeslaT4",

								   "dataSources": [

								    {

								     "datasetId": 5775075,

								     "sourceId": 9494285,

								     "sourceType": "datasetVersion"

								    }

								   ],

								   "dockerImageVersionId": 31011,

								   "isGpuEnabled": true,

								   "isInternetEnabled": true,

								   "language": "python",

								   "sourceType": "notebook"

								  },

								  "kernelspec": {

								   "display_name": ".venv",

								   "language": "python",

								   "name": "python3"

								  },

								  "language_info": {

								   "codemirror_mode": {

								    "name": "ipython",

								    "version": 3

								   },

								   "file_extension": ".py",

								   "mimetype": "text/x-python",

								   "name": "python",

								   "nbconvert_exporter": "python",

								   "pygments_lexer": "ipython3",

								   "version": "3.10.18"

								  }

								 },

								 "nbformat": 4,

								 "nbformat_minor": 4

								}