You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
987 lines
37 KiB
987 lines
37 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {
|
|
"_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
|
|
"_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# This Python 3 environment comes with many helpful analytics libraries installed\n",
|
|
"# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n",
|
|
"# For example, here's several helpful packages to load\n",
|
|
"\n",
|
|
"import numpy as np # linear algebra\n",
|
|
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
|
|
"\n",
|
|
"# Input data files are available in the read-only \"../input/\" directory\n",
|
|
"# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n",
|
|
"\n",
|
|
"import os\n",
|
# Walk the read-only Kaggle input tree and echo every file path, one per line.
for dirname, _, filenames in os.walk('/kaggle/input'):
    if filenames:
        # A single print per directory yields the same one-path-per-line listing.
        print("\n".join(os.path.join(dirname, filename) for filename in filenames))
|
|
"\n",
|
|
"# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n",
|
|
"# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"collapsed": true,
|
|
"execution": {
|
|
"iopub.execute_input": "2025-05-02T07:51:57.538752Z",
|
|
"iopub.status.busy": "2025-05-02T07:51:57.538555Z",
|
|
"iopub.status.idle": "2025-05-02T08:46:51.909800Z",
|
|
"shell.execute_reply": "2025-05-02T08:46:51.909147Z",
|
|
"shell.execute_reply.started": "2025-05-02T07:51:57.538734Z"
|
|
},
|
|
"jupyter": {
|
|
"outputs_hidden": true
|
|
},
|
|
"trusted": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"ename": "ModuleNotFoundError",
|
|
"evalue": "No module named 'sklearn'",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[6], line 11\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkeras_tuner\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkt\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mkeras_tuner\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m RandomSearch\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m accuracy_score\n\u001b[1;32m 13\u001b[0m \u001b[38;5;66;03m# === Clean previous tuning directory ===\u001b[39;00m\n\u001b[1;32m 14\u001b[0m shutil\u001b[38;5;241m.\u001b[39mrmtree(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/kaggle/working/my_dir\u001b[39m\u001b[38;5;124m\"\u001b[39m, ignore_errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
|
|
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sklearn'"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import shutil\n",
|
|
"import os\n",
|
|
"from tensorflow.keras.models import Sequential\n",
|
|
"from tensorflow.keras.layers import LSTM, Dense, Dropout,GRU,Bidirectional\n",
|
|
"from tensorflow.keras.optimizers import Adam\n",
|
|
"from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping\n",
|
|
"import keras_tuner as kt\n",
|
|
"from keras_tuner import RandomSearch\n",
|
|
"from sklearn.metrics import accuracy_score\n",
|
|
"\n",
|
# === Clean previous tuning directory ===
# Removes the directory that the tuner later writes to; a missing directory is ignored.
shutil.rmtree("/kaggle/working/my_dir", ignore_errors=True)

# === Load dataset ===
file_path = '/kaggle/input/32usrs/ALLUSERS32_15MIN_WITHOUTTHREHOLD.xlsx'
df = pd.read_excel(io=file_path)
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# === Helper functions for scenario selection ===\n",
|
def get_user_input_for_scenario(scenario_type):
    """Interactively collect the (year, months) pairs that define a scenario.

    Prompts once for a comma-separated list of years, then once per entered
    year for a comma-separated list of months.

    Args:
        scenario_type: Label interpolated into the prompts (this notebook
            passes "training" or "validation").

    Returns:
        list[tuple[int, list[int]]]: One ``(year, months)`` tuple per entered
        year, in the order the years were typed. Empty if no year was given.

    Raises:
        ValueError: If a non-blank token cannot be parsed as an integer.
    """
    def _parse_int_list(raw):
        # Blank tokens are skipped so that a trailing comma ("2017,2018,") or
        # an empty answer no longer crashes on int('').
        return [int(token) for token in raw.split(',') if token.strip()]

    print(f"\nPlease define your custom {scenario_type} scenario:")
    years_input = input(f"Enter {scenario_type} years (comma-separated, e.g., 2017,2018): ").strip()
    years = _parse_int_list(years_input)
    years_months = []
    for year in years:
        months_input = input(f"Enter months for year {year} (comma-separated, e.g., 1,2,3): ").strip()
        years_months.append((year, _parse_int_list(months_input)))
    return years_months
|
|
"\n",
|
def display_warning_about_2020_data():
    """Print the two-line reminder about how 2020 data is treated."""
    notice = (
        "\n⚠️ Warning: 2020 data after February is excluded due to COVID-19.",
        "✅ Only Jan and Feb 2020 are used for testing. Do not use them in training/validation.",
    )
    # sep="\n" reproduces two consecutive print() calls.
    print(*notice, sep="\n")
|
|
"\n",
|
def display_warnings_for_scenarios(scenario_type):
    """Print the predefined reference scenarios for the given scenario type.

    Args:
        scenario_type: "training" lists ``predefined_training_scenarios``;
            "validation" lists ``predefined_validation_scenarios`` followed by
            a one-line note. Any other value prints nothing.
    """
    is_training = scenario_type == "training"
    if not is_training and scenario_type != "validation":
        return

    # The module-level dict is looked up only for the branch actually taken.
    if is_training:
        header = "\n⚠️ Predefined Training Scenarios (for reference only):"
        catalogue = predefined_training_scenarios
    else:
        header = "\n⚠️ Predefined Validation Scenario:"
        catalogue = predefined_validation_scenarios

    print(header)
    for name in catalogue:
        spans = ', '.join(f"{year}-{months}" for year, months in catalogue[name]['years_months'])
        print(f" {name}: {spans}")

    if not is_training:
        # NOTE(review): printed once after the listing; the source's indentation
        # was lost, confirm it was not meant to sit inside the loop.
        print(" - This uses Oct, Nov, Dec of 2019")
|
|
"\n",
|
# Reference scenarios shown to the user before they type their own.
# Shape of each entry: {display name: {"years_months": [(year, [months...]), ...]}}
predefined_training_scenarios = {
    "Scenario 1": {
        "years_months": [
            (2018, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
            (2019, [1, 2, 3, 4, 5, 6, 7, 8, 9]),
        ]
    },
    "Scenario 2": {
        "years_months": [
            (2017, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
            (2018, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
            (2019, [1, 2, 3, 4, 5, 6, 7, 8, 9]),
        ]
    },
}
predefined_validation_scenarios = {
    "Scenario A": {
        "years_months": [
            (2019, [10, 11, 12]),
        ]
    },
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
# === Get user-defined training and validation scenarios ===
# Both scenarios follow the same banner -> warnings -> prompt sequence, so the
# sequence is driven from a small table instead of being written out twice.
_collected = {}
for _kind, _banner in (
    ("training", "=== Training Scenario Setup ==="),
    ("validation", "\n=== Validation Scenario Setup ==="),
):
    print(_banner)
    display_warning_about_2020_data()
    display_warnings_for_scenarios(_kind)
    _collected[_kind] = get_user_input_for_scenario(_kind)

training_scenario = _collected["training"]
validation_scenario = _collected["validation"]
|
|
"\n",
|
|
"# === Filter and preprocess data ===\n",
|
def filter_data(df, scenario):
    """Select the rows of ``df`` covered by ``scenario`` and drop calendar columns.

    Args:
        df: DataFrame containing at least the 'Year', 'Month', 'date' and
            'DayOfWeek' columns.
        scenario: Iterable of ``(year, months)`` pairs; a row matches a pair
            when its 'Year' equals ``year`` and its 'Month' is in ``months``.

    Returns:
        DataFrame: Matching rows concatenated in scenario order, keeping their
        original index labels, without the 'Month', 'Year', 'date' and
        'DayOfWeek' columns. An empty scenario yields a zero-row frame with
        the remaining columns.
    """
    parts = [
        df[(df['Year'] == year) & (df['Month'].isin(months))]
        for year, months in scenario
    ]
    # Concatenate once instead of growing a frame inside the loop. For an empty
    # scenario use a zero-row slice so the columns survive and the drop below
    # does not raise KeyError.
    filtered = pd.concat(parts) if parts else df.iloc[0:0]
    return filtered.drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])
|
|
"\n",
|
# Build the training and validation frames from the scenarios chosen above.
data = filter_data(df=df, scenario=training_scenario)
data_val = filter_data(df=df, scenario=validation_scenario)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"\n",
|
# === Organize by user ===
# kind='stable' keeps each user's rows in the order they had before sorting.
# The default ('quicksort') is not a stable sort, so it may reorder rows that
# share the same user and scramble the consecutive-row windows built later.
# NOTE(review): assumes the loaded rows are already chronological per user;
# 'date' has been dropped by filter_data, so it cannot be used as a sort key here.
df_sorted = data.sort_values(by='user', kind='stable').reset_index(drop=True)
df_sorted_val = data_val.sort_values(by='user', kind='stable').reset_index(drop=True)
users = df_sorted['user'].unique()
users_val = df_sorted_val['user'].unique()

# One sub-frame per user, keyed by the user label.
user_data = {user: df_sorted[df_sorted['user'] == user] for user in users}
user_data_val = {user: df_sorted_val[df_sorted_val['user'] == user] for user in users_val}

# === Callbacks ===
# Both callbacks watch validation loss with the same patience of 5 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# === Model tuning and training loop ===\n",
|
best_models = {}


def _make_windows(frames_by_user, window):
    """Cut every user's frame into overlapping windows of ``window`` rows.

    Args:
        frames_by_user: Mapping of user label -> DataFrame with a 'user' column.
        window: Number of consecutive rows per window.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``X`` stacks the feature windows (all
        columns except 'user'); ``y`` holds the 'user' value of the row that
        immediately follows each window. Both are empty when no frame has more
        than ``window`` rows.
    """
    windows, targets = [], []
    for frame in frames_by_user.values():
        features = frame.drop('user', axis=1).values
        labels = frame['user'].values
        for start in range(len(features) - window):
            windows.append(features[start:start + window])
            targets.append(labels[start + window])
    return np.array(windows), np.array(targets)


for sequence_length in range(20, 30, 5):
    print(f"\n=== Training for Sequence Length: {sequence_length} ===")

    # Building the windows in a helper keeps its loop variables local. The
    # original inline loops used `data` as a loop variable, which overwrote the
    # module-level training DataFrame `data` and broke re-running earlier cells.
    X, y = _make_windows(user_data, sequence_length)
    X_val, y_val = _make_windows(user_data_val, sequence_length)

    if X.shape[0] == 0 or X_val.shape[0] == 0:
        print(f"⚠️ Skipped sequence length {sequence_length} due to insufficient data.")
        continue

    n_features = X.shape[2]

    def build_model(hp):
        """Build a Bidirectional-LSTM classifier from the tuner's hyperparameters."""
        model = Sequential()
        # NOTE(review): step=2 yields a very fine-grained 'units' grid between
        # 32 and 256; confirm this is intended rather than e.g. step=32.
        model.add(Bidirectional(LSTM(units=hp.Int('units', 32, 256, step=2),
                                     input_shape=(sequence_length, n_features))))
        model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.5, step=0.1)))
        # NOTE(review): sparse_categorical_crossentropy with len(users) outputs
        # presumes the 'user' labels are the integers 0..len(users)-1 -- verify.
        model.add(Dense(len(users), activation='softmax'))
        model.compile(
            optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )
        return model

    tuner = RandomSearch(
        build_model,
        objective='val_loss',
        max_trials=30,
        executions_per_trial=2,
        directory='/kaggle/working/my_dir',
        project_name=f'lstm_seq_{sequence_length}'
    )

    tuner.search(X, y, epochs=30, validation_data=(X_val, y_val),
                 callbacks=[early_stopping, lr_scheduler], verbose=1)

    # Rebuild a fresh model from the best hyperparameters and train it again.
    best_hps = tuner.get_best_hyperparameters(1)[0]
    best_model = tuner.hypermodel.build(best_hps)
    best_model.fit(X, y, epochs=30, validation_data=(X_val, y_val),
                   callbacks=[early_stopping, lr_scheduler], verbose=0)

    best_models[sequence_length] = {
        'model': best_model,
        'best_hyperparameters': {
            'units': best_hps.get('units'),
            'dropout_rate': best_hps.get('dropout_rate'),
            'learning_rate': best_hps.get('learning_rate')
        }
    }
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"\n",
|
|
"# === Get test scenario input ===\n",
|
def get_user_input_for_test():
    """Interactively collect the (year, months) pairs that define the test set.

    Prints a short reminder about the 2020 data, then prompts once for a
    comma-separated list of years and once per year for its months.

    Returns:
        list[tuple[int, list[int]]]: One ``(year, months)`` tuple per entered
        year, in the order the years were typed. Empty if no year was given.

    Raises:
        ValueError: If a non-blank token cannot be parsed as an integer.
    """
    def _parse_int_list(raw):
        # Blank tokens are skipped so that a trailing comma ("1,2,") or an
        # empty answer no longer crashes on int('').
        return [int(token) for token in raw.split(',') if token.strip()]

    print("\n=== Testing Scenario Setup ===")
    print("⚠️ Only January and February of 2020 were used for testing in predefined setup.")
    print("⚠️ Avoid using 2020 data after February due to COVID-19 impact.\n")
    years_input = input("Enter test years (comma-separated, e.g., 2020): ").strip()
    years = _parse_int_list(years_input)
    years_months = []
    for year in years:
        months_input = input(f"Enter months for year {year} (comma-separated, e.g., 1,2): ").strip()
        years_months.append((year, _parse_int_list(months_input)))
    return years_months
|
|
"\n",
|
def filter_test_data(df, scenario):
    """Return the rows of ``df`` that fall inside the test ``scenario``.

    Unlike ``filter_data`` this keeps every column; the calendar columns are
    dropped later by the evaluation function.

    Args:
        df: DataFrame containing at least 'Year' and 'Month' columns.
        scenario: Iterable of ``(year, months)`` pairs.

    Returns:
        DataFrame: Matching rows concatenated in scenario order with a fresh
        0..n-1 index. An empty scenario yields a zero-row frame with the same
        columns as ``df``.
    """
    data_parts = [
        df[(df['Year'] == year) & (df['Month'].isin(months))]
        for year, months in scenario
    ]
    if not data_parts:
        # pd.concat([]) raises ValueError; return an empty frame with the
        # original columns instead.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(data_parts, ignore_index=True)
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {
|
|
"execution": {
|
|
"iopub.execute_input": "2025-05-02T08:53:17.334789Z",
|
|
"iopub.status.busy": "2025-05-02T08:53:17.334489Z",
|
|
"iopub.status.idle": "2025-05-02T08:53:17.344855Z",
|
|
"shell.execute_reply": "2025-05-02T08:53:17.344176Z",
|
|
"shell.execute_reply.started": "2025-05-02T08:53:17.334766Z"
|
|
},
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import os\n",
|
|
"\n",
|
def evaluate_model_on_test_data(model, test_df, sequence_length, excel_writer):
    """Score ``model`` per user on sliding windows of ``test_df`` and record the results.

    For every distinct 'user' value the user's rows are cut into overlapping
    windows of ``sequence_length`` rows, the model predicts a class for each
    window (argmax over axis 1 of ``model.predict``), and the prediction is
    compared with the 'user' value of the row following the window. Per-user
    results and a final summary are printed and collected in a table.

    Args:
        model: Object exposing ``predict(X, verbose=0)`` that returns one row
            of per-class scores for each window.
        test_df: DataFrame with 'Month', 'Year', 'date', 'DayOfWeek', 'user'
            and feature columns. It is not modified.
        sequence_length: Number of consecutive rows per window.
        excel_writer: Target handed to ``DataFrame.to_excel`` (the notebook
            passes an open ``ExcelWriter``); the sheet is named
            ``SeqLen_<sequence_length>``. Pass ``None`` to skip writing.

    Returns:
        DataFrame: One row per evaluated user plus a trailing 'TOTAL' row,
        i.e. the same table that is written to Excel.
    """
    def _count_labels(values):
        # .tolist() converts NumPy scalars to plain Python values so the
        # printed/stored dict looks the same across NumPy versions.
        unique, counts = np.unique(values, return_counts=True)
        return dict(zip(unique.tolist(), counts.tolist()))

    print("\n🧪 Evaluating on Test Data...")
    test_df = test_df.drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])
    # Stable sort: rows of the same user keep their incoming order, which the
    # consecutive-row windows below depend on (the default quicksort may not).
    test_df = test_df.sort_values(by='user', kind='stable').reset_index(drop=True)

    users = test_df['user'].unique()
    # Denominator for the summary: every user present in the test data, even
    # those skipped for lack of rows (previously hard-coded to 32).
    total_users = len(users)
    results = []
    accuracy_above_50 = 0

    for user in users:
        user_df = test_df[test_df['user'] == user]

        if len(user_df) <= sequence_length:
            print(f"Skipping User {user} (not enough data for sequence length {sequence_length})")
            continue

        user_features = user_df.drop(columns=['user']).values
        user_labels = user_df['user'].values

        n_windows = len(user_df) - sequence_length
        X = np.array([user_features[i:i + sequence_length] for i in range(n_windows)])
        # Target of window i is the label of row i + sequence_length.
        y_true = user_labels[sequence_length:]

        y_pred = model.predict(X, verbose=0)
        y_pred_classes = np.argmax(y_pred, axis=1)

        label_counts_pred = _count_labels(y_pred_classes)
        label_counts_true = _count_labels(y_true)

        # Fraction of windows classified correctly; computed with NumPy so the
        # function does not depend on scikit-learn being installed.
        acc = float(np.mean(y_pred_classes == y_true))
        if acc > 0.5:
            accuracy_above_50 += 1

        results.append({
            'User': user,
            'Accuracy (%)': acc * 100,
            'Predicted Class Distribution': str(label_counts_pred),
            'Actual Class Distribution': str(label_counts_true)
        })

        print(f"\n=== User {user} ===")
        print(f"✅ Accuracy: {acc * 100:.2f}%")
        print("📊 Predicted Class Distribution:", label_counts_pred)
        print("📌 Actual Class Distribution: ", label_counts_true)

    final_accuracy_percent = (accuracy_above_50 / total_users) * 100 if total_users else 0.0
    print(f"\n🟩 Final Evaluation Summary for Sequence Length {sequence_length}:")
    print(f"Users with >50% Accuracy: {accuracy_above_50} / {total_users}")
    print(f"✅ Final Success Rate: {final_accuracy_percent:.2f}%")

    # Overall statistics go in as one extra row at the bottom of the sheet.
    results.append({
        'User': 'TOTAL',
        'Accuracy (%)': '',
        'Predicted Class Distribution': f'Users >50% Acc: {accuracy_above_50}/{total_users}',
        'Actual Class Distribution': f'Success Rate: {final_accuracy_percent:.2f}%'
    })

    df_results = pd.DataFrame(results)
    if excel_writer is not None:
        df_results.to_excel(excel_writer, sheet_name=f"SeqLen_{sequence_length}", index=False)
    return df_results
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"collapsed": true,
|
|
"execution": {
|
|
"iopub.execute_input": "2025-05-02T08:56:14.082755Z",
|
|
"iopub.status.busy": "2025-05-02T08:56:14.082010Z",
|
|
"iopub.status.idle": "2025-05-02T08:56:28.518300Z",
|
|
"shell.execute_reply": "2025-05-02T08:56:28.517562Z",
|
|
"shell.execute_reply.started": "2025-05-02T08:56:14.082721Z"
|
|
},
|
|
"jupyter": {
|
|
"outputs_hidden": true
|
|
},
|
|
"trusted": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"=== Testing Scenario Setup ===\n",
|
|
"⚠️ Only January and February of 2020 were used for testing in predefined setup.\n",
|
|
"⚠️ Avoid using 2020 data after February due to COVID-19 impact.\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Enter test years (comma-separated, e.g., 2020): 2020\n",
|
|
"Enter months for year 2020 (comma-separated, e.g., 1,2): 1,2\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"🔍 Testing Model for Sequence Length: 20\n",
|
|
"\n",
|
|
"🧪 Evaluating on Test Data...\n",
|
|
"\n",
|
|
"=== User 0 ===\n",
|
|
"✅ Accuracy: 47.50%\n",
|
|
"📊 Predicted Class Distribution: {0: 19, 18: 9, 24: 7, 26: 1, 30: 3, 31: 1}\n",
|
|
"📌 Actual Class Distribution: {0: 40}\n",
|
|
"\n",
|
|
"=== User 1 ===\n",
|
|
"✅ Accuracy: 82.50%\n",
|
|
"📊 Predicted Class Distribution: {1: 33, 31: 7}\n",
|
|
"📌 Actual Class Distribution: {1: 40}\n",
|
|
"\n",
|
|
"=== User 2 ===\n",
|
|
"✅ Accuracy: 0.00%\n",
|
|
"📊 Predicted Class Distribution: {6: 2, 12: 12, 17: 13, 30: 12, 31: 1}\n",
|
|
"📌 Actual Class Distribution: {2: 40}\n",
|
|
"\n",
|
|
"=== User 3 ===\n",
|
|
"✅ Accuracy: 41.03%\n",
|
|
"📊 Predicted Class Distribution: {3: 16, 6: 1, 12: 8, 29: 13, 30: 1}\n",
|
|
"📌 Actual Class Distribution: {3: 39}\n",
|
|
"\n",
|
|
"=== User 4 ===\n",
|
|
"✅ Accuracy: 2.50%\n",
|
|
"📊 Predicted Class Distribution: {2: 1, 4: 1, 8: 2, 9: 3, 18: 11, 23: 3, 26: 16, 29: 1, 30: 1, 31: 1}\n",
|
|
"📌 Actual Class Distribution: {4: 40}\n",
|
|
"\n",
|
|
"=== User 5 ===\n",
|
|
"✅ Accuracy: 57.50%\n",
|
|
"📊 Predicted Class Distribution: {2: 5, 5: 23, 23: 2, 29: 6, 30: 3, 31: 1}\n",
|
|
"📌 Actual Class Distribution: {5: 40}\n",
|
|
"\n",
|
|
"=== User 6 ===\n",
|
|
"✅ Accuracy: 25.00%\n",
|
|
"📊 Predicted Class Distribution: {6: 10, 17: 1, 30: 5, 31: 24}\n",
|
|
"📌 Actual Class Distribution: {6: 40}\n",
|
|
"\n",
|
|
"=== User 7 ===\n",
|
|
"✅ Accuracy: 52.50%\n",
|
|
"📊 Predicted Class Distribution: {7: 21, 10: 3, 11: 14, 18: 2}\n",
|
|
"📌 Actual Class Distribution: {7: 40}\n",
|
|
"\n",
|
|
"=== User 8 ===\n",
|
|
"✅ Accuracy: 62.50%\n",
|
|
"📊 Predicted Class Distribution: {8: 25, 23: 1, 29: 8, 30: 6}\n",
|
|
"📌 Actual Class Distribution: {8: 40}\n",
|
|
"\n",
|
|
"=== User 9 ===\n",
|
|
"✅ Accuracy: 100.00%\n",
|
|
"📊 Predicted Class Distribution: {9: 40}\n",
|
|
"📌 Actual Class Distribution: {9: 40}\n",
|
|
"\n",
|
|
"=== User 10 ===\n",
|
|
"✅ Accuracy: 57.50%\n",
|
|
"📊 Predicted Class Distribution: {10: 23, 11: 15, 30: 2}\n",
|
|
"📌 Actual Class Distribution: {10: 40}\n",
|
|
"\n",
|
|
"=== User 11 ===\n",
|
|
"✅ Accuracy: 35.00%\n",
|
|
"📊 Predicted Class Distribution: {1: 1, 10: 15, 11: 14, 12: 1, 14: 4, 15: 2, 16: 2, 25: 1}\n",
|
|
"📌 Actual Class Distribution: {11: 40}\n",
|
|
"\n",
|
|
"=== User 12 ===\n",
|
|
"✅ Accuracy: 62.50%\n",
|
|
"📊 Predicted Class Distribution: {3: 1, 12: 25, 26: 14}\n",
|
|
"📌 Actual Class Distribution: {12: 40}\n",
|
|
"\n",
|
|
"=== User 13 ===\n",
|
|
"✅ Accuracy: 55.00%\n",
|
|
"📊 Predicted Class Distribution: {10: 3, 11: 3, 12: 2, 13: 22, 16: 1, 21: 9}\n",
|
|
"📌 Actual Class Distribution: {13: 40}\n",
|
|
"\n",
|
|
"=== User 14 ===\n",
|
|
"✅ Accuracy: 70.00%\n",
|
|
"📊 Predicted Class Distribution: {0: 1, 14: 28, 16: 2, 18: 7, 25: 2}\n",
|
|
"📌 Actual Class Distribution: {14: 40}\n",
|
|
"\n",
|
|
"=== User 15 ===\n",
|
|
"✅ Accuracy: 100.00%\n",
|
|
"📊 Predicted Class Distribution: {15: 40}\n",
|
|
"📌 Actual Class Distribution: {15: 40}\n",
|
|
"\n",
|
|
"=== User 16 ===\n",
|
|
"✅ Accuracy: 17.50%\n",
|
|
"📊 Predicted Class Distribution: {15: 20, 16: 7, 18: 13}\n",
|
|
"📌 Actual Class Distribution: {16: 40}\n",
|
|
"\n",
|
|
"=== User 17 ===\n",
|
|
"✅ Accuracy: 40.00%\n",
|
|
"📊 Predicted Class Distribution: {0: 2, 16: 6, 17: 16, 18: 1, 28: 1, 31: 14}\n",
|
|
"📌 Actual Class Distribution: {17: 40}\n",
|
|
"\n",
|
|
"=== User 18 ===\n",
|
|
"✅ Accuracy: 97.50%\n",
|
|
"📊 Predicted Class Distribution: {0: 1, 18: 39}\n",
|
|
"📌 Actual Class Distribution: {18: 40}\n",
|
|
"\n",
|
|
"=== User 19 ===\n",
|
|
"✅ Accuracy: 72.50%\n",
|
|
"📊 Predicted Class Distribution: {1: 3, 6: 7, 19: 29, 22: 1}\n",
|
|
"📌 Actual Class Distribution: {19: 40}\n",
|
|
"\n",
|
|
"=== User 20 ===\n",
|
|
"✅ Accuracy: 77.50%\n",
|
|
"📊 Predicted Class Distribution: {2: 8, 20: 31, 26: 1}\n",
|
|
"📌 Actual Class Distribution: {20: 40}\n",
|
|
"\n",
|
|
"=== User 21 ===\n",
|
|
"✅ Accuracy: 92.50%\n",
|
|
"📊 Predicted Class Distribution: {21: 37, 24: 3}\n",
|
|
"📌 Actual Class Distribution: {21: 40}\n",
|
|
"\n",
|
|
"=== User 22 ===\n",
|
|
"✅ Accuracy: 0.00%\n",
|
|
"📊 Predicted Class Distribution: {8: 4, 9: 2, 23: 1, 29: 27, 30: 1}\n",
|
|
"📌 Actual Class Distribution: {22: 35}\n",
|
|
"\n",
|
|
"=== User 23 ===\n",
|
|
"✅ Accuracy: 77.50%\n",
|
|
"📊 Predicted Class Distribution: {3: 9, 23: 31}\n",
|
|
"📌 Actual Class Distribution: {23: 40}\n",
|
|
"\n",
|
|
"=== User 24 ===\n",
|
|
"✅ Accuracy: 92.50%\n",
|
|
"📊 Predicted Class Distribution: {21: 3, 24: 37}\n",
|
|
"📌 Actual Class Distribution: {24: 40}\n",
|
|
"\n",
|
|
"=== User 25 ===\n",
|
|
"✅ Accuracy: 2.50%\n",
|
|
"📊 Predicted Class Distribution: {2: 14, 12: 11, 23: 1, 25: 1, 29: 4, 30: 9}\n",
|
|
"📌 Actual Class Distribution: {25: 40}\n",
|
|
"\n",
|
|
"=== User 26 ===\n",
|
|
"✅ Accuracy: 0.00%\n",
|
|
"📊 Predicted Class Distribution: {12: 18, 18: 3, 21: 13, 24: 6}\n",
|
|
"📌 Actual Class Distribution: {26: 40}\n",
|
|
"\n",
|
|
"=== User 27 ===\n",
|
|
"✅ Accuracy: 0.00%\n",
|
|
"📊 Predicted Class Distribution: {12: 38, 21: 1, 24: 1}\n",
|
|
"📌 Actual Class Distribution: {27: 40}\n",
|
|
"\n",
|
|
"=== User 28 ===\n",
|
|
"✅ Accuracy: 100.00%\n",
|
|
"📊 Predicted Class Distribution: {28: 40}\n",
|
|
"📌 Actual Class Distribution: {28: 40}\n",
|
|
"\n",
|
|
"=== User 29 ===\n",
|
|
"✅ Accuracy: 40.00%\n",
|
|
"📊 Predicted Class Distribution: {12: 12, 26: 1, 29: 16, 30: 11}\n",
|
|
"📌 Actual Class Distribution: {29: 40}\n",
|
|
"\n",
|
|
"=== User 30 ===\n",
|
|
"✅ Accuracy: 35.00%\n",
|
|
"📊 Predicted Class Distribution: {12: 1, 18: 9, 23: 5, 25: 3, 26: 3, 29: 2, 30: 14, 31: 3}\n",
|
|
"📌 Actual Class Distribution: {30: 40}\n",
|
|
"\n",
|
|
"=== User 31 ===\n",
|
|
"✅ Accuracy: 50.00%\n",
|
|
"📊 Predicted Class Distribution: {12: 2, 18: 18, 31: 20}\n",
|
|
"📌 Actual Class Distribution: {31: 40}\n",
|
|
"\n",
|
|
"🟩 Final Evaluation Summary for Sequence Length 20:\n",
|
|
"Users with >50% Accuracy: 17 / 32\n",
|
|
"✅ Final Success Rate: 53.12%\n",
|
|
"\n",
|
|
"🔍 Testing Model for Sequence Length: 25\n",
|
|
"\n",
|
|
"🧪 Evaluating on Test Data...\n",
|
|
"\n",
|
|
"=== User 0 ===\n",
|
|
"✅ Accuracy: 17.14%\n",
|
|
"📊 Predicted Class Distribution: {0: 6, 18: 2, 24: 3, 25: 2, 26: 14, 30: 7, 31: 1}\n",
|
|
"📌 Actual Class Distribution: {0: 35}\n",
|
|
"\n",
|
|
"=== User 1 ===\n",
|
|
"✅ Accuracy: 8.57%\n",
|
|
"📊 Predicted Class Distribution: {1: 3, 31: 32}\n",
|
|
"📌 Actual Class Distribution: {1: 35}\n",
|
|
"\n",
|
|
"=== User 2 ===\n",
|
|
"✅ Accuracy: 5.71%\n",
|
|
"📊 Predicted Class Distribution: {2: 2, 12: 5, 17: 11, 21: 1, 30: 3, 31: 13}\n",
|
|
"📌 Actual Class Distribution: {2: 35}\n",
|
|
"\n",
|
|
"=== User 3 ===\n",
|
|
"✅ Accuracy: 14.71%\n",
|
|
"📊 Predicted Class Distribution: {3: 5, 12: 1, 29: 5, 30: 16, 31: 7}\n",
|
|
"📌 Actual Class Distribution: {3: 34}\n",
|
|
"\n",
|
|
"=== User 4 ===\n",
|
|
"✅ Accuracy: 0.00%\n",
|
|
"📊 Predicted Class Distribution: {2: 4, 9: 4, 10: 1, 25: 7, 26: 5, 27: 1, 30: 12, 31: 1}\n",
|
|
"📌 Actual Class Distribution: {4: 35}\n",
|
|
"\n",
|
|
"=== User 5 ===\n",
|
|
"✅ Accuracy: 100.00%\n",
|
|
"📊 Predicted Class Distribution: {5: 35}\n",
|
|
"📌 Actual Class Distribution: {5: 35}\n",
|
|
"\n",
|
|
"=== User 6 ===\n",
|
|
"✅ Accuracy: 31.43%\n",
|
|
"📊 Predicted Class Distribution: {6: 11, 31: 24}\n",
|
|
"📌 Actual Class Distribution: {6: 35}\n",
|
|
"\n",
|
|
"=== User 7 ===\n",
|
|
"✅ Accuracy: 65.71%\n",
|
|
"📊 Predicted Class Distribution: {7: 23, 10: 3, 13: 9}\n",
|
|
"📌 Actual Class Distribution: {7: 35}\n",
|
|
"\n",
|
|
"=== User 8 ===\n",
|
|
"✅ Accuracy: 82.86%\n",
|
|
"📊 Predicted Class Distribution: {4: 2, 8: 29, 22: 2, 30: 2}\n",
|
|
"📌 Actual Class Distribution: {8: 35}\n",
|
|
"\n",
|
|
"=== User 9 ===\n",
|
|
"✅ Accuracy: 97.14%\n",
|
|
"📊 Predicted Class Distribution: {4: 1, 9: 34}\n",
|
|
"📌 Actual Class Distribution: {9: 35}\n",
|
|
"\n",
|
|
"=== User 10 ===\n",
|
|
"✅ Accuracy: 40.00%\n",
|
|
"📊 Predicted Class Distribution: {10: 14, 13: 6, 23: 3, 25: 2, 30: 10}\n",
|
|
"📌 Actual Class Distribution: {10: 35}\n",
|
|
"\n",
|
|
"=== User 11 ===\n",
|
|
"✅ Accuracy: 31.43%\n",
|
|
"📊 Predicted Class Distribution: {10: 22, 11: 11, 12: 1, 19: 1}\n",
|
|
"📌 Actual Class Distribution: {11: 35}\n",
|
|
"\n",
|
|
"=== User 12 ===\n",
|
|
"✅ Accuracy: 57.14%\n",
|
|
"📊 Predicted Class Distribution: {12: 20, 29: 15}\n",
|
|
"📌 Actual Class Distribution: {12: 35}\n",
|
|
"\n",
|
|
"=== User 13 ===\n",
|
|
"✅ Accuracy: 57.14%\n",
|
|
"📊 Predicted Class Distribution: {12: 1, 13: 20, 21: 14}\n",
|
|
"📌 Actual Class Distribution: {13: 35}\n",
|
|
"\n",
|
|
"=== User 14 ===\n",
|
|
"✅ Accuracy: 62.86%\n",
|
|
"📊 Predicted Class Distribution: {0: 4, 14: 22, 15: 2, 18: 7}\n",
|
|
"📌 Actual Class Distribution: {14: 35}\n",
|
|
"\n",
|
|
"=== User 15 ===\n",
|
|
"✅ Accuracy: 100.00%\n",
|
|
"📊 Predicted Class Distribution: {15: 35}\n",
|
|
"📌 Actual Class Distribution: {15: 35}\n",
|
|
"\n",
|
|
"=== User 16 ===\n",
|
|
"✅ Accuracy: 40.00%\n",
|
|
"📊 Predicted Class Distribution: {7: 2, 15: 13, 16: 14, 18: 6}\n",
|
|
"📌 Actual Class Distribution: {16: 35}\n",
|
|
"\n",
|
|
"=== User 17 ===\n",
|
|
"✅ Accuracy: 65.71%\n",
|
|
"📊 Predicted Class Distribution: {0: 1, 16: 11, 17: 23}\n",
|
|
"📌 Actual Class Distribution: {17: 35}\n",
|
|
"\n",
|
|
"=== User 18 ===\n",
|
|
"✅ Accuracy: 82.86%\n",
|
|
"📊 Predicted Class Distribution: {0: 6, 18: 29}\n",
|
|
"📌 Actual Class Distribution: {18: 35}\n",
|
|
"\n",
|
|
"=== User 19 ===\n",
|
|
"✅ Accuracy: 60.00%\n",
|
|
"📊 Predicted Class Distribution: {6: 13, 19: 21, 22: 1}\n",
|
|
"📌 Actual Class Distribution: {19: 35}\n",
|
|
"\n",
|
|
"=== User 20 ===\n",
|
|
"✅ Accuracy: 5.71%\n",
|
|
"📊 Predicted Class Distribution: {2: 33, 20: 2}\n",
|
|
"📌 Actual Class Distribution: {20: 35}\n",
|
|
"\n",
|
|
"=== User 21 ===\n",
|
|
"✅ Accuracy: 100.00%\n",
|
|
"📊 Predicted Class Distribution: {21: 35}\n",
|
|
"📌 Actual Class Distribution: {21: 35}\n",
|
|
"\n",
|
|
"=== User 22 ===\n",
|
|
"✅ Accuracy: 0.00%\n",
|
|
"📊 Predicted Class Distribution: {8: 2, 9: 2, 29: 26}\n",
|
|
"📌 Actual Class Distribution: {22: 30}\n",
|
|
"\n",
|
|
"=== User 23 ===\n",
|
|
"✅ Accuracy: 65.71%\n",
|
|
"📊 Predicted Class Distribution: {3: 4, 23: 23, 30: 8}\n",
|
|
"📌 Actual Class Distribution: {23: 35}\n",
|
|
"\n",
|
|
"=== User 24 ===\n",
|
|
"✅ Accuracy: 100.00%\n",
|
|
"📊 Predicted Class Distribution: {24: 35}\n",
|
|
"📌 Actual Class Distribution: {24: 35}\n",
|
|
"\n",
|
|
"=== User 25 ===\n",
|
|
"✅ Accuracy: 0.00%\n",
|
|
"📊 Predicted Class Distribution: {2: 33, 12: 1, 30: 1}\n",
|
|
"📌 Actual Class Distribution: {25: 35}\n",
|
|
"\n",
|
|
"=== User 26 ===\n",
|
|
"✅ Accuracy: 0.00%\n",
|
|
"📊 Predicted Class Distribution: {12: 29, 21: 6}\n",
|
|
"📌 Actual Class Distribution: {26: 35}\n",
|
|
"\n",
|
|
"=== User 27 ===\n",
|
|
"✅ Accuracy: 0.00%\n",
|
|
"📊 Predicted Class Distribution: {12: 35}\n",
|
|
"📌 Actual Class Distribution: {27: 35}\n",
|
|
"\n",
|
|
"=== User 28 ===\n",
|
|
"✅ Accuracy: 100.00%\n",
|
|
"📊 Predicted Class Distribution: {28: 35}\n",
|
|
"📌 Actual Class Distribution: {28: 35}\n",
|
|
"\n",
|
|
"=== User 29 ===\n",
|
|
"✅ Accuracy: 28.57%\n",
|
|
"📊 Predicted Class Distribution: {2: 1, 12: 2, 26: 8, 29: 10, 30: 14}\n",
|
|
"📌 Actual Class Distribution: {29: 35}\n",
|
|
"\n",
|
|
"=== User 30 ===\n",
|
|
"✅ Accuracy: 34.29%\n",
|
|
"📊 Predicted Class Distribution: {2: 4, 26: 2, 27: 4, 29: 13, 30: 12}\n",
|
|
"📌 Actual Class Distribution: {30: 35}\n",
|
|
"\n",
|
|
"=== User 31 ===\n",
|
|
"✅ Accuracy: 60.00%\n",
|
|
"📊 Predicted Class Distribution: {12: 1, 16: 1, 18: 12, 31: 21}\n",
|
|
"📌 Actual Class Distribution: {31: 35}\n",
|
|
"\n",
|
|
"🟩 Final Evaluation Summary for Sequence Length 25:\n",
|
|
"Users with >50% Accuracy: 16 / 32\n",
|
|
"✅ Final Success Rate: 50.00%\n",
|
|
"\n",
|
|
"✅ All evaluations completed. Results saved to: /kaggle/working/evaluation_results.xlsx\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
from pandas import ExcelWriter

# === Run evaluation for each trained sequence length ===
test_scenario = get_user_input_for_test()
test_data = filter_test_data(df, test_scenario)

output_excel_path = "/kaggle/working/evaluation_results.xlsx"

if not best_models:
    # Without any trained model no sheet would be written.
    # NOTE(review): closing an ExcelWriter with zero sheets is expected to fail
    # with the openpyxl engine; confirm for the engine in use.
    print("⚠️ No trained models found in best_models; nothing was evaluated.")
else:
    with ExcelWriter(output_excel_path) as writer:
        for sequence_length, result in best_models.items():
            print(f"\n🔍 Testing Model for Sequence Length: {sequence_length}")
            evaluate_model_on_test_data(
                result['model'],
                test_data.copy(),  # each evaluation gets its own copy of the frame
                sequence_length,
                writer  # one sheet per sequence length in the same workbook
            )

    print(f"\n✅ All evaluations completed. Results saved to: {output_excel_path}")
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"\n",
|
|
"\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"trusted": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n",
|
|
"# # === Legacy evaluation function without Excel export (superseded by the version defined earlier; kept commented out for reference) ===\n",
|
|
"# def evaluate_model_on_test_data(model, test_df, sequence_length):\n",
|
|
"# print(\"\\n🧪 Evaluating on Test Data...\")\n",
|
|
"# test_df = test_df.drop(columns=['Month', 'Year', 'date', 'DayOfWeek'])\n",
|
|
"# test_df = test_df.sort_values(by='user').reset_index(drop=True)\n",
|
|
"\n",
|
|
"# users = test_df['user'].unique()\n",
|
|
"# results = {}\n",
|
|
"# accuracy_above_50 = 0\n",
|
|
"\n",
|
|
"# for user in users:\n",
|
|
"# user_df = test_df[test_df['user'] == user]\n",
|
|
"# X, y_true = [], []\n",
|
|
"# user_features = user_df.drop(columns=['user']).values\n",
|
|
"# user_labels = user_df['user'].values\n",
|
|
"\n",
|
|
"# if len(user_df) <= sequence_length:\n",
|
|
"# print(f\"Skipping User {user} (not enough data for sequence length {sequence_length})\")\n",
|
|
"# continue\n",
|
|
"\n",
|
|
"# for i in range(len(user_df) - sequence_length):\n",
|
|
"# seq_x = user_features[i:i + sequence_length]\n",
|
|
"# seq_y = user_labels[i + sequence_length]\n",
|
|
"# X.append(seq_x)\n",
|
|
"# y_true.append(seq_y)\n",
|
|
"\n",
|
|
"# X = np.array(X)\n",
|
|
"# y_true = np.array(y_true)\n",
|
|
"\n",
|
|
"# if len(X) == 0:\n",
|
|
"# continue\n",
|
|
"\n",
|
|
"# y_pred = model.predict(X, verbose=0)\n",
|
|
"# y_pred_classes = np.argmax(y_pred, axis=1)\n",
|
|
"\n",
|
|
"# unique_pred, counts_pred = np.unique(y_pred_classes, return_counts=True)\n",
|
|
"# label_counts_pred = dict(zip(unique_pred, counts_pred))\n",
|
|
"\n",
|
|
"# unique_true, counts_true = np.unique(y_true, return_counts=True)\n",
|
|
"# label_counts_true = dict(zip(unique_true, counts_true))\n",
|
|
"\n",
|
|
"# acc = accuracy_score(y_true, y_pred_classes)\n",
|
|
"# if acc > 0.5:\n",
|
|
"# accuracy_above_50 += 1\n",
|
|
"\n",
|
|
"# results[user] = {\n",
|
|
"# 'accuracy': acc,\n",
|
|
"# 'predicted_counts': label_counts_pred,\n",
|
|
"# 'actual_counts': label_counts_true\n",
|
|
"# }\n",
|
|
"\n",
|
|
"# print(f\"\\n=== User {user} ===\")\n",
|
|
"# print(f\"✅ Accuracy: {acc * 100:.2f}%\")\n",
|
|
"# print(\"📊 Predicted Class Distribution:\", label_counts_pred)\n",
|
|
"# print(\"📌 Actual Class Distribution: \", label_counts_true)\n",
|
|
"\n",
|
|
"# final_accuracy_percent = (accuracy_above_50 / 32) * 100\n",
|
|
"# print(f\"\\n🟩 Final Evaluation Summary for Sequence Length {sequence_length}:\")\n",
|
|
"# print(f\"Users with >50% Accuracy: {accuracy_above_50} / 32\")\n",
|
|
"# print(f\"✅ Final Success Rate: {final_accuracy_percent:.2f}%\")\n",
|
|
"\n",
|
|
"# # === Run evaluation for each trained sequence length ===\n",
|
|
"# test_scenario = get_user_input_for_test()\n",
|
|
"# test_data = filter_test_data(df, test_scenario)\n",
|
|
"\n",
|
|
"# for sequence_length, result in best_models.items():\n",
|
|
"# print(f\"\\n🔍 Testing Model for Sequence Length: {sequence_length}\")\n",
|
|
"# evaluate_model_on_test_data(result['model'], test_data.copy(), sequence_length)\n",
|
|
"\n",
|
|
"# print(\"\\n✅ All evaluations completed.\")\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kaggle": {
|
|
"accelerator": "nvidiaTeslaT4",
|
|
"dataSources": [
|
|
{
|
|
"datasetId": 5775075,
|
|
"sourceId": 9494285,
|
|
"sourceType": "datasetVersion"
|
|
}
|
|
],
|
|
"dockerImageVersionId": 31011,
|
|
"isGpuEnabled": true,
|
|
"isInternetEnabled": true,
|
|
"language": "python",
|
|
"sourceType": "notebook"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": ".venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.18"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|