From 40b32c30d365c4e51aa09388c683fa0af20ae402 Mon Sep 17 00:00:00 2001
From: Bianca Steffes
Date: Tue, 18 Nov 2025 13:11:48 +0100
Subject: [PATCH] Add baselines and extend manual evaluation

Add DummyClassifier baselines (most_frequent, stratified, uniform) over
all four datasets and sequence lengths, track accuracy alongside
precision, recall and F1 during manual tuning, and expose the learning
rate and an L1L2 regularizer as manual tuning knobs.

---
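Notes for reviewers (git am ignores everything from the '---' above down
to the first diff header, so these notes travel with the patch without
entering the commit message):

prepare_data_for_model() now shuffles x and y with two freshly seeded
random.Random(17) instances. That idiom is correct but fragile:
shuffle() draws only on the RNG state and the list length, so two
generators with the same seed apply the same permutation to two
equal-length lists. A standalone self-check of the idiom (toy data, not
the project's):

    import random

    x = [[0], [1], [2], [3]]
    y = [10, 11, 12, 13]
    random.Random(17).shuffle(x)  # same seed, same length ...
    random.Random(17).shuffle(y)  # ... hence the same permutation
    assert [row[0] + 10 for row in x] == y  # pairs stay aligned

If the two lists ever differ in length, the pairing breaks silently;
shuffling a list of indices and reordering both x and y through it
would be the more defensive variant.
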
 .gitignore  |  1 +
 main.py     | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 pipeline.py | 33 +++++++++++++++++++++++----------
 3 files changed, 74 insertions(+), 17 deletions(-)
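
A second note, on the baselines: calculate_baselines() scores sklearn's
DummyClassifier strategies on the held-out split to give the recurrent
models a floor to beat. The dummies are fit and evaluated on the same
split; that is acceptable here because they ignore the features
entirely. A standalone sketch of the idea (shapes, sizes and seeds are
illustrative only, not the project's data):

    import numpy as np
    from sklearn.dummy import DummyClassifier
    from sklearn.metrics import accuracy_score

    rng = np.random.default_rng(0)
    x = rng.normal(size=(200, 8))      # 200 samples, 8 features
    y = rng.integers(0, 32, size=200)  # 32 user classes

    for strategy in ['most_frequent', 'stratified', 'uniform']:
        cls = DummyClassifier(strategy=strategy, random_state=0)
        cls.fit(x, y)                  # dummy strategies ignore x
        print(strategy, accuracy_score(y, cls.predict(x)))

With 32 roughly balanced classes, all three strategies should land near
1/32 accuracy, which is the floor the LSTM/GRU results are measured
against.
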
diff --git a/.gitignore b/.gitignore
index fca8e1c..5997bd5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -142,3 +142,4 @@ cython_debug/
 working/tuner
 working
 figures
+baseline_results.json
diff --git a/main.py b/main.py
index a71702d..e4f23b1 100644
--- a/main.py
+++ b/main.py
@@ -4,7 +4,10 @@ import os
 import numpy as np
 import pandas as pd
 import sklearn
+from keras.regularizers import L1L2
 from matplotlib import pyplot as plt
+from pandas import DataFrame
+from sklearn.dummy import DummyClassifier
 
 from pipeline import (
     load_dataset,
@@ -13,7 +16,8 @@ from pipeline import (
     prepare_user_data,
     train_models,
     evaluate_models,
-    prepare_data_for_model, model_type_gru, model_type_lstm, model_type_bilstm, train_models_v2, train_one_model
+    prepare_data_for_model, model_type_gru, model_type_lstm, model_type_bilstm, train_models_v2, train_one_model,
+    eval_metrics
 )
 
 year_str = 'Year'
@@ -29,6 +33,7 @@ timespan_str = 'time used'
 hour_timespan_str = '1HR'
 min_timespan_str = '15MIN'
 sequence_length_str = 'sequence length'
+accuracy_str = 'accuracy'
 precision_str = 'precision'
 recall_str = 'recall'
 f1_string = 'f1 score'
@@ -332,7 +337,7 @@ def manual_tuning(model_type):
 
     df = load_dataset(file_path)
     df = remove_covid_data(df)
-    tr, val, te = split_data_by_userdata_percentage(df, percentages=(80, 10, 10), sample=20)
+    tr, val, te = split_data_by_userdata_percentage(df, percentages=(80, 10, 10), sample=100)
     tr = reduce_columns(tr, data_filename)
     val = reduce_columns(val, data_filename)
     te = reduce_columns(te, data_filename)
@@ -342,15 +347,18 @@ def manual_tuning(model_type):
 
     # fit and evaluate model
    # config
-    repeats = 5
-    n_batch = 4
+    repeats = 3
+    n_batch = 1024
     n_epochs = 500
-    n_neurons = 1
+    n_neurons = 16
+    l_rate = 1e-4
+    reg = L1L2(l1=0.0, l2=0.0)  # no-op at 0.0; raise l1/l2 to actually regularise
     history_list = list()
     # run diagnostic tests
     for i in range(repeats):
-        history = train_one_model(user_data_train, user_data_val, n_batch, n_epochs, n_neurons,
+        history = train_one_model(user_data_train, user_data_val, n_batch, n_epochs,
+                                  n_neurons, l_rate, reg,
                                   sequence_length=sequence_length, model_type=model_type)
         history_list.append(history)
@@ -358,11 +366,45 @@ def manual_tuning(model_type):
         for history in history_list:
             plt.plot(history['train_'+metric], color='blue')
             plt.plot(history['test_'+metric], color='orange')
-        plt.savefig(figure_path+metric+'_epochs_diagnostic.png')
+        plt.savefig(figure_path+metric+'_e'+str(n_epochs)+'_n'+str(n_neurons)+'_b'+
+                    str(n_batch)+'_l'+str(l_rate)+'_diagnostic.png')
         plt.clf()
     print('Done')
 
 
+def calculate_baselines():
+    file_combinations = [(hour_timespan_str, with_threshold_str, 'ALL32USERS1HR_WITHTHRESHOLD.xlsx'),
+                         (min_timespan_str, with_threshold_str, 'ALL32USERS15MIN_WITHTHRESHOLD.xlsx'),
+                         (min_timespan_str, without_threshold_str, 'ALLUSERS32_15MIN_WITHOUTTHREHOLD.xlsx'),
+                         (hour_timespan_str, without_threshold_str, 'ALLUSERS_32_1HR_WITHOUT_THRESHOLD.xlsx'),
+                         ]
+
+    baseline_res = pd.DataFrame()
+    for timespan_id, threshold_id, filename in file_combinations:
+        file_path = os.path.join(dataset_path, filename)
+        df = load_dataset(file_path)
+        df = remove_covid_data(df)
+
+        _, _, te = split_data_by_userdata_percentage(df, percentages=(80, 10, 10), sample=20)
+        te = reduce_columns(te, filename)
+        user_data_te = prepare_user_data(te)
+        for sequence_length in range(5, 30, 5):
+            x, y = prepare_data_for_model(user_data=user_data_te, sequence_length=sequence_length)
+
+            for strategy in ['most_frequent', 'stratified', 'uniform']:
+                cls = DummyClassifier(strategy=strategy)
+                cls.fit(x, y)
+                y_pred = cls.predict(x)
+                acc, p, r, f1 = eval_metrics(y_true=y, y_pred=y_pred)
+                baseline_res = pd.concat([baseline_res,
+                                          DataFrame({'strategy': [strategy], threshold_str: [threshold_id],
+                                                     timespan_str: [timespan_id], sequence_length_str: [sequence_length],
+                                                     accuracy_str: [acc], precision_str: [p], recall_str: [r],
+                                                     f1_string: [f1]})], ignore_index=True)
+    baseline_res.to_json('baseline_results.json')
+    print('Done')
+
+
 if __name__ == "__main__":
     # main_two_v1()
     # visualise_results_v1()
@@ -370,4 +412,5 @@ if __name__ == "__main__":
     # main_two_v2(model_type=model_type_gru)
     #visualise_results_v2()
     manual_tuning(model_type=model_type_lstm)
+    #calculate_baselines()
     print('Done')
diff --git a/pipeline.py b/pipeline.py
index 38c4356..1e875ad 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -1,3 +1,6 @@
+import random
+
+from sklearn.metrics import accuracy_score  # used by eval_metrics() below
 import keras_tuner
 import numpy as np
 import pandas as pd
@@ -78,6 +81,8 @@ def prepare_data_for_model(user_data, sequence_length):
         x_new, y_new = make_sequences(data, sequence_length)
         x = x + x_new
         y = y + y_new
+    random.Random(17).shuffle(x)  # fresh Random(17) each call: both shuffles draw
+    random.Random(17).shuffle(y)  # the same permutation, so x/y pairs stay aligned
     x = np.array(x)
     y = np.array(y)
     return x,y
@@ -205,7 +210,7 @@
     return tuner.get_best_models(num_models=1)[0]
 
 
-def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, sequence_length, model_type):
+def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, l_rate, reg, sequence_length, model_type):
     x, y = prepare_data_for_model(user_data=train_data, sequence_length=sequence_length)
     n_features = x.shape[2]
     users = list(train_data.keys())
@@ -225,7 +230,7 @@ def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, l_rate,
         # model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.2, step=0.1)))
-        model.add(Dense(len(users), activation='softmax'))
+        model.add(Dense(len(users), activation='softmax', bias_regularizer=reg))  # Input() holds no weights, so reg attaches here
         model.compile(
-            optimizer=Adam(learning_rate=1e-5),
+            optimizer=Adam(learning_rate=l_rate),
             loss=SparseCategoricalCrossentropy(),
             metrics=[SparseCategoricalAccuracy()],
         )
@@ -234,21 +239,24 @@ def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, l_rate,
     model = build_model()
 
     # fit model
-    train_p, test_p, train_r, test_r, train_f1, test_f1 = list(), list(),list(), list(),list(), list()
+    train_acc, test_acc, train_p, test_p, train_r, test_r, train_f1, test_f1 = list(), list(), list(), list(), list(), list(), list(), list()
     for i in range(n_epochs):
         model.fit(x, y, batch_size=n_batch, epochs=1, verbose=0, shuffle=False)
 
         # evaluate model on train data
-        p, r, f1 = evaluate(model, train_data, sequence_length, n_batch)
+        acc, p, r, f1 = evaluate(model, train_data, sequence_length, n_batch)
+        train_acc.append(acc)
         train_p.append(p)
         train_r.append(r)
         train_f1.append(f1)
 
         # evaluate model on test data
-        p, r, f1 = evaluate(model, val_data, sequence_length, n_batch)
+        acc, p, r, f1 = evaluate(model, val_data, sequence_length, n_batch)
+        test_acc.append(acc)
         test_p.append(p)
         test_r.append(r)
         test_f1.append(f1)
 
     history = DataFrame()
+    history['train_acc'], history['test_acc'] = train_acc, test_acc
     history['train_p'], history['test_p'] = train_p, test_p
     history['train_r'], history['test_r'] = train_r, test_r
     history['train_f1'], history['test_f1'] = train_f1, test_f1
@@ -262,11 +270,16 @@ def evaluate(model, df, sequence_length, batch_size):
 
     y_pred = model.predict(x, verbose=0, batch_size=batch_size)
     y_pred_classes = np.argmax(y_pred, axis=1)
-    f1 = f1_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
-    p = precision_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
-    r = recall_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
-    return p, r, f1
+    return eval_metrics(y_true=y_true, y_pred=y_pred_classes)
+
+
+def eval_metrics(y_true, y_pred):
+    f1 = f1_score(y_true=y_true, y_pred=y_pred, average='weighted')
+    p = precision_score(y_true=y_true, y_pred=y_pred, average='weighted')
+    r = recall_score(y_true=y_true, y_pred=y_pred, average='weighted')
+    acc = accuracy_score(y_true=y_true, y_pred=y_pred)
+    return acc, p, r, f1
 
 
 # === Evaluation ===
 def evaluate_models(best_models, df_test, sequence_lengths, output_excel_path):
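-- 
A note on the regularizer handling in train_one_model(): keras.Input
only describes the input tensor and holds no weights, so it accepts no
regularizer arguments; reg is therefore attached to the Dense output
layer instead. Where regularizers can live in a comparable model
(illustrative sketch, layer sizes made up):

    from keras import Sequential
    from keras.layers import Dense, Input, LSTM
    from keras.regularizers import L1L2

    reg = L1L2(l1=0.0, l2=1e-4)
    model = Sequential()
    model.add(Input(shape=(10, 4)))            # no weights, no regularizers
    model.add(LSTM(16, bias_regularizer=reg))  # recurrent layers accept them too
    model.add(Dense(32, activation='softmax', bias_regularizer=reg))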