
Added Baselines + extended manual evaluation

master
Bianca Steffes, 3 weeks ago
commit 40b32c30d3
  1. .gitignore (1 line changed)
  2. main.py (57 lines changed)
  3. pipeline.py (32 lines changed)

.gitignore (1 line changed)

@@ -142,3 +142,4 @@ cython_debug/
working/tuner
working
figures
baseline_results.json

main.py (57 lines changed)

@@ -4,7 +4,10 @@ import os
import numpy as np
import pandas as pd
import sklearn
from keras.regularizers import L1L2  # public import path; keras.src is internal to Keras
from matplotlib import pyplot as plt
from pandas import DataFrame
from sklearn.dummy import DummyClassifier
from pipeline import (
    load_dataset,
@@ -13,7 +16,8 @@ from pipeline import (
    prepare_user_data,
    train_models,
    evaluate_models,
    prepare_data_for_model, model_type_gru, model_type_lstm, model_type_bilstm, train_models_v2, train_one_model
    prepare_data_for_model, model_type_gru, model_type_lstm, model_type_bilstm, train_models_v2, train_one_model,
    eval_metrics
)
year_str = 'Year'
@@ -29,6 +33,7 @@ timespan_str = 'time used'
hour_timespan_str = '1HR'
min_timespan_str = '15MIN'
sequence_length_str = 'sequence length'
accuracy_str = 'accuracy'
precision_str = 'precision'
recall_str = 'recall'
f1_string = 'f1 score'
@@ -332,7 +337,7 @@ def manual_tuning(model_type):
    df = load_dataset(file_path)
    df = remove_covid_data(df)
    tr, val, te = split_data_by_userdata_percentage(df, percentages=(80, 10, 10), sample=20)
    tr, val, te = split_data_by_userdata_percentage(df, percentages=(80, 10, 10), sample=100)
    tr = reduce_columns(tr, data_filename)
    val = reduce_columns(val, data_filename)
    te = reduce_columns(te, data_filename)
@@ -342,15 +347,18 @@ def manual_tuning(model_type):
    # fit and evaluate model
    # config
    repeats = 5
    n_batch = 4
    repeats = 3
    n_batch = 1024
    n_epochs = 500
    n_neurons = 1
    n_neurons = 16
    l_rate = 1e-4
    reg = L1L2(l1=0.0, l2=0.0)
    history_list = list()
    # run diagnostic tests
    for i in range(repeats):
        history = train_one_model(user_data_train, user_data_val, n_batch, n_epochs, n_neurons,
        history = train_one_model(user_data_train, user_data_val, n_batch, n_epochs,
                                  n_neurons, l_rate, reg,
                                  sequence_length=sequence_length,
                                  model_type=model_type)
        history_list.append(history)
@@ -358,11 +366,45 @@ def manual_tuning(model_type):
    for history in history_list:
        plt.plot(history['train_'+metric], color='blue')
        plt.plot(history['test_'+metric], color='orange')
    plt.savefig(figure_path+metric+'_epochs_diagnostic.png')
    plt.savefig(figure_path+metric+'_e'+str(n_epochs)+'_n'+str(n_neurons)+'_b'+
                str(n_batch)+'_l'+str(l_rate)+'_diagnostic.png')
    plt.clf()
    print('Done')
def calculate_baselines():
    file_combinations = [(hour_timespan_str, with_threshold_str, 'ALL32USERS1HR_WITHTHRESHOLD.xlsx'),
                         (min_timespan_str, with_threshold_str, 'ALL32USERS15MIN_WITHTHRESHOLD.xlsx'),
                         (min_timespan_str, without_threshold_str, 'ALLUSERS32_15MIN_WITHOUTTHREHOLD.xlsx'),
                         (hour_timespan_str, without_threshold_str, 'ALLUSERS_32_1HR_WITHOUT_THRESHOLD.xlsx'),
                         ]
    baseline_res = pd.DataFrame()
    for timespan_id, threshold_id, filename in file_combinations:
        file_path = os.path.join(dataset_path, filename)
        df = load_dataset(file_path)
        df = remove_covid_data(df)
        _, _, te = split_data_by_userdata_percentage(df, percentages=(80, 10, 10), sample=20)
        te = reduce_columns(te, filename)
        user_data_te = prepare_user_data(te)
        for sequence_length in range(5, 30, 5):
            x, y = prepare_data_for_model(user_data=user_data_te, sequence_length=sequence_length)
            for strategy in ['most_frequent', 'stratified', 'uniform']:
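                # the dummy strategies ignore the input features, so fitting on the test split
                # only fixes the label distribution the baseline predictions are drawn from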
                cls = DummyClassifier(strategy=strategy)
                cls.fit(x, y)
                y_pred = cls.predict(x)
                acc, p, r, f1 = eval_metrics(y_true=y, y_pred=y_pred)
                baseline_res = pd.concat([baseline_res,
                                          DataFrame({'strategy': [strategy], threshold_str: [threshold_id],
                                                     timespan_str: [timespan_id], sequence_length_str: [sequence_length],
                                                     accuracy_str: [acc], precision_str: [p], recall_str: [r],
                                                     f1_string: [f1]})], ignore_index=True)
    baseline_res.to_json('baseline_results.json')
    print('Done')
if __name__ == "__main__":
    # main_two_v1()
    # visualise_results_v1()
@@ -370,4 +412,5 @@ if __name__ == "__main__":
    # main_two_v2(model_type=model_type_gru)
    # visualise_results_v2()
    manual_tuning(model_type=model_type_lstm)
    # calculate_baselines()
    print('Done')

pipeline.py (32 lines changed)

@@ -1,3 +1,5 @@
import random
import keras_tuner
import numpy as np
import pandas as pd
@@ -78,6 +80,8 @@ def prepare_data_for_model(user_data, sequence_length):
        x_new, y_new = make_sequences(data, sequence_length)
        x = x + x_new
        y = y + y_new
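    # both generators are seeded with 17, so x and y receive the same permutation and stay aligned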
    random.Random(17).shuffle(x)
    random.Random(17).shuffle(y)
    x = np.array(x)
    y = np.array(y)
    return x, y
@@ -205,7 +209,7 @@ def train_models_v2(user_data, user_data_val, sequence_length, model_type):
    return tuner.get_best_models(num_models=1)[0]
def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, sequence_length, model_type):
def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, l_rate, reg, sequence_length, model_type):
    x, y = prepare_data_for_model(user_data=train_data, sequence_length=sequence_length)
    n_features = x.shape[2]
    users = list(train_data.keys())
@@ -213,7 +217,7 @@ def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, sequence
    # prepare model
    def build_model():
        model = Sequential()
        model.add(Input(shape=(sequence_length, n_features), batch_size=n_batch))
        model.add(Input(shape=(sequence_length, n_features), batch_size=n_batch, bias_regularizer=reg))
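        # note: Keras Input() does not accept a bias_regularizer argument, so this call would fail;
        # the regularizer presumably belongs on the LSTM/GRU/Dense layers instead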
        # if model_type == model_type_bilstm:
        #     model.add(Bidirectional(units=units_hp))
        if model_type == model_type_lstm:
@@ -225,7 +229,7 @@ def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, sequence
        # model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.2, step=0.1)))
        model.add(Dense(len(users), activation='softmax'))
        model.compile(
            optimizer=Adam(learning_rate=1e-5),
            optimizer=Adam(learning_rate=l_rate),
            loss=SparseCategoricalCrossentropy(),
            metrics=[SparseCategoricalAccuracy()],
        )
@@ -234,21 +238,24 @@ def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, sequence
    model = build_model()
    # fit model
    train_p, test_p, train_r, test_r, train_f1, test_f1 = list(), list(), list(), list(), list(), list()
    train_acc, test_acc, train_p, test_p, train_r, test_r, train_f1, test_f1 = list(), list(), list(), list(), list(), list(), list(), list()
    for i in range(n_epochs):
        model.fit(x, y, batch_size=n_batch, epochs=1, verbose=0, shuffle=False)
        # evaluate model on train data
        p, r, f1 = evaluate(model, train_data, sequence_length, n_batch)
        acc, p, r, f1 = evaluate(model, train_data, sequence_length, n_batch)
        train_acc.append(acc)
        train_p.append(p)
        train_r.append(r)
        train_f1.append(f1)
        # evaluate model on test data
        p, r, f1 = evaluate(model, val_data, sequence_length, n_batch)
        acc, p, r, f1 = evaluate(model, val_data, sequence_length, n_batch)
        test_acc.append(acc)
        test_p.append(p)
        test_r.append(r)
        test_f1.append(f1)
    history = DataFrame()
    history['train_acc'], history['test_acc'] = train_acc, test_acc
    history['train_p'], history['test_p'] = train_p, test_p
    history['train_r'], history['test_r'] = train_r, test_r
    history['train_f1'], history['test_f1'] = train_f1, test_f1
@@ -262,11 +269,16 @@ def evaluate(model, df, sequence_length, batch_size):
    y_pred = model.predict(x, verbose=0, batch_size=batch_size)
    y_pred_classes = np.argmax(y_pred, axis=1)
    f1 = f1_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
    p = precision_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
    r = recall_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
    return p, r, f1
    return eval_metrics(y_true=y_true, y_pred=y_pred_classes)

def eval_metrics(y_true, y_pred):
    f1 = f1_score(y_true=y_true, y_pred=y_pred, average='weighted')
    p = precision_score(y_true=y_true, y_pred=y_pred, average='weighted')
    r = recall_score(y_true=y_true, y_pred=y_pred, average='weighted')
    acc = accuracy_score(y_true=y_true, y_pred=y_pred)
    return acc, p, r, f1
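
For readers checking the new helper: precision, recall and F1 are support-weighted averages over the user classes, while accuracy is plain label agreement. A tiny illustrative check (toy labels, not from the repository):

# toy example with three imbalanced classes
y_true = [0, 0, 0, 1, 2]
y_pred = [0, 0, 1, 1, 2]
acc, p, r, f1 = eval_metrics(y_true=y_true, y_pred=y_pred)
# acc = 0.8, weighted precision = 0.9, weighted recall = 0.8 (weighted recall always equals accuracy)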
# === Evaluation ===
def evaluate_models(best_models, df_test, sequence_lengths, output_excel_path, ALLUSERS32_15MIN_WITHOUTTHREHOLD):
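
As a usage note on the new baseline export: calculate_baselines() writes a pandas DataFrame to baseline_results.json via DataFrame.to_json, so it can be reloaded for comparison against the trained models. A minimal sketch, assuming the file sits in the working directory:

import pandas as pd

baselines = pd.read_json('baseline_results.json')
# highest dummy-classifier F1 per dataset variant, i.e. the floor the trained models should beat
print(baselines.sort_values('f1 score', ascending=False).head())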
