From a3efef815352509c57fc11a185701e5494e83fa1 Mon Sep 17 00:00:00 2001
From: Bianca Steffes
Date: Wed, 17 Sep 2025 11:39:38 +0200
Subject: [PATCH] Updated v2 to use F1 score and don't retrain model

---
 main.py     | 33 ++++++++++++++++++++++++++++++---
 pipeline.py | 39 ++++++++++++++++++---------------------
 2 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/main.py b/main.py
index ee2607c..a5579bc 100644
--- a/main.py
+++ b/main.py
@@ -126,7 +126,7 @@ def load_previous_results(filename):
     return results
 
 def main_two_v2(model_type):
-    seq_length = range(20,31, 10)
+    seq_length = range(10, 31, 5)
     for sequence_length in seq_length:
         for data_filename in os.listdir(dataset_path):
             timespan_id = hour_timespan_str
@@ -156,13 +156,13 @@ def main_two_v2(model_type):
             user_data_train = prepare_user_data(tr)
             user_data_val = prepare_user_data(val)
 
-            best_models = train_models_v2(user_data_train, user_data_val,
+            best_model = train_models_v2(user_data_train, user_data_val,
                                           sequence_length=sequence_length,
                                           model_type=model_type)
 
             results = load_previous_results(result_filename_v2)
             results = pd.concat([results,
-                                 evaluate_model_on_test_data(model=best_models[sequence_length]['model'],
+                                 evaluate_model_on_test_data(model=best_model,
                                                              test_df=te,
                                                              sequence_length=sequence_length,
                                                              time_span_id=timespan_id,
@@ -259,8 +259,35 @@ def visualise_results_v1():
     # Conclusion: no clearly better versions discernible
 
 
+def visualise_results_v2():
+    results = pd.DataFrame(json.load(open(result_filename_v2)))
+    with_threshold = results[results[threshold_str] == with_threshold_str]
+    without_threshold = results[results[threshold_str] == without_threshold_str]
+    fig, axes = plt.subplots(2, 3)
+    ax_col_id = 0
+    ax_row_id = -1
+    for timespan in [hour_timespan_str, min_timespan_str]:
+        ax_row_id += 1
+        for model in [model_type_lstm, model_type_bilstm, model_type_gru]:
+            with_sub = with_threshold[(with_threshold[timespan_str] == timespan) & (with_threshold[model_type_str] == model)]
+            without_sub = without_threshold[(without_threshold[timespan_str] == timespan) & (without_threshold[model_type_str] == model)]
+            with_sub = with_sub.sort_values(sequence_length_str)
+            without_sub = without_sub.sort_values(sequence_length_str)
+            ax = axes[ax_row_id, ax_col_id]
+            ax.set_title(model + ' ' + timespan)
+            ax.plot(with_sub[sequence_length_str], with_sub[f1_string], label=with_threshold_str)
+            ax.plot(without_sub[sequence_length_str], without_sub[f1_string], label=without_threshold_str)
+            ax.legend()
+            ax_col_id += 1
+            ax_col_id %= 3
+    fig.tight_layout()
+    fig.savefig(figure_path + 'v2_results.svg')
+    # Conclusion: no clearly better versions discernible
+
+
 if __name__ == "__main__":
     # main_two_v1()
     # visualise_results_v1()
     main_two_v2(model_type=model_type_gru)
+    # visualise_results_v2()
     print('Done')
diff --git a/pipeline.py b/pipeline.py
index f205a88..f2997c5 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -2,11 +2,10 @@ import keras_tuner
 import numpy as np
 import pandas as pd
 import shutil
-import os
+from keras import Input
 from keras.src.metrics import F1Score
 from pandas import ExcelWriter
 
-import keras_tuner as kt
 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional,GRU
 from tensorflow.keras.optimizers import Adam
@@ -14,7 +13,7 @@ from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
 from keras_tuner import RandomSearch
 from sklearn.metrics import accuracy_score
 
-epochs = 30
+epochs = 50
 model_type_gru = 'GRU'
 model_type_lstm = 'LSTM'
 model_type_bilstm = 'BiLSTM'
@@ -144,10 +143,10 @@ def train_models(user_data, user_data_val, sequence_lengths, tuner_dir="./workin
 
 
 # === Training & Validation ===
 def train_models_v2(user_data, user_data_val, sequence_length, model_type):
-    tuner_dir = "./working/tuner"
+    tuner_dir = "./working/tuner/" + model_type  # separate tuner state per model type
 
-    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
-    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)
+    early_stopping = EarlyStopping(monitor='val_f1', mode='max', patience=3, restore_best_weights=True)  # mode='max': 'auto' would minimise an unrecognised metric name like val_f1
+    lr_scheduler = ReduceLROnPlateau(monitor='val_f1', mode='max', factor=0.5, patience=2)
 
     shutil.rmtree(tuner_dir, ignore_errors=True)
 
@@ -157,41 +156,39 @@ def train_models_v2(user_data, user_data_val, sequence_length, model_type):
     n_features = x.shape[2]
     users = list(user_data.keys())
 
+    y_val = np.array(y_val).reshape(-1, 1)  # F1Score expects 2-D targets
+    y = np.array(y).reshape(-1, 1)
+
     def build_model(hp):
+        units_hp = hp.Int('units', 2, 256, step=2, sampling="log")  # 2, 4, 8, ..., 256
+
         model = Sequential()
+        model.add(Input((sequence_length, n_features)))
         if model_type==model_type_bilstm:
-            model.add(Bidirectional(LSTM(units=hp.Int('units', 32, 256, step=2),
-                                         input_shape=(sequence_length, n_features))))
+            model.add(Bidirectional(LSTM(units=units_hp)))
         if model_type==model_type_lstm:
-            model.add(LSTM(units=hp.Int('units', 32, 256, step=2),
-                           input_shape=(sequence_length, n_features)))
+            model.add(LSTM(units=units_hp))
         if model_type==model_type_gru:
-            model.add(GRU(units=hp.Int('units', 32, 256, step=2),
-                          input_shape=(sequence_length, n_features)))
+            model.add(GRU(units=units_hp))
         model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.5, step=0.1)))
         model.add(Dense(len(users), activation='softmax'))
         model.compile(
             optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
             loss='sparse_categorical_crossentropy',
-            metrics=['accuracy']
+            metrics=[F1Score(name='f1', average='weighted')]
         )
         return model
 
     tuner = RandomSearch(
         build_model,
-        objective='val_loss',
-        max_trials=100,
+        objective=keras_tuner.Objective("val_f1", direction="max"),
+        max_trials=120,  # full grid: 8 units x 5 dropout x 3 learning-rate values
         directory=tuner_dir,
     )
 
     tuner.search(x, y, epochs=epochs, validation_data=(x_val, y_val),
                  callbacks=[early_stopping, lr_scheduler])
-
-    best_hps = tuner.get_best_hyperparameters(1)[0]
-    best_model = tuner.hypermodel.build(best_hps)
-    best_model.fit(x, y, epochs=epochs, validation_data=(x_val, y_val),
-                   callbacks=[early_stopping, lr_scheduler])
-    return best_model
+    return tuner.get_best_models(num_models=1)[0]  # best checkpointed model, no retraining
 
 
 # === Evaluation ===
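
Review context, below the patch: the two behavioural changes are that KerasTuner now ranks trials by weighted validation F1 instead of validation loss, and that the best checkpointed model is reused instead of being rebuilt and retrained from scratch. The following is a minimal, self-contained sketch of that flow, not the project's pipeline: SEQ_LEN, N_FEATURES, N_USERS, the toy data and the ./working/tuner_demo directory are placeholder assumptions, and one-hot labels are used here because keras' F1Score is documented for 2-D (batch, n_classes) targets.

import numpy as np
import keras_tuner
from tensorflow import keras

SEQ_LEN, N_FEATURES, N_USERS = 20, 4, 3  # placeholder sizes, not the project's real dimensions

def build_model(hp):
    model = keras.Sequential([
        keras.Input((SEQ_LEN, N_FEATURES)),
        # log sampling with step=2 makes the candidate units 2, 4, 8, ..., 256
        keras.layers.GRU(hp.Int('units', 2, 256, step=2, sampling='log')),
        keras.layers.Dense(N_USERS, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        # logged as 'f1' on training data and 'val_f1' on validation data
        metrics=[keras.metrics.F1Score(name='f1', average='weighted')],
    )
    return model

# toy data; one-hot labels, the 2-D (batch, n_classes) format F1Score documents
x = np.random.rand(64, SEQ_LEN, N_FEATURES).astype('float32')
y = keras.utils.to_categorical(np.random.randint(0, N_USERS, 64), N_USERS)

tuner = keras_tuner.RandomSearch(
    build_model,
    # a custom metric needs an explicit direction; the tuner cannot
    # infer whether a higher or lower 'val_f1' is better
    objective=keras_tuner.Objective('val_f1', direction='max'),
    max_trials=2,
    directory='./working/tuner_demo',  # hypothetical scratch directory
)
tuner.search(x, y, epochs=2, validation_split=0.25)

# get_best_models() reloads the weights of the best trial's checkpoint,
# which is what makes a separate rebuild-and-retrain step unnecessary
best_model = tuner.get_best_models(num_models=1)[0]

tuner.get_best_models() returns models already loaded with their best checkpointed weights, so the previous get_best_hyperparameters()/build()/fit() round trip can be dropped; if a final refit on combined training and validation data were ever wanted again, get_best_hyperparameters() would remain the entry point for that.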