
Added method for manual testing of hyperparameters

master
Bianca Steffes, 4 weeks ago
commit ac57fef0e5
  1. main.py (46 changed lines)
  2. pipeline.py (95 changed lines)

main.py (46 changed lines)

@@ -13,7 +13,7 @@ from pipeline import (
    prepare_user_data,
    train_models,
    evaluate_models,
-    prepare_data_for_model, model_type_gru, model_type_lstm, model_type_bilstm, train_models_v2
+    prepare_data_for_model, model_type_gru, model_type_lstm, model_type_bilstm, train_models_v2, train_one_model
)

year_str = 'Year'
@@ -321,11 +321,53 @@ def test(model_type):
                               ignore_index=True)
    print(results)


+def manual_tuning(model_type):
+    # load dataset
+    sequence_length = 20
+    data_filename = 'ALL32USERS15MIN_WITHTHRESHOLD.xlsx'
+    timespan_id = min_timespan_str
+    threshold_id = with_threshold_str
+    file_path = os.path.join(dataset_path, data_filename)
+    df = load_dataset(file_path)
+    df = remove_covid_data(df)
+    tr, val, te = split_data_by_userdata_percentage(df, percentages=(80, 10, 10), sample=20)
+    tr = reduce_columns(tr, data_filename)
+    val = reduce_columns(val, data_filename)
+    te = reduce_columns(te, data_filename)
+    user_data_train = prepare_user_data(tr)
+    user_data_val = prepare_user_data(val)
+
+    # fit and evaluate model
+    # config
+    repeats = 5
+    n_batch = 4
+    n_epochs = 500
+    n_neurons = 1
+    history_list = list()
+    # run diagnostic tests
+    for i in range(repeats):
+        history = train_one_model(user_data_train, user_data_val, n_batch, n_epochs, n_neurons,
+                                  sequence_length=sequence_length,
+                                  model_type=model_type)
+        history_list.append(history)
+    for metric in ['p', 'r', 'f1']:
+        for history in history_list:
+            plt.plot(history['train_'+metric], color='blue')
+            plt.plot(history['test_'+metric], color='orange')
+        plt.savefig(figure_path+metric+'_epochs_diagnostic.png')
+        plt.clf()
+    print('Done')
+
+
if __name__ == "__main__":
    # main_two_v1()
    # visualise_results_v1()
-    test(model_type=model_type_gru)
+    #test(model_type=model_type_gru)
    # main_two_v2(model_type=model_type_gru)
    #visualise_results_v2()
+    manual_tuning(model_type=model_type_lstm)
    print('Done')
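For reference, a minimal sketch of how the new train_one_model entry point could be driven with several candidate hyperparameter values instead of the fixed n_neurons = 1 above; the candidate list, epoch count, and output filename are illustrative assumptions, not part of this commit, and user_data_train / user_data_val are assumed to have been prepared as in manual_tuning().

# Illustrative sweep over a few neuron counts using the helpers added in this commit.
import matplotlib.pyplot as plt

candidate_neurons = [1, 8, 32]  # assumed candidate values, not from the commit
histories = {}
for n in candidate_neurons:
    histories[n] = train_one_model(user_data_train, user_data_val,
                                   n_batch=4, n_epochs=50, n_neurons=n,
                                   sequence_length=20, model_type=model_type_lstm)

# one curve per candidate: validation F1 per epoch
for n, history in histories.items():
    plt.plot(history['test_f1'], label='n_neurons=' + str(n))
plt.xlabel('epoch')
plt.ylabel('weighted F1')
plt.legend()
plt.savefig('manual_tuning_f1_comparison.png')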

pipeline.py (95 changed lines)

@@ -4,14 +4,15 @@ import pandas as pd
import shutil
from keras import Input
-from keras.src.metrics import F1Score, Precision, Recall, Accuracy
-from pandas import ExcelWriter
+from keras.src.losses import SparseCategoricalCrossentropy
+from keras.src.metrics import F1Score, Precision, Recall, Accuracy, SparseCategoricalAccuracy
+from pandas import ExcelWriter, DataFrame
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional,GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras_tuner import RandomSearch
-from sklearn.metrics import accuracy_score
+from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

epochs = 5#50

model_type_gru = 'GRU'
@@ -61,18 +62,25 @@ def prepare_user_data(df):
    users = df['user'].unique()
    return {user: df[df['user'] == user] for user in users}


+def make_sequences(data, sequence_length):
+    x, y = [], []
+    features = data.drop('user', axis=1).values
+    features = features.astype(int)
+    labels = data['user'].values
+    for i in range(len(features) - sequence_length):
+        x.append(features[i:i + sequence_length])
+        y.append(labels[i + sequence_length])
+    return x, y
+
+
def prepare_data_for_model(user_data, sequence_length):
-    X, y = [], []
+    x, y = [], []
    for user, data in user_data.items():
-        features = data.drop('user', axis=1).values
-        features = features.astype(int)
-        labels = data['user'].values
-        for i in range(len(features) - sequence_length):
-            X.append(features[i:i + sequence_length])
-            y.append(labels[i + sequence_length])
-    X = np.array(X)
+        x_new, y_new = make_sequences(data, sequence_length)
+        x = x + x_new
+        y = y + y_new
+    x = np.array(x)
    y = np.array(y)
-    return X,y
+    return x,y


# === Training & Validation ===
def train_models(user_data, user_data_val, sequence_lengths, tuner_dir="./working/tuner", model_type=model_type_lstm):
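To make the windowing in the new make_sequences() explicit: for each user it slides a window of sequence_length rows over the feature columns and uses the user label of the row immediately after the window as the target. A small worked example follows; the frame, its column names, and its values are invented for illustration.

# Illustrative only: what make_sequences() returns for one user's frame.
import pandas as pd

demo = pd.DataFrame({
    'feature_a': [10, 11, 12, 13, 14],
    'feature_b': [0, 1, 0, 1, 0],
    'user':      [3, 3, 3, 3, 3],
})
x, y = make_sequences(demo, sequence_length=3)
# 5 rows and a window of 3 give 5 - 3 = 2 samples;
# x[0] is the 3x2 feature block for rows 0..2, y[0] is the user label of row 3
print(len(x), x[0].shape, y[0])  # -> 2 (3, 2) 3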
@@ -197,6 +205,69 @@ def train_models_v2(user_data, user_data_val, sequence_length, model_type):
    return tuner.get_best_models(num_models=1)[0]


+def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, sequence_length, model_type):
+    x, y = prepare_data_for_model(user_data=train_data, sequence_length=sequence_length)
+    n_features = x.shape[2]
+    users = list(train_data.keys())
+
+    # prepare model
+    def build_model():
+        model = Sequential()
+        model.add(Input(shape=(sequence_length, n_features), batch_size=n_batch))
+        # if model_type == model_type_bilstm:
+        #     model.add(Bidirectional(units=units_hp))
+        if model_type == model_type_lstm:
+            model.add(LSTM(n_neurons))
+        # if model_type == model_type_gru:
+        #     model.add(GRU(units=units_hp))
+        # TODO: add another dense layer
+        # model.add(Dense(256, activation='relu'))
+        # model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.2, step=0.1)))
+        model.add(Dense(len(users), activation='softmax'))
+        model.compile(
+            optimizer=Adam(learning_rate=1e-5),
+            loss=SparseCategoricalCrossentropy(),
+            metrics=[SparseCategoricalAccuracy()],
+        )
+        return model
+
+    model = build_model()
+
+    # fit model
+    train_p, test_p, train_r, test_r, train_f1, test_f1 = list(), list(), list(), list(), list(), list()
+    for i in range(n_epochs):
+        model.fit(x, y, batch_size=n_batch, epochs=1, verbose=0, shuffle=False)
+        # evaluate model on train data
+        p, r, f1 = evaluate(model, train_data, sequence_length, n_batch)
+        train_p.append(p)
+        train_r.append(r)
+        train_f1.append(f1)
+        # evaluate model on test data
+        p, r, f1 = evaluate(model, val_data, sequence_length, n_batch)
+        test_p.append(p)
+        test_r.append(r)
+        test_f1.append(f1)
+    history = DataFrame()
+    history['train_p'], history['test_p'] = train_p, test_p
+    history['train_r'], history['test_r'] = train_r, test_r
+    history['train_f1'], history['test_f1'] = train_f1, test_f1
+    return history
+
+
+def evaluate(model, df, sequence_length, batch_size):
+    x, y = prepare_data_for_model(user_data=df, sequence_length=sequence_length)
+    x = np.array(x)
+    y_true = np.array(y)
+    y_pred = model.predict(x, verbose=0, batch_size=batch_size)
+    y_pred_classes = np.argmax(y_pred, axis=1)
+    f1 = f1_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
+    p = precision_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
+    r = recall_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
+    return p, r, f1


# === Evaluation ===
def evaluate_models(best_models, df_test, sequence_lengths, output_excel_path, ALLUSERS32_15MIN_WITHOUTTHREHOLD):
    print("\n🧪 Evaluating on Test Data...")
