|
|
|
@ -4,7 +4,7 @@ import pandas as pd |
|
|
|
import shutil |
|
|
|
|
|
|
|
from keras import Input |
|
|
|
from keras.src.metrics import F1Score |
|
|
|
from keras.src.metrics import F1Score, Precision, Recall, Accuracy |
|
|
|
from pandas import ExcelWriter |
|
|
|
from tensorflow.keras.models import Sequential |
|
|
|
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional,GRU |
|
|
|
@ -13,7 +13,7 @@ from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping |
|
|
|
from keras_tuner import RandomSearch |
|
|
|
from sklearn.metrics import accuracy_score |
|
|
|
|
|
|
|
# Number of training epochs.
# NOTE(review): the patch reduced this from 50 to 5, presumably for a quick
# debug run (the old value survives in the comment) — restore 50, or make it
# configurable, before a real training run.
epochs = 5  # 50

# Identifiers for the recurrent architecture to build; compared against the
# model_type argument of train_models_v2 to pick the layer type.
model_type_gru = 'GRU'
model_type_lstm = 'LSTM'
model_type_bilstm = 'BiLSTM'
|
|
|
@ -57,14 +57,15 @@ def filter_test_data(df, scenario): |
|
|
|
return pd.concat(data_parts, ignore_index=True) |
|
|
|
|
|
|
|
def prepare_user_data(df):
    """Split a DataFrame into one sub-DataFrame per user.

    The patch residue left two conflicting bodies here (an older one that
    sorted by 'user' first, and a newer one that does not); this keeps the
    newer behavior — row order within each user's frame is preserved as-is.

    Args:
        df: DataFrame with a 'user' column identifying each row's user.

    Returns:
        Dict mapping each unique user value (in order of first appearance)
        to the DataFrame of that user's rows. Empty dict for an empty df.
    """
    # df_sorted = df.sort_values(by='user').reset_index(drop=True)
    users = df['user'].unique()
    return {user: df[df['user'] == user] for user in users}
|
|
|
|
|
|
|
def prepare_data_for_model(user_data, sequence_length): |
|
|
|
X, y = [], [] |
|
|
|
for user, data in user_data.items(): |
|
|
|
features = data.drop('user', axis=1).values |
|
|
|
features = features.astype(int) |
|
|
|
labels = data['user'].values |
|
|
|
for i in range(len(features) - sequence_length): |
|
|
|
X.append(features[i:i + sequence_length]) |
|
|
|
@ -144,9 +145,11 @@ def train_models(user_data, user_data_val, sequence_lengths, tuner_dir="./workin |
|
|
|
# === Training & Validation === |
|
|
|
def train_models_v2(user_data, user_data_val, sequence_length, model_type): |
|
|
|
tuner_dir = "./working/tuner/"+model_type |
|
|
|
#val_metric = 'val_f1' |
|
|
|
val_metric = 'val_precision' |
|
|
|
|
|
|
|
early_stopping = EarlyStopping(monitor='val_f1', patience=3, restore_best_weights=True) |
|
|
|
lr_scheduler = ReduceLROnPlateau(monitor='val_f1', factor=0.5, patience=2) |
|
|
|
early_stopping = EarlyStopping(monitor=val_metric, patience=3, restore_best_weights=True) |
|
|
|
lr_scheduler = ReduceLROnPlateau(monitor=val_metric, factor=0.5, patience=2) |
|
|
|
|
|
|
|
shutil.rmtree(tuner_dir, ignore_errors=True) |
|
|
|
|
|
|
|
@ -156,11 +159,12 @@ def train_models_v2(user_data, user_data_val, sequence_length, model_type): |
|
|
|
n_features = x.shape[2] |
|
|
|
users = list(user_data.keys()) |
|
|
|
|
|
|
|
y_val = np.array(y_val).reshape(-1, 1) |
|
|
|
y = np.array(y).reshape(-1, 1) |
|
|
|
#y_val = np.array(y_val).reshape(-1, 1) |
|
|
|
#y = np.array(y).reshape(-1, 1) |
|
|
|
|
|
|
|
def build_model(hp): |
|
|
|
units_hp = hp.Int('units', 2, 256, step=2, sampling="log") |
|
|
|
units_hp = hp.Int('units', 2, 8, step=2, sampling="log") |
|
|
|
# units_hp = hp.Int('units', 2, 256, step=2, sampling="log") |
|
|
|
|
|
|
|
model = Sequential() |
|
|
|
model.add(Input((sequence_length, n_features))) |
|
|
|
@ -170,18 +174,20 @@ def train_models_v2(user_data, user_data_val, sequence_length, model_type): |
|
|
|
model.add(LSTM(units=units_hp)) |
|
|
|
if model_type==model_type_gru: |
|
|
|
model.add(GRU(units=units_hp)) |
|
|
|
model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.5, step=0.1))) |
|
|
|
model.add(Dropout(hp.Float('dropout_rate', 0.1, 0.2, step=0.1))) |
|
|
|
model.add(Dense(len(users), activation='softmax')) |
|
|
|
model.compile( |
|
|
|
optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])), |
|
|
|
optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-5])), |
|
|
|
loss='sparse_categorical_crossentropy', |
|
|
|
metrics=[F1Score(name='f1', average='weighted')] |
|
|
|
metrics=[#F1Score(name='f1', average='weighted'), |
|
|
|
Precision(), #Recall(), Accuracy() |
|
|
|
] |
|
|
|
) |
|
|
|
return model |
|
|
|
|
|
|
|
tuner = RandomSearch( |
|
|
|
build_model, |
|
|
|
objective=keras_tuner.Objective("val_f1", direction="max"), |
|
|
|
objective=keras_tuner.Objective(val_metric, direction="max"), |
|
|
|
max_trials=120, |
|
|
|
directory=tuner_dir, |
|
|
|
) |
|
|
|
|