|
|
|
import shutil

import numpy as np
import pandas as pd

from keras import Input
from keras.src.losses import SparseCategoricalCrossentropy
from keras.src.metrics import F1Score, Precision, Recall, Accuracy, SparseCategoricalAccuracy
from pandas import ExcelWriter, DataFrame
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras_tuner import RandomSearch
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

epochs = 5  # 50 for a full training run

model_type_lstm = 'LSTM'  # assumed counterpart of model_type_gru; referenced below
model_type_gru = 'GRU'
|
|
|
def prepare_user_data(df):
    """Split a single frame into one frame per user, keyed by user id."""
    users = df['user'].unique()
    return {user: df[df['user'] == user] for user in users}
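
# Toy illustration (hypothetical values): a frame whose 'user' column holds
# [3, 3, 9] becomes {3: <the two rows of user 3>, 9: <the row of user 9>}.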
|
|
def make_sequences(data, sequence_length):
    """Slide a window of `sequence_length` rows over one user's frame.

    Each block of `sequence_length` feature rows becomes one sample; the
    label is the user of the row that immediately follows the window.
    """
    x, y = [], []
    features = data.drop('user', axis=1).values
    features = features.astype(int)
    labels = data['user'].values
    for i in range(len(features) - sequence_length):
        x.append(features[i:i + sequence_length])
        y.append(labels[i + sequence_length])
    return x, y
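
# A minimal sketch of what make_sequences yields (toy frame, hypothetical values):
#
#     df = pd.DataFrame({'f1': [1, 2, 3, 4], 'f2': [0, 1, 0, 1], 'user': [7, 7, 7, 7]})
#     x, y = make_sequences(df, sequence_length=2)
#     # x -> [[[1, 0], [2, 1]], [[2, 1], [3, 0]]]   two windows of two timesteps
#     # y -> [7, 7]                                  the user of the row after each window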
|
|
def prepare_data_for_model(user_data, sequence_length):
    """Window every user's frame and stack the results into model-ready arrays."""
    x, y = [], []
    for user, data in user_data.items():
        x_new, y_new = make_sequences(data, sequence_length)
        x = x + x_new
        y = y + y_new
    x = np.array(x)
    y = np.array(y)
    return x, y
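
# Shape sketch (hypothetical sizes): with 2 users, 100 rows each, 4 feature
# columns and sequence_length=10, x comes out as (180, 10, 4) and y as (180,).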
|
|
# === Training & Validation ===
def train_models(user_data, user_data_val, sequence_lengths, tuner_dir="./working/tuner", model_type=model_type_lstm):
    ...  # (body not shown in this excerpt)


def train_models_v2(user_data, user_data_val, sequence_length, model_type):
    ...  # (tuner setup not shown in this excerpt)
    return tuner.get_best_models(num_models=1)[0]
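
# The elided bodies evidently drive keras_tuner (RandomSearch is imported and
# the best model is read back above); a generic search looks like this sketch
# (illustrative settings and a hypothetical build_tunable_model(hp), not the
# project's exact ones):
#
#     tuner = RandomSearch(build_tunable_model, objective='val_loss',
#                          max_trials=10, directory=tuner_dir,
#                          project_name='user_identification')
#     tuner.search(x, y, validation_data=(x_val, y_val), epochs=epochs)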
|
|
def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons, sequence_length, model_type):
    x, y = prepare_data_for_model(user_data=train_data, sequence_length=sequence_length)
    # note: the sparse loss below assumes 'user' labels are integers in [0, len(users))
    n_features = x.shape[2]
    users = list(train_data.keys())

    # prepare model
    def build_model():
        model = Sequential()
        model.add(Input(shape=(sequence_length, n_features), batch_size=n_batch))
        # if model_type == model_type_bilstm:
        #     model.add(Bidirectional(LSTM(n_neurons)))
        if model_type == model_type_lstm:
            model.add(LSTM(n_neurons))
        # if model_type == model_type_gru:
        #     model.add(GRU(n_neurons))
        # TODO: add another dense layer, e.g.:
        # model.add(Dense(256, activation='relu'))
        # model.add(Dropout(0.1))
        model.add(Dense(len(users), activation='softmax'))
        model.compile(
            optimizer=Adam(learning_rate=1e-5),
            loss=SparseCategoricalCrossentropy(),
            metrics=[SparseCategoricalAccuracy()],
        )
        return model

    model = build_model()

    # fit one epoch at a time so train/validation metrics can be recorded per epoch
    train_p, test_p, train_r, test_r, train_f1, test_f1 = [], [], [], [], [], []
    for i in range(n_epochs):
        model.fit(x, y, batch_size=n_batch, epochs=1, verbose=0, shuffle=False)
        # evaluate model on train data
        p, r, f1 = evaluate(model, train_data, sequence_length, n_batch)
        train_p.append(p)
        train_r.append(r)
        train_f1.append(f1)
        # evaluate model on validation data
        p, r, f1 = evaluate(model, val_data, sequence_length, n_batch)
        test_p.append(p)
        test_r.append(r)
        test_f1.append(f1)

    history = DataFrame()
    history['train_p'], history['test_p'] = train_p, test_p
    history['train_r'], history['test_r'] = train_r, test_r
    history['train_f1'], history['test_f1'] = train_f1, test_f1
    return history
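
# Hypothetical usage (batch size, neuron count and sequence length are
# illustrative; train_data/val_data come from prepare_user_data):
#
#     history = train_one_model(train_data, val_data, n_batch=32, n_epochs=epochs,
#                               n_neurons=64, sequence_length=10,
#                               model_type=model_type_lstm)
#     print(history[['train_f1', 'test_f1']])  # diverging curves suggest overfitting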
|
|
def evaluate(model, user_data, sequence_length, batch_size):
    """Return weighted precision, recall and F1 of the model on a user-data dict."""
    x, y = prepare_data_for_model(user_data=user_data, sequence_length=sequence_length)
    x = np.array(x)
    y_true = np.array(y)

    y_pred = model.predict(x, verbose=0, batch_size=batch_size)
    y_pred_classes = np.argmax(y_pred, axis=1)
    f1 = f1_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
    p = precision_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
    r = recall_score(y_true=y_true, y_pred=y_pred_classes, average='weighted')
    return p, r, f1
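
# For a per-user breakdown instead of the weighted aggregate, sklearn's
# classification_report could be dropped in here (illustrative, not part of
# the current pipeline):
#
#     from sklearn.metrics import classification_report
#     print(classification_report(y_true, y_pred_classes, zero_division=0))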
|
|
# === Evaluation ===
def evaluate_models(best_models, df_test, sequence_lengths, output_excel_path, ALLUSERS32_15MIN_WITHOUTTHREHOLD):
    print("\n🧪 Evaluating on Test Data...")
|
|
|
|