@@ -408,8 +408,8 @@ def upsampling(df):
 
 
 def manual_tuning_v3(model_type):
-    # TODO: hrs/min + different sequence lengths
-    sequence_length = 7
+    # TODO: hrs/min
+    sequence_length = 1
 
     tr, val, te = get_prepared_data_v3(dataset_hrs_path)
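sequence_length is consumed by prepare_data_for_model (used in the baselines hunk further down). For reference, a minimal sliding-window sketch of such a helper, assuming user_data is a 2-D array with the label in the last column; the repo's actual implementation is not part of this diff:

import numpy as np

def prepare_data_for_model_sketch(user_data, sequence_length):
    # Slide a window of sequence_length steps over the rows;
    # each window predicts the label of the step that follows it.
    x, y = [], []
    for i in range(len(user_data) - sequence_length):
        x.append(user_data[i:i + sequence_length, :-1])  # feature window
        y.append(user_data[i + sequence_length, -1])     # next-step label
    return np.array(x), np.array(y)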
|
@@ -417,7 +417,7 @@ def manual_tuning_v3(model_type):
     # config
     repeats = 3
     n_batch = 1024
-    n_epochs = 200
+    n_epochs = 10
     n_neurons = 256
     n_neurons2 = 512
     n_neurons3 = 512
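How n_neurons, n_neurons2 and n_neurons3 enter the network is not visible in this diff. A plausible stacked-LSTM sketch wired to the config above; the layer layout, optimizer and n_classes are assumptions, not the repo's actual architecture:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

def build_model_sketch(sequence_length, n_features, n_classes):
    # Layer widths follow the config above (n_neurons=256, n_neurons2/3=512).
    model = Sequential([
        LSTM(256, return_sequences=True, input_shape=(sequence_length, n_features)),
        LSTM(512),
        Dense(512, activation='relu'),
        Dense(n_classes, activation='softmax'),  # n_classes: assumed output size
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    return model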
|
@@ -483,9 +483,9 @@ def get_prepared_data_v3(filename, sample=100):
     df = pd.read_json(filename)
     df = remove_covid_data(df)
 
-    # remove users with too little data
+    # remove users with too little data (optional)
     value_counts = df[user_str].value_counts()
-    df = df[df[user_str].isin(value_counts[value_counts > 1000].index)]
+    # df = df[df[user_str].isin(value_counts[value_counts > 1000].index)]
 
     adjusted_df = pd.DataFrame()
     # adjust labels
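remove_covid_data is called above but not shown in this diff. A sketch of the date-range filter it presumably performs; the column name and the period boundaries are placeholders, not values taken from the repo:

def remove_covid_data_sketch(df, date_col='date'):
    # Drop rows inside an assumed COVID period; the real boundaries
    # used by remove_covid_data are not visible in this diff.
    covid = df[date_col].between('2020-03-01', '2021-06-30')
    return df[~covid]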
|
@@ -532,12 +532,12 @@ def scale_dataset(scaler, df):
 
 
 def calculate_baselines_v3():
     file_combinations = [(hour_timespan_str, dataset_hrs_path),
-                         (min_timespan_str, dataset_min_path),
+                         # (min_timespan_str, dataset_min_path),  # TODO: dataset binning not ready for minutes
                          ]
     baseline_res = pd.DataFrame()
     for timespan_id, filename in file_combinations:
         _, _, te = get_prepared_data_v3(filename)
-        for sequence_length in range(5, 30, 5):
+        for sequence_length in range(1, 30, 5):
             x, y = prepare_data_for_model(user_data=te, sequence_length=sequence_length)
 
             for strategy in ['most_frequent', 'stratified', 'uniform']:
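The strategies 'most_frequent', 'stratified' and 'uniform' match scikit-learn's DummyClassifier. A minimal sketch of how such a baseline is typically scored; fitting and scoring on the same test windows is an assumption of this sketch, not necessarily what the loop body does:

from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score

def score_baseline_sketch(x, y, strategy):
    # Dummy baselines ignore the features, so flattening the
    # (n_windows, sequence_length, n_features) tensor is harmless.
    x_flat = x.reshape(len(x), -1)
    clf = DummyClassifier(strategy=strategy)
    clf.fit(x_flat, y)
    return accuracy_score(y, clf.predict(x_flat))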
|
@@ -554,6 +554,10 @@ def calculate_baselines_v3():
     print('Done')
 
 
 if __name__ == "__main__":
+    # create the directories that are needed
+    create_dir('results/')
+    create_dir(figure_path)
+
     # main_two_v1()
     # visualise_results_v1()
     # test(model_type=model_type_gru)
|
@@ -561,6 +565,8 @@ if __name__ == "__main__":
     # visualise_results_v2()
     # manual_tuning(model_type=model_type_lstm)
     # calculate_baselines()
 
+    #### current from here on (21.01.2026)
+    # calculate_baselines_v3()
+    manual_tuning_v3(model_type=model_type_lstm)
-    print('Done')  # TODO: differently sized amounts of data are a problem (also in the evaluation)
+    print('Done')