|
|
|
@ -68,33 +68,34 @@ def prepare_user_data(df): |
|
|
|
def make_sequences(data, sequence_length, step=None):
    """Cut one user's rows into fixed-length windows with one label each.

    Args:
        data: DataFrame containing a 'user' column; every other column is
            treated as a feature.
        sequence_length: Number of consecutive rows per window.
        step: Offset between the starts of consecutive windows. Defaults to
            ``sequence_length`` (windows do not overlap); pass ``1`` to get
            fully overlapping windows.

    Returns:
        A tuple ``(x, y)`` of equally long lists: ``x`` holds the feature
        windows (each an array slice of ``sequence_length`` rows) and ``y``
        holds the 'user' value of the last row of the matching window.
        Both lists are empty when ``data`` has fewer than
        ``sequence_length`` rows.
    """
    if step is None:
        # Default: non-overlapping windows.
        step = sequence_length

    features = data.drop('user', axis=1).values
    labels = data['user'].values

    x, y = [], []
    # Only full windows are emitted; trailing rows that do not fill a
    # complete window are dropped.
    for start in range(0, len(features) - sequence_length + 1, step):
        end = start + sequence_length
        x.append(features[start:end])
        # The window is labelled with the user of its last row.
        y.append(labels[end - 1])
    return x, y
|
|
|
|
|
|
|
def prepare_data_for_model(user_data, sequence_length): |
|
|
|
def prepare_data_for_model(user_data, sequence_length, print_counts=False): |
|
|
|
x, y = [], [] |
|
|
|
combined = pd.DataFrame() |
|
|
|
for user, data in user_data.items(): |
|
|
|
x_new, y_new = make_sequences(data, sequence_length) |
|
|
|
x = x + x_new |
|
|
|
y = y + y_new |
|
|
|
if len(x_new)>0: |
|
|
|
if print_counts and len(x_new)>0: |
|
|
|
var = [[pd.DataFrame(a[s])for s in range(sequence_length)] for a in x_new ] |
|
|
|
df_var = pd.concat([pd.concat(seq_list).T for seq_list in var]) |
|
|
|
df_var['user'] = user |
|
|
|
combined = pd.concat([combined, df_var], ignore_index=True) |
|
|
|
combined_ohne = combined.drop('user', axis=1) |
|
|
|
print('Alle', len(combined)) |
|
|
|
print('Unique mit user', len(combined.drop_duplicates())) |
|
|
|
print('Unique ohne user', len(combined_ohne.drop_duplicates())) |
|
|
|
print('Unique') |
|
|
|
print(combined.drop_duplicates()['user'].value_counts()) |
|
|
|
print('Alle') |
|
|
|
print(combined['user'].value_counts()) |
|
|
|
if print_counts: |
|
|
|
combined_ohne = combined.drop('user', axis=1) |
|
|
|
print('Alle', len(combined)) |
|
|
|
print('Unique mit user', len(combined.drop_duplicates())) |
|
|
|
print('Unique ohne user', len(combined_ohne.drop_duplicates())) |
|
|
|
print('Unique') |
|
|
|
print(combined.drop_duplicates()['user'].value_counts()) |
|
|
|
print('Alle') |
|
|
|
print(combined['user'].value_counts()) |
|
|
|
random.Random(17).shuffle(x) |
|
|
|
random.Random(17).shuffle(y) |
|
|
|
x = np.array(x) |
|
|
|
@ -239,11 +240,8 @@ def train_one_model(train_data, val_data, n_batch, n_epochs, n_neurons,n_neurons |
|
|
|
# model.add(LSTM(n_neurons, kernel_regularizer=reg1, return_sequences=True)) |
|
|
|
model.add(LSTM(n_neurons)) |
|
|
|
# model.add(LSTM(n_neurons2)) |
|
|
|
# model.add(LSTM(n_neurons3, return_sequences=True)) |
|
|
|
# model.add(LSTM(n_neurons4)) |
|
|
|
if model_type == model_type_gru: |
|
|
|
model.add(GRU(n_neurons)) |
|
|
|
# TODO: add another dense layer |
|
|
|
#model.add(Dense(n_neurons, activation='relu')) |
|
|
|
#model.add(Dropout(d1)) |
|
|
|
model.add(Dense(len(users), activation='softmax')) |
|
|
|
|