|
|
@ -77,10 +77,24 @@ def make_sequences(data, sequence_length): |
|
|
|
|
|
|
|
|
def prepare_data_for_model(user_data, sequence_length): |
|
|
def prepare_data_for_model(user_data, sequence_length): |
|
|
x, y = [], [] |
|
|
x, y = [], [] |
|
|
|
|
|
combined = pd.DataFrame() |
|
|
for user, data in user_data.items(): |
|
|
for user, data in user_data.items(): |
|
|
x_new, y_new = make_sequences(data, sequence_length) |
|
|
x_new, y_new = make_sequences(data, sequence_length) |
|
|
x = x + x_new |
|
|
x = x + x_new |
|
|
y = y + y_new |
|
|
y = y + y_new |
|
|
|
|
|
if len(x_new)>0: |
|
|
|
|
|
var = [[pd.DataFrame(a[s])for s in range(sequence_length)] for a in x_new ] |
|
|
|
|
|
df_var = pd.concat([pd.concat(seq_list).T for seq_list in var]) |
|
|
|
|
|
df_var['user'] = user |
|
|
|
|
|
combined = pd.concat([combined, df_var], ignore_index=True) |
|
|
|
|
|
combined_ohne = combined.drop('user', axis=1) |
|
|
|
|
|
print('Alle', len(combined)) |
|
|
|
|
|
print('Unique mit user', len(combined.drop_duplicates())) |
|
|
|
|
|
print('Unique ohne user', len(combined_ohne.drop_duplicates())) |
|
|
|
|
|
print('Unique') |
|
|
|
|
|
print(combined.drop_duplicates()['user'].value_counts()) |
|
|
|
|
|
print('Alle') |
|
|
|
|
|
print(combined['user'].value_counts()) |
|
|
random.Random(17).shuffle(x) |
|
|
random.Random(17).shuffle(x) |
|
|
random.Random(17).shuffle(y) |
|
|
random.Random(17).shuffle(y) |
|
|
x = np.array(x) |
|
|
x = np.array(x) |
|
|
|