Browse Source

Added counting of data

master
Bianca Steffes 2 weeks ago
parent
commit
b01cd988e6
  1. 14
      pipeline.py

14
pipeline.py

@@ -77,10 +77,24 @@ def make_sequences(data, sequence_length):
def prepare_data_for_model(user_data, sequence_length):
x, y = [], []
combined = pd.DataFrame()
for user, data in user_data.items():
x_new, y_new = make_sequences(data, sequence_length)
x = x + x_new
y = y + y_new
if len(x_new)>0:
var = [[pd.DataFrame(a[s])for s in range(sequence_length)] for a in x_new ]
df_var = pd.concat([pd.concat(seq_list).T for seq_list in var])
df_var['user'] = user
combined = pd.concat([combined, df_var], ignore_index=True)
combined_ohne = combined.drop('user', axis=1)
print('Alle', len(combined))
print('Unique mit user', len(combined.drop_duplicates()))
print('Unique ohne user', len(combined_ohne.drop_duplicates()))
print('Unique')
print(combined.drop_duplicates()['user'].value_counts())
print('Alle')
print(combined['user'].value_counts())
random.Random(17).shuffle(x)
random.Random(17).shuffle(y)
x = np.array(x)

Loading…
Cancel
Save