def train(self, no_epochs=20, batches=1024, lr=0.001, no_factors=10, no_negatives=10, gen_mode='point', val_split=0.1):
    print('Generating training instances', 'of type', gen_mode)
    x, y = generator(self.observed_relevance, self.categories, self.no_categories,
                     self.category_per_item, self.categories_per_user,
                     no_negatives=no_negatives, gen_mode=gen_mode)
    print('Performing training -', 'Epochs', no_epochs, 'Batch Size', batches,
          'Learning Rate', lr, 'Factors', no_factors, 'Negatives', no_negatives, 'Mode', gen_mode)

    # Build the model with the requested number of latent factors and the MLP layer sizes.
    self.model = self.__get_model(no_factors, np.array([64, 32, 16, 8], np.int32))
    self.model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                       loss='binary_crossentropy')

    # Point-wise training only needs (user, item, label) triples; the third generator
    # output (the negative item used in pair-wise mode) is discarded here.
    user_input, item_i_input, _ = x
    labels = y

    # Stop training once the validation loss has not improved for two consecutive epochs.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=1)
    ]
    self.model.fit([np.array(user_input), np.array(item_i_input)], np.array(labels),
                   validation_split=val_split, batch_size=batches, epochs=no_epochs,
                   verbose=1, shuffle=True, callbacks=callbacks)
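
# Hypothetical usage of the point-wise variant above, assuming `recommender` is an
# already-constructed instance of the surrounding class (the variable name is chosen
# here for illustration only and does not appear elsewhere in this file):
#
#   recommender.train(no_epochs=20, batches=1024, lr=0.001,
#                     no_factors=10, no_negatives=10, gen_mode='point', val_split=0.1)
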
def train(self, no_epochs=100, batches=1024, lr=0.001, no_factors=10, no_negatives=10, gen_mode='pair', val_split=0.01, val_interval=4):
    print('Generating training instances', 'of type', gen_mode)
    x, y = generator(self.observed_relevance, self.categories, self.no_categories,
                     self.category_per_item, self.categories_per_user,
                     no_negatives=no_negatives, gen_mode=gen_mode)
    print('Performing training -', 'Epochs', no_epochs, 'Batch Size', batches,
          'Learning Rate', lr, 'Factors', no_factors, 'Negatives', no_negatives, 'Mode', gen_mode)

    self.model = self.__get_model()
    self.model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), loss=get_bpr_loss)

    # Pair-wise training uses (user, positive item, negative item) triples.
    user_input, item_i_input, item_j_input = x
    labels = y

    # Hold out a random fraction of the instances for validation; the remainder is used for fitting.
    train_instance_indexes = np.random.choice(
        list(range(len(user_input))), int(len(user_input) * (1 - val_split)), replace=False)
    val_instance_indexes = np.array(
        list(set(range(len(user_input))) - set(train_instance_indexes)))

    user_input_train = user_input[train_instance_indexes]
    item_i_input_train = item_i_input[train_instance_indexes]
    item_j_input_train = item_j_input[train_instance_indexes]
    labels_train = labels[train_instance_indexes]

    user_input_val = user_input[val_instance_indexes]
    item_i_input_val = item_i_input[val_instance_indexes]
    item_j_input_val = item_j_input[val_instance_indexes]

    best_auc_score = 0
    for epoch in range(no_epochs):
        self.model.fit(
            [user_input_train, item_i_input_train, item_j_input_train], labels_train,
            initial_epoch=epoch, epochs=epoch + 1,
            batch_size=batches, verbose=1, shuffle=True)

        if (epoch % val_interval) == 0:
            # Every val_interval epochs, score the held-out pairs with the current
            # embeddings: a pair counts as correct when the positive item is ranked
            # above the negative one (a sample-based AUC estimate).
            user_matrix = self.model.get_layer('UserEmb').get_weights()[0]
            item_matrix = self.model.get_layer('ItemEmb').get_weights()[0]
            auc_scores = []
            for t, (u, i, j) in enumerate(
                    zip(user_input_val, item_i_input_val, item_j_input_val)):
                auc_scores.append(
                    1 if np.dot(user_matrix[u], item_matrix[i])
                    > np.dot(user_matrix[u], item_matrix[j]) else 0)
                if (t % 1000) == 0:
                    print('\rValidation accuracy:', auc_scores.count(1) / len(auc_scores),
                          '(Sample', t, 'of', str(len(val_instance_indexes)) + ')', end='')
            print()

            # Stop early as soon as the validation AUC drops below the best value seen so far.
            if (auc_scores.count(1) / len(auc_scores)) < best_auc_score:
                break
            else:
                best_auc_score = auc_scores.count(1) / len(auc_scores)
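
# The pair-wise variant above compiles with `loss=get_bpr_loss`, which is defined
# elsewhere in this repository. The sketch below is an illustration only (an
# assumption, not the repository's actual implementation): it assumes the model's
# single output is the score difference x_uij = score(u, i) - score(u, j), so the
# BPR objective reduces to -log(sigmoid(x_uij)), with y_true ignored. `tf` refers to
# TensorFlow as imported at the top of this module.
def get_bpr_loss_sketch(y_true, y_pred):
    # y_pred holds the positive-minus-negative score differences for the batch.
    return -tf.reduce_mean(tf.math.log(tf.sigmoid(y_pred)))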