示例#1
0
    def train(self,
              no_epochs=20,
              batches=1024,
              lr=0.001,
              no_factors=10,
              no_negatives=10,
              gen_mode='point',
              val_split=0.1):
        """Generate training instances and fit a model with binary cross-entropy.

        The fitted model is stored in ``self.model``. Training stops early
        when ``val_loss`` has not improved for two consecutive epochs.

        Args:
            no_epochs: Maximum number of epochs passed to ``fit``.
            batches: Batch size passed to ``fit``.
            lr: Learning rate of the Adam optimizer.
            no_factors: Forwarded as the first argument of ``__get_model``.
            no_negatives: Forwarded to ``generator``.
            gen_mode: Forwarded to ``generator`` as the instance type.
            val_split: Fraction handed to ``fit`` as ``validation_split``.
        """
        print('Generating training instances', 'of type', gen_mode)
        x, y = generator(self.observed_relevance,
                         self.categories,
                         self.no_categories,
                         self.category_per_item,
                         self.categories_per_user,
                         no_negatives=no_negatives,
                         gen_mode=gen_mode)

        print('Performing training -', 'Epochs', no_epochs, 'Batch Size',
              batches, 'Learning Rate', lr, 'Factors', no_factors, 'Negatives',
              no_negatives, 'Mode', gen_mode)
        # The second argument is a fixed list of four sizes; presumably the
        # hidden-layer widths of the network -- confirm against __get_model.
        self.model = self.__get_model(no_factors,
                                      np.array([64, 32, 16, 8], np.int32))
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and is not honoured by current Keras optimizers.
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
            loss='binary_crossentropy')

        # Only the user and first-item inputs are fed to the model; the third
        # element produced by the generator is not used in this mode.
        user_input, item_i_input, _ = x
        labels = y
        callbacks = [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                             patience=2,
                                             verbose=1)
        ]
        self.model.fit([np.array(user_input),
                        np.array(item_i_input)],
                       np.array(labels),
                       validation_split=val_split,
                       batch_size=batches,
                       epochs=no_epochs,
                       verbose=1,
                       shuffle=True,
                       callbacks=callbacks)
示例#2
0
    def train(self,
              no_epochs=100,
              batches=1024,
              lr=0.001,
              no_factors=10,
              no_negatives=10,
              gen_mode='pair',
              val_split=0.01,
              val_interval=4):
        """Generate training instances and fit a model with the BPR loss.

        The instances are split at random into a training part and a
        validation part. Every ``val_interval`` epochs the share of
        validation triples ``(u, i, j)`` for which item ``i`` scores higher
        than item ``j`` for user ``u`` is computed from the ``UserEmb`` and
        ``ItemEmb`` layer weights; training stops as soon as that share drops
        below the best value seen so far. The model is stored in
        ``self.model``.

        Args:
            no_epochs: Maximum number of epochs to train.
            batches: Batch size passed to ``fit``.
            lr: Learning rate of the Adam optimizer.
            no_factors: Only reported in the log line here; it is not passed
                to ``__get_model`` by this method.
            no_negatives: Forwarded to ``generator``.
            gen_mode: Forwarded to ``generator`` as the instance type.
            val_split: Fraction of instances held out for validation. When
                the held-out part is empty, validation (and therefore early
                stopping) is skipped.
            val_interval: Validate on epochs where
                ``epoch % val_interval == 0``; must be non-zero.
        """
        print('Generating training instances', 'of type', gen_mode)
        x, y = generator(self.observed_relevance,
                         self.categories,
                         self.no_categories,
                         self.category_per_item,
                         self.categories_per_user,
                         no_negatives=no_negatives,
                         gen_mode=gen_mode)

        print('Performing training -', 'Epochs', no_epochs, 'Batch Size',
              batches, 'Learning Rate', lr, 'Factors', no_factors, 'Negatives',
              no_negatives, 'Mode', gen_mode)
        self.model = self.__get_model()
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and is not honoured by current Keras optimizers.
        self.model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
            loss=get_bpr_loss)

        # Index-array selection below needs ndarrays, so convert up front in
        # case the generator hands back plain sequences.
        user_input, item_i_input, item_j_input = (np.asarray(part)
                                                  for part in x)
        labels = np.asarray(y)

        # One random permutation gives disjoint train/validation index sets;
        # slicing keeps an integer dtype even when the validation part is
        # empty.
        no_instances = len(user_input)
        no_train = int(no_instances * (1 - val_split))
        shuffled_indexes = np.random.permutation(no_instances)
        train_instance_indexes = shuffled_indexes[:no_train]
        val_instance_indexes = shuffled_indexes[no_train:]
        no_val = len(val_instance_indexes)

        user_input_train = user_input[train_instance_indexes]
        item_i_input_train = item_i_input[train_instance_indexes]
        item_j_input_train = item_j_input[train_instance_indexes]
        labels_train = labels[train_instance_indexes]
        user_input_val = user_input[val_instance_indexes]
        item_i_input_val = item_i_input[val_instance_indexes]
        item_j_input_val = item_j_input[val_instance_indexes]

        best_auc_score = 0
        for epoch in range(no_epochs):
            # Train exactly one epoch per call so validation can run between
            # epochs while Keras still reports the running epoch number.
            self.model.fit(
                [user_input_train, item_i_input_train, item_j_input_train],
                labels_train,
                initial_epoch=epoch,
                epochs=epoch + 1,
                batch_size=batches,
                verbose=1,
                shuffle=True)

            # Nothing to validate on: keep training without early stopping.
            if (epoch % val_interval) != 0 or no_val == 0:
                continue

            user_matrix = self.model.get_layer('UserEmb').get_weights()[0]
            item_matrix = self.model.get_layer('ItemEmb').get_weights()[0]
            correct = 0
            for t, (u, i, j) in enumerate(
                    zip(user_input_val, item_i_input_val, item_j_input_val)):
                score_i = np.dot(user_matrix[u], item_matrix[i])
                score_j = np.dot(user_matrix[u], item_matrix[j])
                if score_i > score_j:
                    correct += 1
                if (t % 1000) == 0:
                    print('\rValidation accuracy:',
                          correct / (t + 1),
                          '(Sample',
                          t,
                          'of',
                          str(no_val) + ')',
                          end='')
            print()

            auc_score = correct / no_val
            if auc_score < best_auc_score:
                break
            best_auc_score = auc_score