Example #1
    def load(self, ratings, train, validation):

        self.train = train
        self.validation = validation
        self.num_users = movielens_extractor.get_num_users(ratings)
        self.num_items = movielens_extractor.get_num_items(ratings)

        self.mean_rating = np.mean(self.train[:, 2])
        self.ratings_test = np.float64(validation[:, 2])
        self.item_features = 0.1 * NormalRandom.generate_matrix(
            self.num_items, self.num_features)
        self.user_features = 0.1 * NormalRandom.generate_matrix(
            self.num_users, self.num_features)

        self.df_post_item = self.df_item + self.num_items
        self.df_post_user = self.df_user + self.num_users

        # num_user, num_item, ratings = build_ml_1m()
        self.matrix = build_rating_matrix(self.num_users, self.num_items,
                                          train)
        self.matrix = self.matrix.T
        self.counter_prob = 1
        self.probe_rat_all = self.pred(self.item_features, self.user_features,
                                       self.validation, self.mean_rating)

        self.estimate()
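
Example #1 depends on helpers that are not shown on this page (movielens_extractor, NormalRandom.generate_matrix, build_rating_matrix). Below is a minimal sketch of the two matrix helpers, assuming ratings arrive as (user, item, rating) rows; the names and signatures are taken from the calls above, while the bodies are only an assumption:

import numpy as np


class NormalRandom(object):
    @staticmethod
    def generate_matrix(num_rows, num_columns):
        # num_rows x num_columns matrix of standard-normal draws, matching
        # the 0.1 * NormalRandom.generate_matrix(...) calls in Example #1
        return np.random.randn(num_rows, num_columns)


def build_rating_matrix(num_users, num_items, ratings):
    # dense num_users x num_items matrix built from (user, item, rating)
    # rows; unrated entries stay 0 (Example #1 transposes it afterwards)
    matrix = np.zeros((num_users, num_items))
    for user, item, rating in ratings:
        matrix[int(user), int(item)] = rating
    return matrix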

Example #2

    def load(self, train, validation):

        # self.train =\
        #     movielens_extractor.reviews_to_numpy_matrix(train_reviews)
        self.train = train
        self.validation = validation
        self.num_users = movielens_extractor.get_num_users(self.train)
        self.num_items = movielens_extractor.get_num_items(self.train)
        self.mean_rating = movielens_extractor.get_mean_rating(self.train)
        # float() must wrap len() before dividing, otherwise Python 2
        # truncates len(self.train) / self.num_batches before np.ceil runs
        self.batch_size = int(
            np.ceil(float(len(self.train)) / self.num_batches))
        # spread the global step size epsilon over the examples of one batch
        self.alpha = self.epsilon / self.batch_size

        self.logger.debug('epsilon: %f', self.epsilon)
        self.logger.debug('alpha: %f', self.alpha)

        # latent variables
        self.user_features = 0.1 * np.random.rand(self.num_users, self.num_features)
        self.item_features = 0.1 * np.random.rand(self.num_items, self.num_features)
        user_features_inc = np.zeros((self.num_users, self.num_features))
        item_features_inc = np.zeros((self.num_items, self.num_features))

        converge = 1e-4
        last_rmse = None

        for epoch in xrange(self.num_epochs):

            # In each cycle the training vector is shuffled
            np.random.shuffle(self.train)

            for batch in xrange(self.num_batches):
                data = self.train[batch * self.batch_size: (batch + 1) * self.batch_size]
                users = data[:, 0]
                items = data[:, 1]
                ratings = data[:, 2]

                # Default prediction is the mean rating
                ratings = ratings - self.mean_rating

                # compute predictions
                predicted_ratings = np.sum(self.user_features[users, :] * self.item_features[items, :], 1)

                # compute gradients
                error_list = predicted_ratings - ratings
                error_matrix = np.tile(error_list, (self.num_features, 1)).T
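                # (np.tile copies the error vector once per feature; with
                # broadcasting, error_list[:, np.newaxis] would avoid the copy)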

                item_gradients =\
                    error_matrix * self.user_features[users, :] + self.var_lambda * self.item_features[items, :]
                user_gradients =\
                    error_matrix * self.item_features[items, :] + self.var_lambda * self.user_features[users, :]
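                # (per-rating gradients of the regularized squared error
                # 0.5*(pred - r)**2 + 0.5*var_lambda*(|u|**2 + |v|**2)
                # with respect to the item row v and the user row u)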

                user_feature_gradients =\
                    np.zeros((self.num_users, self.num_features))
                item_feature_gradients =\
                    np.zeros((self.num_items, self.num_features))
                # The gradients above are computed per rating; they still
                # have to be grouped (summed) per user and per item before
                # they can be applied to the feature matrices

                # iterate over the actual batch length: the last batch can
                # be shorter than batch_size
                for i in xrange(data.shape[0]):
                    user_feature_gradients[users[i], :] += user_gradients[i, :]
                    item_feature_gradients[items[i], :] += item_gradients[i, :]
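                # (equivalent vectorized form:
                #  np.add.at(user_feature_gradients, users, user_gradients))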

                # Update item and user features
                # The update is done using the momentum technique of gradient descent
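                # velocity <- momentum * velocity + alpha * gradient
                # features <- features - velocity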
                user_features_inc = self.momentum * user_features_inc +\
                    self.alpha * user_feature_gradients
                # self.user_features = self.user_features - \
                #     self.alpha * user_feature_gradients
                self.user_features = self.user_features - user_features_inc

                item_features_inc = self.momentum * item_features_inc +\
                    self.alpha * item_feature_gradients
                # self.item_features = self.item_features - \
                #     self.alpha * item_feature_gradients
                self.item_features = self.item_features - item_features_inc

            # compute RMSE
            # train errors

            train_preds = self.predict(self.train)
            train_rmse = RMSE(train_preds, np.float16(self.train[:, 2]))

            # validation errors
            validation_preds = self.predict(self.validation)
            validation_rmse = RMSE(
                validation_preds, np.float16(self.validation[:, 2]))
            self.train_errors.append(train_rmse)
            self.validation_errors.append(validation_rmse)
            print "iterations: %3d, train RMSE: %.6f, validation RMSE: %.6f " % \
                (epoch + 1, train_rmse, validation_rmse)

            # stop if converged: the early-stopping break is commented out
            # here, so training always runs for the full num_epochs
            if last_rmse:
                if abs(train_rmse - last_rmse) < converge:
                    # break
                    pass
            last_rmse = train_rmse
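
For context, a hedged usage sketch (not part of the original example): it assumes the method above lives on a recommender class, given the hypothetical name PMFRecommender here, whose constructor sets num_features, num_batches, num_epochs, epsilon, momentum, var_lambda and the two error lists, and that ratings are (user, item, rating) rows:

import numpy as np

# toy (user, item, rating) triples; a real run would use the arrays
# produced by movielens_extractor
ratings = np.array([[0, 0, 4],
                    [0, 1, 3],
                    [1, 0, 5],
                    [1, 2, 2],
                    [2, 1, 1]])

split = int(0.8 * len(ratings))
train, validation = ratings[:split], ratings[split:]

recommender = PMFRecommender()       # hypothetical class name
recommender.load(train, validation)  # trains, printing per-epoch RMSE
predictions = recommender.predict(validation)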