Пример #1
0
    def set_URM_train(self, URM_train_new, estimate_model_for_cold_users = False, topK = 100, **kwargs):
        """
        Replace the training URM and optionally build a fallback model for cold users.

        :param URM_train_new: new user-rating matrix, must have the same shape as the current self.URM_train
        :param estimate_model_for_cold_users: strategy selector.
            "itemKNN" fits an ItemKNNCustomSimilarityRecommender on an item-item similarity
            computed from self.ITEM_factors.
            "mean_item_factors" recomputes self.USER_factors for every user as
            URM_train.dot(ITEM_factors), dividing each non-empty row by the square root of
            the user's profile length.
            Any other value (including the default False) only replaces the URM.
        :param topK: forwarded to compute_W_sparse_from_item_latent_factors and to the ItemKNN fit
        :param kwargs: not supported; a message is printed if any are received
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, "{}: set_URM_train old and new URM train have different shapes".format(self.RECOMMENDER_NAME)

        if kwargs:
            print("{}: set_URM_train keyword arguments not supported for this recommender class. Received: {}".format(self.RECOMMENDER_NAME, kwargs))

        # Work on a private CSR copy so later changes to the caller's matrix do not leak in
        self.URM_train = check_matrix(URM_train_new.copy(), 'csr', dtype=np.float32)
        self.URM_train.eliminate_zeros()

        if estimate_model_for_cold_users == "itemKNN":

            print("{}: Estimating ItemKNN model from ITEM latent factors...".format(self.RECOMMENDER_NAME))

            W_sparse = compute_W_sparse_from_item_latent_factors(self.ITEM_factors, topK=topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(self.URM_train)
            self._ItemKNNRecommender.fit(W_sparse, topK=topK)

            self._cold_user_KNN_model_available = True
            # A user is "warm" for the KNN model when the new URM holds at least one interaction
            self._warm_user_KNN_mask = np.ediff1d(self.URM_train.indptr) > 0

            print("{}: Estimating ItemKNN model from ITEM latent factors... done!".format(self.RECOMMENDER_NAME))

        elif estimate_model_for_cold_users == "mean_item_factors":

            print("{}: Estimating USER latent factors from ITEM latent factors...".format(self.RECOMMENDER_NAME))

            # Number of stored interactions per user, read from the CSR row pointers
            profile_length = np.ediff1d(self.URM_train.indptr)
            self._cold_user_mask = profile_length == 0

            self.USER_factors = self.URM_train.dot(self.ITEM_factors)

            # Divide every non-empty row by the sqrt of the profile length.
            # Empty profiles are skipped so that no division by zero takes place.
            # NOTE(review): assumes one URM row per user (n_users == URM_train.shape[0]) - confirm
            has_interactions = profile_length > 0
            self.USER_factors[has_interactions, :] /= np.sqrt(profile_length[has_interactions])[:, np.newaxis]

            print("{}: Estimating USER latent factors from ITEM latent factors... done!".format(self.RECOMMENDER_NAME))
Пример #2
0
    def load_model(self, folder_path, file_name=None):
        """
        Load the saved state and, if flagged, rebuild the ItemKNN model used for cold users.

        The parent load_model is looked up in the MRO after BaseMatrixFactorizationRecommender.
        When self._cold_user_KNN_model_flag is set, an ItemKNNCustomSimilarityRecommender is
        created on self.URM_train and fitted with the attributes _ItemKNNRecommender_W_sparse
        and _ItemKNNRecommender_topK, which are then deleted from the instance.

        :param folder_path: forwarded to the parent load_model
        :param file_name: forwarded to the parent load_model
        :return: None
        """
        parent = super(BaseMatrixFactorizationRecommender, self)
        parent.load_model(folder_path, file_name=file_name)

        # Nothing else to restore when no KNN model was flagged
        if not self._cold_user_KNN_model_flag:
            return

        self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(self.URM_train)

        similarity = self._ItemKNNRecommender_W_sparse
        neighbourhood_size = self._ItemKNNRecommender_topK
        self._ItemKNNRecommender.fit(similarity, topK=neighbourhood_size)

        # The temporary attributes are only needed to rebuild the KNN model
        del self._ItemKNNRecommender_W_sparse
        del self._ItemKNNRecommender_topK
Пример #3
0
    def load_model(self, folder_path, file_name=None):
        """
        Load the wrapper state, the wrapped MF recommender and, if flagged, the cold-user ItemKNN model.

        The wrapped recommender is read from file_name + "_warm_users" and the ItemKNN model
        from file_name + "_cold_users".

        :param folder_path: folder the model files are read from
        :param file_name: base file name; defaults to self.RECOMMENDER_NAME when None
        :return: None
        """
        # Resolve the default before building the suffixed names:
        # concatenating None with a str would raise TypeError
        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        # The parent load_model is looked up in the MRO after BaseMatrixFactorizationRecommender
        super(BaseMatrixFactorizationRecommender,
              self).load_model(folder_path, file_name=file_name)

        self.mf_recommender.load_model(folder_path,
                                       file_name=file_name + "_warm_users")

        if self._cold_user_KNN_model_flag:
            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(
                self.URM_train)
            self._ItemKNNRecommender.load_model(folder_path,
                                                file_name=file_name +
                                                "_cold_users")
Пример #4
0
    def computeMaxTheoreticalPerformance(self):
        """
        Evaluate two reference ItemKNN models and log their results.

        The first model uses self.S_matrix_target as similarity (collaborative performance).
        The second uses a similarity assembled from self.data_list, self.row_list and
        self.col_list: the best result reachable if the content matrix could reproduce,
        for each non-zero cell, exactly the value found in the collaborative similarity.

        :return: None, results are written with self.writeLog
        """

        def _fit_and_evaluate(similarity_matrix):
            # Fit a custom-similarity ItemKNN on the given matrix and run the evaluator on it
            knn_model = ItemKNNCustomSimilarityRecommender()
            knn_model.fit(similarity_matrix, self.URM_train)
            return self.evaluator_object(knn_model)

        print(self.RECOMMENDER_NAME + ": Computing collaborative performance")

        collaborative_results = _fit_and_evaluate(self.S_matrix_target)
        self.writeLog(self.RECOMMENDER_NAME + ": Collaborative performance is: {}".format(collaborative_results))

        print(self.RECOMMENDER_NAME + ": Computing top structural performance")

        # Square item-item matrix sized on the first dimension of the ICM
        item_count = self.ICM.shape[0]
        coordinates = (self.row_list, self.col_list)
        structural_similarity = sps.csr_matrix((self.data_list, coordinates),
                                               shape=(item_count, item_count))
        structural_similarity.eliminate_zeros()

        structural_results = _fit_and_evaluate(structural_similarity)
        self.writeLog(self.RECOMMENDER_NAME + ": Top structural performance is: {}".format(structural_results))
Пример #5
0
class BaseMatrixFactorizationRecommender(BaseRecommender):
    """
    Base class for recommenders whose model is a pair of latent factor matrices.

    Subclasses are expected to set self.USER_factors (n_users x n_factors) and
    self.ITEM_factors (n_items x n_factors). This class computes item scores as their
    dot product and can save the factors to disk.

    The prediction for cold users is -inf for ALL items, unless a fallback ItemKNN model
    has been estimated via set_URM_train.
    """
    def __init__(self, URM_train):
        super(BaseMatrixFactorizationRecommender, self).__init__(URM_train)

        # Users with an empty row in the training URM
        self._cold_user_mask = np.ediff1d(self.URM_train.indptr) == 0
        self._cold_user_KNN_model_available = False
        # Builtin bool: the np.bool alias is deprecated since NumPy 1.20 and missing in 1.24-1.26
        self._warm_user_KNN_mask = np.zeros(len(self._cold_user_mask),
                                            dtype=bool)

        if self._cold_user_mask.any():
            print("{}: Detected {} ({:.2f} %) cold users.".format(
                self.RECOMMENDER_NAME, self._cold_user_mask.sum(),
                self._cold_user_mask.sum() / len(self._cold_user_mask) * 100))

    def _get_cold_user_mask(self):
        """Return the boolean mask marking the users that are cold for the MF model."""
        return self._cold_user_mask

    def set_URM_train(self,
                      URM_train_new,
                      estimate_model_for_cold_users=False,
                      topK=100,
                      **kwargs):
        """
        Replace the training URM and optionally build a fallback model for cold users.

        :param URM_train_new: new user-rating matrix, must have the same shape as the current self.URM_train
        :param estimate_model_for_cold_users: strategy selector.
            "itemKNN" fits an ItemKNNCustomSimilarityRecommender on an item-item similarity
            computed from self.ITEM_factors.
            "mean_item_factors" recomputes self.USER_factors for every user as
            URM_train.dot(ITEM_factors), dividing each non-empty row by the square root of
            the user's profile length.
            Any other value (including the default False) only replaces the URM.
        :param topK: forwarded to compute_W_sparse_from_item_latent_factors and to the ItemKNN fit
        :param kwargs: not supported; a message is printed if any are received
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, "{}: set_URM_train old and new URM train have different shapes".format(
            self.RECOMMENDER_NAME)

        if kwargs:
            print(
                "{}: set_URM_train keyword arguments not supported for this recommender class. Received: {}"
                .format(self.RECOMMENDER_NAME, kwargs))

        # Work on a private CSR copy so later changes to the caller's matrix do not leak in
        self.URM_train = check_matrix(URM_train_new.copy(),
                                      'csr',
                                      dtype=np.float32)
        self.URM_train.eliminate_zeros()

        if estimate_model_for_cold_users == "itemKNN":

            print("{}: Estimating ItemKNN model from ITEM latent factors...".
                  format(self.RECOMMENDER_NAME))

            W_sparse = compute_W_sparse_from_item_latent_factors(
                self.ITEM_factors, topK=topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(
                self.URM_train)
            self._ItemKNNRecommender.fit(W_sparse, topK=topK)

            self._cold_user_KNN_model_available = True
            # A user is "warm" for the KNN model when the new URM holds at least one interaction
            self._warm_user_KNN_mask = np.ediff1d(self.URM_train.indptr) > 0

            print(
                "{}: Estimating ItemKNN model from ITEM latent factors... done!"
                .format(self.RECOMMENDER_NAME))

        elif estimate_model_for_cold_users == "mean_item_factors":

            print(
                "{}: Estimating USER latent factors from ITEM latent factors..."
                .format(self.RECOMMENDER_NAME))

            # Number of stored interactions per user, read from the CSR row pointers
            profile_length = np.ediff1d(self.URM_train.indptr)
            self._cold_user_mask = profile_length == 0

            self.USER_factors = self.URM_train.dot(self.ITEM_factors)

            # Divide every non-empty row by the sqrt of the profile length.
            # Empty profiles are skipped so that no division by zero takes place.
            # NOTE(review): assumes one URM row per user (n_users == URM_train.shape[0]) - confirm
            has_interactions = profile_length > 0
            self.USER_factors[has_interactions, :] /= np.sqrt(
                profile_length[has_interactions])[:, np.newaxis]

            print(
                "{}: Estimating USER latent factors from ITEM latent factors... done!"
                .format(self.RECOMMENDER_NAME))

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """
        Compute the item scores of a batch of users as USER_factors . ITEM_factors^T

        USER_factors is n_users x n_factors
        ITEM_factors is n_items x n_factors

        Users that are cold for the MF model receive the ItemKNN scores when a KNN model is
        available and they are warm for it, otherwise -inf for ALL items.

        :param user_id_array: array of user indices to score
        :param items_to_compute: optional item indices; all other items are scored -inf
        :return: array of shape (len(user_id_array), n_items)
        """

        assert self.USER_factors.shape[1] == self.ITEM_factors.shape[1], \
            "{}: User and Item factors have inconsistent shape".format(self.RECOMMENDER_NAME)

        assert self.USER_factors.shape[0] > user_id_array.max(),\
                "{}: Cold users not allowed. Users in trained model are {}, requested prediction for users up to {}".format(
                self.RECOMMENDER_NAME, self.USER_factors.shape[0], user_id_array.max())

        if items_to_compute is not None:
            # Items that were not requested keep a -inf score
            item_scores = np.full(
                (len(user_id_array), self.ITEM_factors.shape[0]),
                -np.inf,
                dtype=np.float32)
            item_scores[:, items_to_compute] = np.dot(
                self.USER_factors[user_id_array],
                self.ITEM_factors[items_to_compute, :].T)
        else:
            item_scores = np.dot(self.USER_factors[user_id_array],
                                 self.ITEM_factors.T)

        cold_users_MF_mask = self._get_cold_user_mask()[user_id_array]

        if cold_users_MF_mask.any():

            if self._cold_user_KNN_model_available:
                # Add KNN scores for users cold for MF but warm in KNN model
                cold_users_in_MF_warm_in_KNN_mask = np.logical_and(
                    cold_users_MF_mask,
                    self._warm_user_KNN_mask[user_id_array])

                item_scores[
                    cold_users_in_MF_warm_in_KNN_mask, :] = self._ItemKNNRecommender._compute_item_score(
                        user_id_array[cold_users_in_MF_warm_in_KNN_mask],
                        items_to_compute=items_to_compute)

                # Set cold users as those neither in MF nor in KNN
                cold_users_MF_mask = np.logical_and(
                    cold_users_MF_mask,
                    np.logical_not(cold_users_in_MF_warm_in_KNN_mask))

            # Set as -inf all remaining cold user scores
            item_scores[cold_users_MF_mask, :] = -np.inf

        return item_scores

    def saveModel(self, folder_path, file_name=None):
        """
        Pickle USER_factors, ITEM_factors and the cold user mask to folder_path + file_name.

        :param folder_path: prefix of the output path, concatenated as-is with file_name
        :param file_name: output file name; defaults to self.RECOMMENDER_NAME when None
        :return: None
        """

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME,
                                                     folder_path + file_name))

        dictionary_to_save = {
            "USER_factors": self.USER_factors,
            "ITEM_factors": self.ITEM_factors,
            "_cold_user_mask": self._cold_user_mask
        }

        # Context manager so the file is flushed and closed even if pickling fails
        with open(folder_path + file_name, "wb") as model_file:
            pickle.dump(dictionary_to_save,
                        model_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))
Пример #6
0
class BaseMatrixFactorizationRecommender(BaseRecommender):
    """
    Base class for recommenders whose model is a pair of latent factor matrices.

    Subclasses are expected to set self.USER_factors (n_users x n_factors) and
    self.ITEM_factors (n_items x n_factors), plus ITEM_bias, USER_bias and GLOBAL_bias
    when self.use_bias is True. This class computes item scores from them and can save
    the model to disk.

    The prediction for cold users is -inf for ALL items, unless a fallback ItemKNN model
    has been estimated via set_URM_train.
    """
    def __init__(self, URM_train):
        super(BaseMatrixFactorizationRecommender, self).__init__(URM_train)

        self.use_bias = False

        self._cold_user_KNN_model_available = False
        # Builtin bool: the np.bool alias is deprecated since NumPy 1.20 and missing in 1.24-1.26
        self._warm_user_KNN_mask = np.zeros(len(self._get_cold_user_mask()),
                                            dtype=bool)

    def set_URM_train(self,
                      URM_train_new,
                      estimate_model_for_cold_users=False,
                      topK=100,
                      **kwargs):
        """
        Replace the training URM and optionally build a fallback model for cold users.

        :param URM_train_new: new user-rating matrix, must have the same shape as the current self.URM_train
        :param estimate_model_for_cold_users: strategy selector.
            "itemKNN" fits an ItemKNNCustomSimilarityRecommender on an item-item similarity
            computed from self.ITEM_factors.
            "mean_item_factors" recomputes self.USER_factors for every user as
            URM_train.dot(ITEM_factors), dividing each non-empty row by the square root of
            the user's profile length.
            Any other value (including the default False) only replaces the URM.
        :param topK: forwarded to compute_W_sparse_from_item_latent_factors and to the ItemKNN fit
        :param kwargs: not supported; a message is printed if any are received
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, "{}: set_URM_train old and new URM train have different shapes".format(
            self.RECOMMENDER_NAME)

        if kwargs:
            print(
                "{}: set_URM_train keyword arguments not supported for this recommender class. Received: {}"
                .format(self.RECOMMENDER_NAME, kwargs))

        # Work on a private CSR copy so later changes to the caller's matrix do not leak in
        self.URM_train = check_matrix(URM_train_new.copy(),
                                      'csr',
                                      dtype=np.float32)
        self.URM_train.eliminate_zeros()

        if estimate_model_for_cold_users == "itemKNN":

            print("{}: Estimating ItemKNN model from ITEM latent factors...".
                  format(self.RECOMMENDER_NAME))

            W_sparse = compute_W_sparse_from_item_latent_factors(
                self.ITEM_factors, topK=topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(
                self.URM_train)
            self._ItemKNNRecommender.fit(W_sparse, topK=topK)

            self._cold_user_KNN_model_available = True
            # A user is "warm" for the KNN model when the new URM holds at least one interaction
            self._warm_user_KNN_mask = np.ediff1d(self.URM_train.indptr) > 0

            print(
                "{}: Estimating ItemKNN model from ITEM latent factors... done!"
                .format(self.RECOMMENDER_NAME))

        elif estimate_model_for_cold_users == "mean_item_factors":

            print(
                "{}: Estimating USER latent factors from ITEM latent factors..."
                .format(self.RECOMMENDER_NAME))

            # Number of stored interactions per user, read from the CSR row pointers
            profile_length = np.ediff1d(self.URM_train.indptr)
            self._cold_user_mask = profile_length == 0

            self.USER_factors = self.URM_train.dot(self.ITEM_factors)

            # Divide every non-empty row by the sqrt of the profile length.
            # Empty profiles are skipped so that no division by zero takes place.
            # NOTE(review): assumes one URM row per user (n_users == URM_train.shape[0]) - confirm
            has_interactions = profile_length > 0
            self.USER_factors[has_interactions, :] /= np.sqrt(
                profile_length[has_interactions])[:, np.newaxis]

            print(
                "{}: Estimating USER latent factors from ITEM latent factors... done!"
                .format(self.RECOMMENDER_NAME))

    #########################################################################################################
    ##########                                                                                     ##########
    ##########                               COMPUTE ITEM SCORES                                   ##########
    ##########                                                                                     ##########
    #########################################################################################################

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """
        Compute the item scores of a batch of users as USER_factors . ITEM_factors^T,
        adding the item, global and user biases when self.use_bias is True.

        USER_factors is n_users x n_factors
        ITEM_factors is n_items x n_factors

        The scores are then post-processed for cold users and cold items.

        :param user_id_array: array of user indices to score
        :param items_to_compute: optional item indices; all other items are scored -inf
        :return: array of shape (len(user_id_array), n_items)
        """

        assert self.USER_factors.shape[1] == self.ITEM_factors.shape[1], \
            "{}: User and Item factors have inconsistent shape".format(self.RECOMMENDER_NAME)

        assert self.USER_factors.shape[0] > user_id_array.max(),\
                "{}: Cold users not allowed. Users in trained model are {}, requested prediction for users up to {}".format(
                self.RECOMMENDER_NAME, self.USER_factors.shape[0], user_id_array.max())

        if items_to_compute is not None:
            # Items that were not requested keep a -inf score
            item_scores = np.full(
                (len(user_id_array), self.ITEM_factors.shape[0]),
                -np.inf,
                dtype=np.float32)
            item_scores[:, items_to_compute] = np.dot(
                self.USER_factors[user_id_array],
                self.ITEM_factors[items_to_compute, :].T)

        else:
            item_scores = np.dot(self.USER_factors[user_id_array],
                                 self.ITEM_factors.T)

        # No need to select only the specific negative items or warm users because the -inf score will not change
        if self.use_bias:
            item_scores += self.ITEM_bias + self.GLOBAL_bias
            item_scores = (item_scores.T + self.USER_bias[user_id_array]).T

        # items_to_compute is forwarded so the KNN fallback scores the same item subset
        item_scores = self._compute_item_score_postprocess_for_cold_users(
            user_id_array, item_scores, items_to_compute=items_to_compute)
        item_scores = self._compute_item_score_postprocess_for_cold_items(
            item_scores)

        return item_scores

    def _compute_item_score_postprocess_for_cold_users(self, user_id_array,
                                                       item_scores,
                                                       items_to_compute=None):
        """
        Overwrite the scores of the users that are cold for the MF model.

        Users that are warm for the ItemKNN model (when one is available) receive the KNN
        scores; all remaining cold users are set to -inf.

        :param user_id_array: array of user indices the rows of item_scores refer to
        :param item_scores: score matrix, modified in place
        :param items_to_compute: optional item indices forwarded to the ItemKNN model.
            Previously this name was read without being defined in this method, which
            raised NameError whenever the KNN fallback was used.
        :return: item_scores
        """

        cold_users_batch_mask = self._get_cold_user_mask()[user_id_array]

        if cold_users_batch_mask.any():

            if self._cold_user_KNN_model_available:
                # Add KNN scores for users cold for MF but warm in KNN model
                cold_users_in_MF_warm_in_KNN_mask = np.logical_and(
                    cold_users_batch_mask,
                    self._warm_user_KNN_mask[user_id_array])

                item_scores[
                    cold_users_in_MF_warm_in_KNN_mask, :] = self._ItemKNNRecommender._compute_item_score(
                        user_id_array[cold_users_in_MF_warm_in_KNN_mask],
                        items_to_compute=items_to_compute)

                # Set cold users as those neither in MF nor in KNN
                cold_users_batch_mask = np.logical_and(
                    cold_users_batch_mask,
                    np.logical_not(cold_users_in_MF_warm_in_KNN_mask))

            # Set as -inf all remaining cold user scores
            item_scores[cold_users_batch_mask, :] = -np.inf

        return item_scores

    #########################################################################################################
    ##########                                                                                     ##########
    ##########                                LOAD AND SAVE                                        ##########
    ##########                                                                                     ##########
    #########################################################################################################

    def saveModel(self, folder_path, file_name=None):
        """
        Save the factors, the bias flag, the cold user mask and, when use_bias is True,
        the bias terms through DataIO.

        :param folder_path: folder handed to DataIO
        :param file_name: output file name; defaults to self.RECOMMENDER_NAME when None
        :return: None
        """

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME,
                                                     folder_path + file_name))

        data_dict_to_save = {
            "USER_factors": self.USER_factors,
            "ITEM_factors": self.ITEM_factors,
            "use_bias": self.use_bias,
            "_cold_user_mask": self._cold_user_mask
        }

        if self.use_bias:
            data_dict_to_save["ITEM_bias"] = self.ITEM_bias
            data_dict_to_save["USER_bias"] = self.USER_bias
            data_dict_to_save["GLOBAL_bias"] = self.GLOBAL_bias

        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name,
                         data_dict_to_save=data_dict_to_save)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))
Пример #7
0
class MF_cold_user_wrapper(BaseMatrixFactorizationRecommender):
    """
    Wrapper around a matrix-factorization recommender that adds a fallback for cold users.

    The wrapped recommender is fitted as usual and its factors are copied into the wrapper.
    When the URM is later replaced with set_URM_train, users that were cold at training
    time can be served either by an ItemKNN model built from the item factors ("itemKNN")
    or by user factors estimated from the item factors ("mean_item_factors").
    """

    RECOMMENDER_NAME = "MF_cold_user_wrapper"

    def __init__(self,
                 MF_recommender_class: BaseMatrixFactorizationRecommender,
                 *posargs, **kwargs):
        """
        Creates an instance of the MF algorithm with the given hyperparameters and data
        :param MF_recommender_class: class of the recommender to wrap; it is instantiated
            with the same positional and keyword arguments passed to the parent constructor
        :param posargs: positional arguments for both constructors
        :param kwargs: keyword arguments for both constructors
        """
        super(MF_cold_user_wrapper, self).__init__(*posargs, **kwargs)
        self.mf_recommender = MF_recommender_class(*posargs, **kwargs)
        # The wrapper reports itself under the name of the wrapped recommender
        self.RECOMMENDER_NAME = self.mf_recommender.RECOMMENDER_NAME
        self.estimate_model_for_cold_users = False

        self._cold_user_KNN_model_flag = False
        self._cold_user_KNN_estimated_factors_flag = False
        # Builtin bool: the np.bool alias is deprecated since NumPy 1.20 and missing in 1.24-1.26
        self._warm_user_KNN_mask = np.zeros(len(self._get_cold_user_mask()),
                                            dtype=bool)

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """
        Compute the item scores with the inherited MF implementation, then overwrite the
        rows of cold users with the ItemKNN scores when that model is available.

        :param user_id_array: array of user indices to score
        :param items_to_compute: optional item indices forwarded to both models
        :return: item score matrix
        """

        # The inherited implementation works on the wrapper's own copy of the factors
        item_scores = super(MF_cold_user_wrapper, self)._compute_item_score(
            user_id_array, items_to_compute=items_to_compute)
        item_scores = self._compute_item_score_for_cold_users(
            user_id_array, item_scores, items_to_compute=items_to_compute)

        return item_scores

    def _compute_item_score_for_cold_users(self,
                                           user_id_array,
                                           item_scores,
                                           items_to_compute=None):
        """
        Compute item scores with the ItemKNN model for users cold for MF but warm for KNN.

        Nothing is changed when the user factors of cold users have been estimated
        (_cold_user_KNN_estimated_factors_flag) or when no KNN model has been built.

        :param user_id_array: array of user indices the rows of item_scores refer to
        :param item_scores: score matrix, modified in place
        :param items_to_compute: optional item indices forwarded to the ItemKNN model
        :return: item_scores
        """

        cold_users_batch_mask = self._get_cold_user_mask()[user_id_array]

        if cold_users_batch_mask.any(
        ) and not self._cold_user_KNN_estimated_factors_flag:

            if self._cold_user_KNN_model_flag:
                # Add KNN scores for users cold for MF but warm in KNN model
                cold_users_in_MF_warm_in_KNN_mask = np.logical_and(
                    cold_users_batch_mask,
                    self._warm_user_KNN_mask[user_id_array])

                item_scores[
                    cold_users_in_MF_warm_in_KNN_mask, :] = self._ItemKNNRecommender._compute_item_score(
                        user_id_array[cold_users_in_MF_warm_in_KNN_mask],
                        items_to_compute=items_to_compute)

        return item_scores

    def set_URM_train(self, URM_train_new):
        """
        Replace the training URM and apply the cold-user strategy selected in fit.

        With self.estimate_model_for_cold_users == "itemKNN" an ItemKNN model is built from
        self.ITEM_factors on the new URM. With "mean_item_factors" the user factors of the
        previously cold users are replaced by URM_train_new.dot(ITEM_factors), each non-empty
        row divided by the square root of the new profile length. Any other value only
        replaces the URM.

        :param URM_train_new: new user-rating matrix, must have the same shape as the current self.URM_train
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, "{}: set_URM_train old and new URM train have different shapes".format(
            self.RECOMMENDER_NAME)

        URM_train_new = check_matrix(URM_train_new, 'csr', dtype=np.float32)
        # Number of stored interactions per user, read from the CSR row pointers
        profile_length_new = np.ediff1d(URM_train_new.indptr)

        if self.estimate_model_for_cold_users == "itemKNN":

            self._print("Generating ItemKNN model from ITEM latent factors...")

            W_sparse = compute_W_sparse_from_item_latent_factors(
                self.ITEM_factors,
                topK=self.estimate_model_for_cold_users_topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(
                URM_train_new)
            self._ItemKNNRecommender.fit(W_sparse, topK=None)

            self._cold_user_KNN_model_flag = True
            self._warm_user_KNN_mask = profile_length_new > 0

            self._print(
                "Generating ItemKNN model from ITEM latent factors... done!")

        elif self.estimate_model_for_cold_users == "mean_item_factors":

            self._print(
                "Estimating USER latent factors from ITEM latent factors...")

            # Evaluated before self.URM_train is replaced below
            cold_user_mask_previous = self._get_cold_user_mask()

            self.USER_factors[cold_user_mask_previous, :] = URM_train_new.dot(
                self.ITEM_factors)[cold_user_mask_previous, :]
            self._cold_user_KNN_estimated_factors_flag = True

            # Divide the row of every previously cold user by the sqrt of its new
            # profile length. Users still without interactions are skipped so that no
            # division by zero takes place.
            rows_to_scale = np.logical_and(cold_user_mask_previous,
                                           profile_length_new > 0)
            self.USER_factors[rows_to_scale, :] /= np.sqrt(
                profile_length_new[rows_to_scale])[:, np.newaxis]

            self._print(
                "Estimating USER latent factors from ITEM latent factors... done!"
            )

        self.URM_train = sps.csr_matrix(URM_train_new.copy())
        self.URM_train.eliminate_zeros()

    def fit(self,
            *posargs,
            estimate_model_for_cold_users=None,
            estimate_model_for_cold_users_topK=100,
            **kwargs):
        """
        Fits the MF model with the given hyper-parameters and sets the kwarg "estimate_model_for_cold_users"
        :param posargs: positional arguments forwarded to the wrapped recommender's fit
        :param estimate_model_for_cold_users: strategy later applied by set_URM_train,
            "itemKNN" or "mean_item_factors"; any other value disables the fallback
        :param estimate_model_for_cold_users_topK: topK used to build the item similarity
            in the "itemKNN" strategy
        :param kwargs: keyword arguments forwarded to the wrapped recommender's fit
        :return: None
        """
        self.estimate_model_for_cold_users = estimate_model_for_cold_users
        self.estimate_model_for_cold_users_topK = estimate_model_for_cold_users_topK

        self.mf_recommender.fit(*posargs, **kwargs)

        # Copies, so that estimating factors for cold users does not alter the wrapped model
        self.USER_factors = self.mf_recommender.USER_factors.copy()
        self.ITEM_factors = self.mf_recommender.ITEM_factors.copy()

    def save_model(self, folder_path, file_name=None):
        """
        Save the wrapped recommender, the optional ItemKNN model and the wrapper flags.

        The wrapped recommender is written as file_name + "_warm_users", the ItemKNN model
        (only if it has been built) as file_name + "_cold_users", and the two cold-user
        flags as file_name through DataIO.

        :param folder_path: folder the model files are written to
        :param file_name: base file name; defaults to self.RECOMMENDER_NAME when None
        :return: None
        """

        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Saving model in file '{}'".format(folder_path +
                                                       file_name))

        self.mf_recommender.save_model(folder_path,
                                       file_name=file_name + "_warm_users")

        data_dict_to_save = {
            "_cold_user_KNN_model_flag":
            self._cold_user_KNN_model_flag,
            "_cold_user_KNN_estimated_factors_flag":
            self._cold_user_KNN_estimated_factors_flag
        }

        if self._cold_user_KNN_model_flag:
            self._ItemKNNRecommender.save_model(folder_path,
                                                file_name=file_name +
                                                "_cold_users")

        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name,
                         data_dict_to_save=data_dict_to_save)

        self._print("Saving complete")

    def load_model(self, folder_path, file_name=None):
        """
        Load the wrapper flags, the wrapped recommender and, if flagged, the ItemKNN model.

        :param folder_path: folder the model files are read from
        :param file_name: base file name; defaults to self.RECOMMENDER_NAME when None,
            mirroring save_model
        :return: None
        """
        # Resolve the default before building the suffixed names:
        # concatenating None with a str would raise TypeError
        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        # The parent load_model is looked up in the MRO after BaseMatrixFactorizationRecommender
        super(BaseMatrixFactorizationRecommender,
              self).load_model(folder_path, file_name=file_name)

        self.mf_recommender.load_model(folder_path,
                                       file_name=file_name + "_warm_users")

        # NOTE(review): save_model stores only the two flags for the wrapper itself, and
        # nothing here copies USER_factors / ITEM_factors from mf_recommender or restores
        # _warm_user_KNN_mask - confirm whether the parent load_model covers them

        if self._cold_user_KNN_model_flag:
            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(
                self.URM_train)
            self._ItemKNNRecommender.load_model(folder_path,
                                                file_name=file_name +
                                                "_cold_users")
Пример #8
0
    def set_URM_train(self, URM_train_new):
        """
        Replace the training URM and apply the cold-user strategy stored on the instance.

        With self.estimate_model_for_cold_users == "itemKNN" an ItemKNN model is built from
        self.ITEM_factors on the new URM. With "mean_item_factors" the user factors of the
        previously cold users are replaced by URM_train_new.dot(ITEM_factors), each non-empty
        row divided by the square root of the new profile length. Any other value only
        replaces the URM.

        :param URM_train_new: new user-rating matrix, must have the same shape as the current self.URM_train
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, "{}: set_URM_train old and new URM train have different shapes".format(
            self.RECOMMENDER_NAME)

        URM_train_new = check_matrix(URM_train_new, 'csr', dtype=np.float32)
        # Number of stored interactions per user, read from the CSR row pointers
        profile_length_new = np.ediff1d(URM_train_new.indptr)

        if self.estimate_model_for_cold_users == "itemKNN":

            self._print("Generating ItemKNN model from ITEM latent factors...")

            W_sparse = compute_W_sparse_from_item_latent_factors(
                self.ITEM_factors,
                topK=self.estimate_model_for_cold_users_topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(
                URM_train_new)
            self._ItemKNNRecommender.fit(W_sparse, topK=None)

            self._cold_user_KNN_model_flag = True
            self._warm_user_KNN_mask = profile_length_new > 0

            self._print(
                "Generating ItemKNN model from ITEM latent factors... done!")

        elif self.estimate_model_for_cold_users == "mean_item_factors":

            self._print(
                "Estimating USER latent factors from ITEM latent factors...")

            # Evaluated before self.URM_train is replaced below
            cold_user_mask_previous = self._get_cold_user_mask()

            self.USER_factors[cold_user_mask_previous, :] = URM_train_new.dot(
                self.ITEM_factors)[cold_user_mask_previous, :]
            self._cold_user_KNN_estimated_factors_flag = True

            # Divide the row of every previously cold user by the sqrt of its new
            # profile length. Users still without interactions are skipped so that no
            # division by zero takes place.
            rows_to_scale = np.logical_and(cold_user_mask_previous,
                                           profile_length_new > 0)
            self.USER_factors[rows_to_scale, :] /= np.sqrt(
                profile_length_new[rows_to_scale])[:, np.newaxis]

            self._print(
                "Estimating USER latent factors from ITEM latent factors... done!"
            )

        self.URM_train = sps.csr_matrix(URM_train_new.copy())
        self.URM_train.eliminate_zeros()
Пример #9
0
class BaseMatrixFactorizationRecommender(BaseRecommender):
    """
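    Base class for recommenders built on matrix factorization.
    It provides the function to compute the item scores from the USER and ITEM latent factors,
    as well as the functions to save and load those factors.

    The prediction for cold users is documented as -inf for ALL items
    (NOTE: no such masking is applied by the methods of this class, confirm where it happens)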
    """
    def __init__(self,
                 URM_train,
                 verbose=True,
                 evaluation_block_size: int = 1):
        """
        Initialise the state shared by the matrix-factorization recommenders.

        :param URM_train: user-rating matrix, forwarded unchanged to the parent constructor
        :param verbose: forwarded unchanged to the parent constructor
        :param evaluation_block_size: forwarded unchanged to the parent constructor
        """
        super(BaseMatrixFactorizationRecommender,
              self).__init__(URM_train,
                             verbose=verbose,
                             evaluation_block_size=evaluation_block_size)

        # Bias terms are off by default, _compute_item_score adds them only when this is True
        self.use_bias = False

        # Both flags are switched on by set_URM_train when a cold-user strategy is applied
        self._cold_user_KNN_model_flag = False
        self._cold_user_KNN_estimated_factors_flag = False

        # Same length as the cold-user mask, all False until set_URM_train fills it.
        # The builtin bool is used because the np.bool alias was deprecated in
        # NumPy 1.20 and removed in NumPy 1.24.
        self._warm_user_KNN_mask = np.zeros(len(self._get_cold_user_mask()),
                                            dtype=bool)

    def set_URM_train(self,
                      URM_train_new,
                      estimate_model_for_cold_users=False,
                      topK=100,
                      **kwargs):
        """
        Replace the training URM and optionally estimate a model for cold users.

        :param URM_train_new: new user-rating matrix, must have the same shape as the current URM_train
        :param estimate_model_for_cold_users: strategy to apply, one of
            "itemKNN": build an item-item similarity from ITEM_factors and fit an ItemKNN recommender on the new URM
            "mean_item_factors": overwrite the factors of the users that were cold in the previous URM with the
                sum of the factors of the items in their new profile, divided by the sqrt of the profile length
            False (default): only replace the URM
            Any other value is reported and ignored
        :param topK: number of neighbours used by the "itemKNN" strategy
        :param kwargs: not supported, reported and ignored
        :return: None
        """

        assert self.URM_train.shape == URM_train_new.shape, "{}: set_URM_train old and new URM train have different shapes".format(
            self.RECOMMENDER_NAME)

        if kwargs:
            self._print(
                "set_URM_train keyword arguments not supported for this recommender class. Received: {}"
                .format(kwargs))

        URM_train_new = check_matrix(URM_train_new, 'csr', dtype=np.float32)

        # Number of stored interactions per user in the new URM
        profile_length_new = np.ediff1d(URM_train_new.indptr)

        if estimate_model_for_cold_users == "itemKNN":

            self._print("Estimating ItemKNN model from ITEM latent factors...")

            W_sparse = compute_W_sparse_from_item_latent_factors(
                self.ITEM_factors, topK=topK)

            self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(
                URM_train_new)
            self._ItemKNNRecommender.fit(W_sparse, topK=topK)

            # Stored because save_model and load_model need it to rebuild the KNN model
            self._ItemKNNRecommender_topK = topK

            self._cold_user_KNN_model_flag = True
            self._warm_user_KNN_mask = profile_length_new > 0

            self._print(
                "Estimating ItemKNN model from ITEM latent factors... done!")

        elif estimate_model_for_cold_users == "mean_item_factors":

            self._print(
                "Estimating USER latent factors from ITEM latent factors...")

            # The mask is read before self.URM_train is replaced, so it refers to the previous URM
            cold_user_mask_previous = np.asarray(self._get_cold_user_mask(),
                                                 dtype=bool)

            # Only the rows of the previously cold users are multiplied,
            # the product for every other user would be discarded anyway
            self.USER_factors[cold_user_mask_previous, :] = URM_train_new[
                cold_user_mask_previous, :].dot(self.ITEM_factors)
            self._cold_user_KNN_estimated_factors_flag = True

            # Divide every row for the sqrt of the profile length,
            # users that are still cold are skipped to avoid a division by zero
            rows_to_scale = cold_user_mask_previous & (profile_length_new > 0)
            self.USER_factors[rows_to_scale, :] /= np.sqrt(
                profile_length_new[rows_to_scale])[:, np.newaxis]

            self._print(
                "Estimating USER latent factors from ITEM latent factors... done!"
            )

        elif estimate_model_for_cold_users:
            # A truthy value that is not a known strategy (e.g. True or a typo) used to be ignored silently
            self._print(
                "set_URM_train estimate_model_for_cold_users value '{}' not recognized, no model estimated for cold users"
                .format(estimate_model_for_cold_users))

        # A copy is stored so that eliminate_zeros does not alter the matrix owned by the caller
        self.URM_train = check_matrix(URM_train_new.copy(),
                                      'csr',
                                      dtype=np.float32)
        self.URM_train.eliminate_zeros()

    #########################################################################################################
    ##########                                                                                     ##########
    ##########                               COMPUTE ITEM SCORES                                   ##########
    ##########                                                                                     ##########
    #########################################################################################################

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """
        USER_factors is n_users x n_factors
        ITEM_factors is n_items x n_factors

        The prediction for cold users will always be -inf for ALL items

        :param user_id_array:
        :param items_to_compute:
        :return:
        """

        assert self.USER_factors.shape[1] == self.ITEM_factors.shape[1], \
            "{}: User and Item factors have inconsistent shape".format(self.RECOMMENDER_NAME)

        assert self.USER_factors.shape[0] > np.max(user_id_array),\
                "{}: Cold users not allowed. Users in trained model are {}, requested prediction for users up to {}".format(
                self.RECOMMENDER_NAME, self.USER_factors.shape[0], np.max(user_id_array))

        if items_to_compute is not None:
            item_scores = -np.ones(
                (len(user_id_array), self.ITEM_factors.shape[0]),
                dtype=np.float32) * np.inf
            item_scores[:, items_to_compute] = np.dot(
                self.USER_factors[user_id_array],
                self.ITEM_factors[items_to_compute, :].T)

        else:
            item_scores = np.dot(self.USER_factors[user_id_array],
                                 self.ITEM_factors.T)

        # No need to select only the specific negative items or warm users because the -inf score will not change
        if self.use_bias:
            item_scores += self.ITEM_bias + self.GLOBAL_bias
            item_scores = (item_scores.T + self.USER_bias[user_id_array]).T

        return item_scores

    #########################################################################################################
    ##########                                                                                     ##########
    ##########                                LOAD AND SAVE                                        ##########
    ##########                                                                                     ##########
    #########################################################################################################

    def save_model(self, folder_path, file_name=None):
        """
        Save the latent factors and the cold-user flags via DataIO.

        :param folder_path: folder given to DataIO, also concatenated to file_name in the log message
        :param file_name: name of the file to save, RECOMMENDER_NAME is used if None
        :return: None
        """

        file_name = self.RECOMMENDER_NAME if file_name is None else file_name

        logger.info(f"Saving model in file '{folder_path + file_name}'")

        # Attributes saved under a key equal to their own name
        always_saved = ("USER_factors", "ITEM_factors", "use_bias",
                        "_cold_user_mask", "_cold_user_KNN_model_flag",
                        "_cold_user_KNN_estimated_factors_flag")

        data_dict_to_save = {}

        for attribute_name in always_saved:
            data_dict_to_save[attribute_name] = getattr(self, attribute_name)

        # The bias terms are saved only if the model uses them
        if self.use_bias:
            for attribute_name in ("ITEM_bias", "USER_bias", "GLOBAL_bias"):
                data_dict_to_save[attribute_name] = getattr(
                    self, attribute_name)

        # The KNN model is saved as its similarity matrix and topK, load_model rebuilds it from those.
        # NOTE(review): _warm_user_KNN_mask is not part of the saved data, confirm this is intended
        if self._cold_user_KNN_model_flag:
            knn_model = self._ItemKNNRecommender
            data_dict_to_save["_ItemKNNRecommender_W_sparse"] = knn_model.W_sparse
            data_dict_to_save["_ItemKNNRecommender_topK"] = self._ItemKNNRecommender_topK

        DataIO(folder_path=folder_path).save_data(
            file_name=file_name, data_dict_to_save=data_dict_to_save)

        self._print("Saving complete")

    def load_model(self, folder_path, file_name=None):
        """
        Load the model via the parent class and rebuild the ItemKNN model for cold users, if one was saved.

        :param folder_path: forwarded unchanged to the parent load_model
        :param file_name: forwarded unchanged to the parent load_model
        :return: None
        """
        super(BaseMatrixFactorizationRecommender,
              self).load_model(folder_path, file_name=file_name)

        # Nothing else to do if no KNN model was estimated for cold users
        if not self._cold_user_KNN_model_flag:
            return

        # The two _ItemKNNRecommender_* attributes are presumably set by the parent load_model
        # from the saved data, they are only needed to fit the KNN model again
        self._ItemKNNRecommender = ItemKNNCustomSimilarityRecommender(
            self.URM_train)

        saved_similarity = self._ItemKNNRecommender_W_sparse
        saved_topK = self._ItemKNNRecommender_topK
        self._ItemKNNRecommender.fit(saved_similarity, topK=saved_topK)

        for temporary_attribute in ("_ItemKNNRecommender_W_sparse",
                                    "_ItemKNNRecommender_topK"):
            delattr(self, temporary_attribute)