Пример #1
0
class PyramidItemTreeRecommender_offline(RecommenderSystem):
    """Hybrid recommender that linearly blends eight pre-trained models.

    ``fit`` loads already-trained user KNN, item KNN (collaborative and
    content-based), two SLIM BPR variants, P3alpha, RP3beta and SLIM
    ElasticNet recommenders through ``OfflineDataLoader`` and keeps their
    similarity matrices. ``recommend`` mixes the scores those matrices
    produce, using the weights that were given to ``fit``.
    """
    RECOMMENDER_NAME = "PyramidItemTreeRecommender_offline"

    def __init__(self, URM_train, ICM):
        """Store the training interaction matrix and the item content matrix.

        Both matrices are passed through ``check_matrix`` requesting the
        ``"csr"`` format and ``np.float32`` dtype.
        """
        super(PyramidItemTreeRecommender_offline, self).__init__()
        self.URM_train = check_matrix(URM_train, "csr", dtype=np.float32)
        self.ICM = check_matrix(ICM, "csr", dtype=np.float32)
        self.parameters = None
        self.dataset = None
        self.normalize = False

    def __repr__(self):
        return "Pyramid Item Tree 4 Level Hybrid Offline Recommender"

    def fit(self,
            alpha=0.80849266253816,
            beta=0.7286503831547066,
            gamma=0.02895704968752022,
            sigma=0.453342,
            tau=0.542421,
            chi=1.8070865821028037,
            psi=4.256005405227253,
            omega=5.096018341419944,
            coeff=39.966898886531645,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            offline=False,
            location="submission"):
        """Load the component models and precompute the blended matrices.

        Blend weights (as used by this class):
            alpha: weight of the P3alpha matrix; ``1 - alpha`` goes to RP3beta.
            beta: weight of the SLIM mark2 matrix; ``(1 - beta) * coeff``
                goes to the SLIM ElasticNet matrix.
            sigma: weight of the SLIM mark1 matrix.
            gamma: weight of the user-KNN scores; ``1 - gamma`` goes to the
                item-KNN collaborative scores.
            tau: weight of the item-KNN content-based scores.
            chi, psi, omega: weights of the KNN, P3alpha/RP3beta and SLIM
                score groups in the final sum.
            coeff: extra multiplier applied to the ElasticNet matrix.

        Control flags:
            normalize: stored on the instance and read by ``recommend``.
            save_model: when true, ``saveModel`` is called at the end with
                the folder ``"saved_models/" + location + "/"``.
            submission: its negation is stored as ``self.submission`` and
                passed as ``training=`` when locating component models.
            best_parameters: when true, the weights are not taken from the
                arguments; ``loadModel`` is called on the location returned
                by ``OfflineDataLoader.get_parameter``.
            offline: when true, nothing is built; ``loadModel`` is called on
                the location returned by ``OfflineDataLoader.get_model``.
                NOTE(review): ``normalize`` and ``submission`` are ignored
                in this mode - confirm this is intended.
            location: sub-folder of ``saved_models`` used by ``save_model``.
        """
        if offline:
            loader = OfflineDataLoader()
            folder_path, file_name = loader.get_model(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            if best_parameters:
                loader = OfflineDataLoader()
                folder_path, file_name = loader.get_parameter(
                    self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path, file_name=file_name)
            else:
                self.alpha = alpha
                self.beta = beta
                self.gamma = gamma
                self.sigma = sigma
                self.tau = tau
                self.chi = chi
                self.psi = psi
                self.omega = omega
                self.coeff = coeff

            self.normalize = normalize
            self.submission = not submission

            self._load_pretrained_models()
            self._collect_similarity_matrices()
            self._precompute_blends()

            self.parameters = "alpha={}, beta={}, gamma={},sigma={}, tau={}, chi={}, psi={}, omega={}, coeff={}".format(
                self.alpha, self.beta, self.gamma, self.sigma, self.tau,
                self.chi, self.psi, self.omega, self.coeff)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)

    def _load_pretrained_models(self):
        """Instantiate each component recommender and load its saved state."""
        loader = OfflineDataLoader()
        # (attribute that keeps the model, recommender class, constructor args)
        components = (
            ("m_user_knn_cf", UserKNNCFRecommender, (self.URM_train, )),
            ("m_item_knn_cf", ItemKNNCFRecommender, (self.URM_train, )),
            ("m_item_knn_cbf", ItemKNNCBFRecommender,
             (self.URM_train, self.ICM)),
            ("m_slim_mark1", Slim_mark1, (self.URM_train, )),
            ("m_slim_mark2", Slim_mark2, (self.URM_train, )),
            ("m_alpha", P3alphaRecommender, (self.URM_train, )),
            ("m_beta", RP3betaRecommender, (self.URM_train, )),
            ("m_slim_elastic", SLIMElasticNetRecommender,
             (self.URM_train, )),
        )
        for attr_name, recommender_cls, ctor_args in components:
            model = recommender_cls(*ctor_args)
            setattr(self, attr_name, model)
            folder_path, file_name = loader.get_model(
                recommender_cls.RECOMMENDER_NAME, training=self.submission)
            model.loadModel(folder_path=folder_path, file_name=file_name)

    def _collect_similarity_matrices(self):
        """Copy every component's weight matrix onto ``self`` as CSR float32."""
        # (target attribute, component model, attribute read from the model).
        # Slim_mark1 is the only component read through ``W``.
        sources = (
            ("W_sparse_URM", self.m_user_knn_cf, "W_sparse"),
            ("W_sparse_URM_T", self.m_item_knn_cf, "W_sparse"),
            ("W_sparse_ICM", self.m_item_knn_cbf, "W_sparse"),
            ("W_sparse_Slim1", self.m_slim_mark1, "W"),
            ("W_sparse_Slim2", self.m_slim_mark2, "W_sparse"),
            ("W_sparse_alpha", self.m_alpha, "W_sparse"),
            ("W_sparse_beta", self.m_beta, "W_sparse"),
            ("W_sparse_elastic", self.m_slim_elastic, "W_sparse"),
        )
        for target, model, source_attr in sources:
            setattr(
                self, target,
                check_matrix(getattr(model, source_attr),
                             "csr",
                             dtype=np.float32))

    def _precompute_blends(self):
        """Combine the matrices that are always used together in scoring."""
        self.matrix_alpha_beta = self.alpha * self.W_sparse_alpha + (
            1 - self.alpha) * self.W_sparse_beta
        self.matrix_slim = self.beta * self.W_sparse_Slim2 + (
            (1 - self.beta) * self.W_sparse_elastic *
            self.coeff) + self.sigma * self.W_sparse_Slim1

    def recommend(self,
                  playlist_id_array,
                  cutoff=None,
                  remove_seen_flag=True,
                  remove_top_pop_flag=False,
                  remove_CustomItems_flag=False,
                  export=False):
        """Rank items for one playlist id or for an array of playlist ids.

        Args:
            playlist_id_array: a scalar id or a sequence of ids (row indices
                of ``URM_train``).
            cutoff: number of items to return per playlist; defaults to the
                number of columns of ``URM_train`` minus one.
            remove_seen_flag: when true, each score row is passed through
                ``self._remove_seen_on_scores`` before ranking.
            remove_top_pop_flag: accepted but not used by this method.
            remove_CustomItems_flag: accepted but not used by this method.
            export: when true, return a string instead of lists.

        Returns:
            With ``export`` false, a list with one ranked item list per
            playlist (a scalar id still yields a one-element list of lists).
            With ``export`` true, ``str()`` of the ranking (of its first row
            for a scalar id) with leading/trailing ``[``, ``,`` and ``]``
            characters stripped.
        """
        # If is a scalar transform it in a 1-cell array
        if np.isscalar(playlist_id_array):
            playlist_id_array = np.atleast_1d(playlist_id_array)
            single_user = True
        else:
            single_user = False
        if cutoff is None:
            cutoff = self.URM_train.shape[1] - 1

        user_profiles = self.URM_train[playlist_id_array]

        scores_users = self.W_sparse_URM[playlist_id_array].dot(
            self.URM_train).toarray()
        scores_items_cf = user_profiles.dot(self.W_sparse_URM_T).toarray()
        scores_items_cbf = user_profiles.dot(self.W_sparse_ICM).toarray()
        scores_knn = self.gamma * scores_users + (
            1 - self.gamma) * scores_items_cf + self.tau * scores_items_cbf
        scores_ab = user_profiles.dot(self.matrix_alpha_beta).toarray()
        scores_slim = user_profiles.dot(self.matrix_slim).toarray()
        scores = (self.chi * scores_knn + self.psi * scores_ab +
                  self.omega * scores_slim)

        if self.normalize:
            # normalization will keep the scores in the same range
            # of value of the ratings in dataset
            # NOTE(review): sparse_weights, W_sparse and W are not set
            # anywhere in this class; presumably they come from the base
            # class or from loadModel - confirm before enabling normalize.
            rated = self.URM_train[playlist_id_array].copy()
            rated.data = np.ones_like(rated.data)
            if self.sparse_weights:
                den = rated.dot(self.W_sparse).toarray()
            else:
                den = rated.dot(self.W)
            den[np.abs(den) < 1e-6] = 1.0  # to avoid NaNs
            scores /= den

        # The flag does not change per row, so test it once outside the loop.
        if remove_seen_flag:
            for user_index, user_id in enumerate(playlist_id_array):
                scores[user_index, :] = self._remove_seen_on_scores(
                    user_id, scores[user_index, :])

        # Partial selection of the `cutoff` best items, then a full sort of
        # just those items by descending score.
        relevant_items_partition = (-scores).argpartition(cutoff,
                                                          axis=1)[:, 0:cutoff]
        # [:, None] turns the row indices into a column so that each row is
        # paired with its own selected item indices.
        relevant_items_partition_original_value = scores[
            np.arange(scores.shape[0])[:, None], relevant_items_partition]
        relevant_items_partition_sorting = np.argsort(
            -relevant_items_partition_original_value, axis=1)
        ranking = relevant_items_partition[
            np.arange(relevant_items_partition.shape[0])[:, None],
            relevant_items_partition_sorting]

        ranking_list = ranking.tolist()

        if not export:
            return ranking_list
        # Only the exported string is flattened to the single row for a
        # scalar id; the list form above is returned unchanged.
        exported = ranking_list[0] if single_user else ranking_list
        return str(exported).strip("[,]")

    def saveModel(self, folder_path, file_name=None):
        """Pickle the component matrices, blends and weights to a file.

        Args:
            folder_path: destination prefix; concatenated directly with
                ``file_name``, so it must end with a path separator.
            file_name: destination file name; defaults to
                ``RECOMMENDER_NAME``.
        """
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME,
                                                     folder_path + file_name))
        dictionary_to_save = {
            "W_sparse_URM": self.W_sparse_URM,
            "W_sparse_URM_T": self.W_sparse_URM_T,
            "W_sparse_ICM": self.W_sparse_ICM,
            "W_sparse_Slim1": self.W_sparse_Slim1,
            "W_sparse_Slim2": self.W_sparse_Slim2,
            "W_sparse_alpha": self.W_sparse_alpha,
            "W_sparse_beta": self.W_sparse_beta,
            "W_sparse_elastic": self.W_sparse_elastic,
            "matrix_slim": self.matrix_slim,
            "matrix_alpha_beta": self.matrix_alpha_beta,
            "alpha": self.alpha,
            "beta": self.beta,
            "gamma": self.gamma,
            "sigma": self.sigma,
            "tau": self.tau,
            "chi": self.chi,
            "psi": self.psi,
            "omega": self.omega,
            "coeff": self.coeff
        }

        # The context manager guarantees the file is flushed and closed even
        # if pickling fails part-way through.
        with open(folder_path + file_name, "wb") as output_file:
            pickle.dump(dictionary_to_save,
                        output_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))
Пример #2
0
    def fit(self,
            alpha=0.80849266253816,
            beta=0.7286503831547066,
            gamma=0.02895704968752022,
            sigma=0.453342,
            tau=0.542421,
            chi=1.8070865821028037,
            psi=4.256005405227253,
            omega=5.096018341419944,
            coeff=39.966898886531645,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            offline=False,
            location="submission"):
        """Load the pre-trained component models and build the blended matrices.

        With ``offline`` true the instance state is restored through
        ``loadModel`` from the location given by
        ``OfflineDataLoader.get_model`` and nothing else is built. Otherwise
        the weights come either from the arguments or, with
        ``best_parameters`` true, from ``loadModel`` on the location given by
        ``OfflineDataLoader.get_parameter``; the eight component recommenders
        are then loaded, their weight matrices stored as CSR float32, and the
        P3alpha/RP3beta and SLIM blends precomputed. ``submission`` is
        negated and forwarded as ``training=`` when locating each component.
        When ``save_model`` is true the result is written under
        ``"saved_models/" + location + "/"``.
        """

        def restore_own_state(locate):
            # `locate` is a bound OfflineDataLoader method returning
            # (folder, file name) for this recommender.
            folder, name = locate(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder, file_name=name)

        if offline:
            restore_own_state(OfflineDataLoader().get_model)
        else:
            if best_parameters:
                restore_own_state(OfflineDataLoader().get_parameter)
            else:
                (self.alpha, self.beta, self.gamma, self.sigma, self.tau,
                 self.chi, self.psi, self.omega,
                 self.coeff) = (alpha, beta, gamma, sigma, tau, chi, psi,
                                omega, coeff)

            self.normalize = normalize
            self.submission = not submission

            # --- component models -------------------------------------
            # (attribute keeping the model, recommender class, ctor args)
            loader = OfflineDataLoader()
            component_specs = (
                ("m_user_knn_cf", UserKNNCFRecommender, (self.URM_train, )),
                ("m_item_knn_cf", ItemKNNCFRecommender, (self.URM_train, )),
                ("m_item_knn_cbf", ItemKNNCBFRecommender,
                 (self.URM_train, self.ICM)),
                ("m_slim_mark1", Slim_mark1, (self.URM_train, )),
                ("m_slim_mark2", Slim_mark2, (self.URM_train, )),
                ("m_alpha", P3alphaRecommender, (self.URM_train, )),
                ("m_beta", RP3betaRecommender, (self.URM_train, )),
                ("m_slim_elastic", SLIMElasticNetRecommender,
                 (self.URM_train, )),
            )
            for holder, recommender_cls, ctor_args in component_specs:
                component = recommender_cls(*ctor_args)
                setattr(self, holder, component)
                saved_folder, saved_file = loader.get_model(
                    recommender_cls.RECOMMENDER_NAME,
                    training=self.submission)
                component.loadModel(folder_path=saved_folder,
                                    file_name=saved_file)

            # --- weight matrices --------------------------------------
            # (target attribute, component, attribute read from it);
            # Slim_mark1 is the only one exposing its matrix as ``W``.
            matrix_specs = (
                ("W_sparse_URM", self.m_user_knn_cf, "W_sparse"),
                ("W_sparse_URM_T", self.m_item_knn_cf, "W_sparse"),
                ("W_sparse_ICM", self.m_item_knn_cbf, "W_sparse"),
                ("W_sparse_Slim1", self.m_slim_mark1, "W"),
                ("W_sparse_Slim2", self.m_slim_mark2, "W_sparse"),
                ("W_sparse_alpha", self.m_alpha, "W_sparse"),
                ("W_sparse_beta", self.m_beta, "W_sparse"),
                ("W_sparse_elastic", self.m_slim_elastic, "W_sparse"),
            )
            for target, component, source_attr in matrix_specs:
                weights = getattr(component, source_attr)
                setattr(self, target,
                        check_matrix(weights, "csr", dtype=np.float32))

            # --- precomputed blends -----------------------------------
            alpha_part = self.alpha * self.W_sparse_alpha
            beta_part = (1 - self.alpha) * self.W_sparse_beta
            self.matrix_alpha_beta = alpha_part + beta_part

            slim2_part = self.beta * self.W_sparse_Slim2
            elastic_part = ((1 - self.beta) * self.W_sparse_elastic *
                            self.coeff)
            slim1_part = self.sigma * self.W_sparse_Slim1
            self.matrix_slim = slim2_part + elastic_part + slim1_part

            summary = "alpha={}, beta={}, gamma={},sigma={}, tau={}, chi={}, psi={}, omega={}, coeff={}"
            self.parameters = summary.format(self.alpha, self.beta,
                                             self.gamma, self.sigma,
                                             self.tau, self.chi, self.psi,
                                             self.omega, self.coeff)
        if save_model:
            target_folder = "saved_models/" + location + "/"
            self.saveModel(target_folder, file_name=self.RECOMMENDER_NAME)
def printOutMapValues(modelList, URM, ICM, modelsSoFar):
    """Collect the stored MAP value of every saved model in *modelList*.

    Args:
        modelList: iterable of indexable entries where ``entry[0]`` is the
            model-type name, ``entry[1]`` is the "/"-separated path of the
            saved model file and ``entry[2]`` is the key under which the
            MAP value is recorded.
        URM: interaction matrix handed to every recommender constructor.
        ICM: item content matrix, handed only to the recommenders that take
            it (``ItemKNNCBFRecommender``, ``SingleNeuronRecommender_offline``).
        modelsSoFar: iterable of model-type names; each becomes a key of the
            result.

    Returns:
        ``{model_type: {entry[2]: MAP}}`` with one (possibly empty) inner
        dict per name in *modelsSoFar*. Entries whose type name is not
        handled here are skipped.

    Raises:
        KeyError: if a handled entry's type name is missing from
            *modelsSoFar*.
    """
    # Lambdas keep the recommender class names unresolved until an entry of
    # that type is actually met, exactly like separate if/elif branches.
    # ItemTreeRecommender_offline and PartyRecommender_offline are
    # deliberately not handled (they were disabled in the original chain).
    factories = {
        "UserKNNCFRecommender": lambda: UserKNNCFRecommender(URM),
        "ItemKNNCFRecommender": lambda: ItemKNNCFRecommender(URM),
        "ItemKNNCBFRecommender": lambda: ItemKNNCBFRecommender(URM, ICM),
        "SLIM_BPR_Recommender_mark1": lambda: Slim_mark1(URM),
        "RP3_Beta_Recommender": lambda: RP3betaRecommender(URM),
        "P3_Alpha_Recommender": lambda: P3alphaRecommender(URM),
        "PureSVD": lambda: PureSVDRecommender(URM),
        "Slim_Elastic_Net_Recommender":
        lambda: SLIMElasticNetRecommender(URM),
        "SLIM_BPR_Recommender_mark2": lambda: Slim_mark2(URM),
        "SingleNeuronRecommender_offline":
        lambda: SingleNeuronRecommender_offline(URM, ICM),
    }

    map_dict = {name: {} for name in modelsSoFar}
    for model in modelList:
        build = factories.get(model[0])
        if build is None:
            continue
        # Split "some/dir/file" into "some/dir/" and "file"; a path without
        # any "/" yields the folder "/" (same as the previous split/join).
        parent, _, file_name = model[1].rpartition("/")
        mod = build()
        mod.loadModel(folder_path=parent + "/",
                      file_name=file_name,
                      verbose=False)
        map_dict[model[0]][model[2]] = mod.MAP

    return map_dict
Пример #4
0
def read_data_split_and_search():
    """
    Run the parameter search for every recommender class enabled below.

    The datasets are generated through PlaylistDataReader, the saved
    ItemKNNCF model is loaded to obtain its W_sparse matrix, and
    runParameterSearch_Collaborative is invoked once per enabled class,
    optimizing "MAP" at cutoff 10 over 250 cases. A failure in one class is
    printed with its traceback and does not stop the remaining ones.

    According to the original notes, the search object is expected to save
    under the output folder:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file with the trained model, loadable with recommender.load_model()
        - A _best_parameter file with a dictionary of the fit parameters, usable as recommender.fit(**_best_parameter)
        - A _best_result_validation file with the results of the best solution on the validation
        - A _best_result_test file with the results, on the test set, of the best solution chosen using the validation set
    """
    clear()

    data_reader = PlaylistDataReader()
    data_reader.generate_datasets()
    URM_train = data_reader.get_URM_train()
    # The validation split is not used: URM_test drives both evaluators.
    URM_test = data_reader.get_URM_test()
    ICM = data_reader.get_ICM()
    output_root_path = "tuned_parameters"

    # Item-item collaborative similarity, forwarded to the search as
    # W_sparse_CF.
    loader = OfflineDataLoader()
    saved_folder, saved_file = loader.get_model(
        ItemKNNCFRecommender.RECOMMENDER_NAME, training=True)
    item_cf_model = ItemKNNCFRecommender(URM_train, ICM)
    item_cf_model.loadModel(folder_path=saved_folder, file_name=saved_file)
    W_sparse_CF = item_cf_model.W_sparse

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    # Uncomment an entry to include that recommender in the search.
    collaborative_algorithm_list = [
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # Slim_mark1,
        # Slim_mark2,
        # ItemTreeRecommender_offline
        # SLIMElasticNetRecommender,
        # PartyRecommender_offline
        # PyramidRecommender_offline
        # ItemKNNCBFRecommender
        # PyramidItemTreeRecommender_offline
        # HybridEightRecommender_offline
        # ComboRecommender_offline
        SingleNeuronRecommender_offline
        # CFWBoostingRecommender
    ]

    from parameter_tuning.AbstractClassSearch import EvaluatorWrapper
    from base.evaluation.Evaluator import SequentialEvaluator

    evaluator_validation_earlystopping = SequentialEvaluator(
        URM_test, cutoff_list=[10])
    raw_test_evaluator = SequentialEvaluator(URM_test, cutoff_list=[10])

    evaluator_validation = EvaluatorWrapper(evaluator_validation_earlystopping)
    evaluator_test = EvaluatorWrapper(raw_test_evaluator)

    search_settings = {
        "URM_train": URM_train,
        "ICM": ICM,
        "W_sparse_CF": W_sparse_CF,
        "metric_to_optimize": "MAP",
        "evaluator_validation_earlystopping":
        evaluator_validation_earlystopping,
        "evaluator_validation": evaluator_validation,
        "evaluator_test": evaluator_test,
        "n_cases": 250,
        "output_root_path": output_root_path,
    }
    run_search = partial(runParameterSearch_Collaborative, **search_settings)

    for recommender_class in collaborative_algorithm_list:
        try:
            run_search(recommender_class)
        except Exception as e:
            # Keep going with the next recommender after reporting the error.
            message = "On recommender {} Exception {}".format(
                recommender_class, str(e))
            print(message)
            traceback.print_exc()
Пример #5
0
class SingleNeuronRecommender_offline(RecommenderSystem):
    """Hybrid recommender that blends pre-trained models, one weight each.

    ``fit`` loads eight already-trained recommenders from disk through
    ``OfflineDataLoader`` and pre-computes ``matrix_wo_user``, the weighted
    sum of seven of their weight matrices.  ``recommend`` multiplies the
    requested rows of ``URM_train`` by that matrix and adds the user-KNN
    scores scaled by ``theta``.
    """

    RECOMMENDER_NAME = "SingleNeuronRecommender_offline"

    def __init__(self, URM_train, ICM):
        """Convert the input matrices to CSR/float32 and reset bookkeeping.

        Args:
            URM_train: training interaction matrix; rows are indexed by the
                ids later passed to ``recommend``.
            ICM: item content matrix, forwarded to ``ItemKNNCBFRecommender``
                in ``fit``.
        """
        super(SingleNeuronRecommender_offline, self).__init__()
        self.URM_train = check_matrix(URM_train, "csr", dtype=np.float32)
        self.ICM = check_matrix(ICM, "csr", dtype=np.float32)
        self.parameters = None
        self.dataset = None
        self.normalize = False

    def __repr__(self):
        """Return the human-readable name of this recommender."""
        return "Single Neuron Hybrid Offline Recommender"

    def _load_offline_model(self, loader, recommender_class, *init_args):
        """Build ``recommender_class(*init_args)`` and load its saved state.

        The file location is resolved with ``loader.get_model`` using the
        class' ``RECOMMENDER_NAME`` and ``training=self.submission``.
        """
        model = recommender_class(*init_args)
        folder_path, file_name = loader.get_model(
            recommender_class.RECOMMENDER_NAME, training=self.submission)
        model.loadModel(folder_path=folder_path, file_name=file_name)
        return model

    def fit(
            self,
            alpha=1.3167219260598073,
            beta=15.939928536132701,
            gamma=0.6048873602128846,
            delta=1.0527588765188267,
            epsilon=2.08444591782293,
            zeta=1.2588273098979674,
            eta=18.41012777389885,
            theta=18.000293943452448,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            offline=False,
            location="submission"):
        """Load the component models and pre-compute the blended matrix.

        Args:
            alpha: weight of the ItemKNNCF weight matrix.
            beta: weight of the ItemKNNCBF weight matrix.
            gamma: weight of the Slim_mark1 weight matrix.
            delta: weight of the Slim_mark2 weight matrix.
            epsilon: weight of the P3alpha weight matrix.
            zeta: weight of the RP3beta weight matrix.
            eta: weight of the SLIMElasticNet weight matrix.
            theta: weight applied to the UserKNNCF scores in ``recommend``.
            normalize: stored on the instance; enables the normalisation
                branch of ``recommend``.  Ignored when ``offline`` is true.
            save_model: when true, persist the model under
                ``saved_models/<location>/`` after fitting.
            submission: its negation is stored as ``self.submission`` and
                forwarded as ``training=`` to ``OfflineDataLoader.get_model``.
            best_parameters: when true, the weight arguments are ignored and
                ``loadModel`` is called on the file returned by
                ``OfflineDataLoader.get_parameter`` (presumably restoring the
                weights -- verify against ``loadModel``).
            offline: when true, only ``loadModel`` is called on the file
                returned by ``OfflineDataLoader.get_model``; nothing is
                rebuilt and ``self.parameters`` is left untouched.
            location: sub-folder of ``saved_models/`` used by ``save_model``.
        """
        if offline:
            loader = OfflineDataLoader()
            folder_path, file_name = loader.get_model(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            if best_parameters:
                loader = OfflineDataLoader()
                folder_path, file_name = loader.get_parameter(
                    self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path, file_name=file_name)
            else:
                self.alpha = alpha
                self.beta = beta
                self.gamma = gamma
                self.delta = delta
                self.epsilon = epsilon
                self.zeta = zeta
                self.eta = eta
                self.theta = theta

            self.normalize = normalize
            self.submission = not submission

            # Every component is restored from disk; nothing is trained here.
            loader = OfflineDataLoader()
            self.m_user_knn_cf = self._load_offline_model(
                loader, UserKNNCFRecommender, self.URM_train)
            self.m_item_knn_cf = self._load_offline_model(
                loader, ItemKNNCFRecommender, self.URM_train)
            self.m_item_knn_cbf = self._load_offline_model(
                loader, ItemKNNCBFRecommender, self.URM_train, self.ICM)
            self.m_slim_mark1 = self._load_offline_model(
                loader, Slim_mark1, self.URM_train)
            self.m_slim_mark2 = self._load_offline_model(
                loader, Slim_mark2, self.URM_train)
            self.m_alpha = self._load_offline_model(
                loader, P3alphaRecommender, self.URM_train)
            self.m_beta = self._load_offline_model(
                loader, RP3betaRecommender, self.URM_train)
            self.m_slim_elastic = self._load_offline_model(
                loader, SLIMElasticNetRecommender, self.URM_train)
            # NOTE: a CFWBoostingRecommender term (weight ``psi``) used to be
            # part of the blend and is currently disabled.

            self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            # Slim_mark1 exposes its weights as ``W`` rather than ``W_sparse``.
            self.W_sparse_Slim1 = check_matrix(self.m_slim_mark1.W,
                                               "csr",
                                               dtype=np.float32)
            self.W_sparse_Slim2 = check_matrix(self.m_slim_mark2.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            self.W_sparse_beta = check_matrix(self.m_beta.W_sparse,
                                              "csr",
                                              dtype=np.float32)
            self.W_sparse_elastic = check_matrix(self.m_slim_elastic.W_sparse,
                                                 "csr",
                                                 dtype=np.float32)

            # Precompute the part of the blend that does not depend on the
            # user-KNN model, so recommend() needs a single product for it.
            self.matrix_wo_user = self.alpha * self.W_sparse_URM_T +\
                                  self.beta * self.W_sparse_ICM +\
                                  self.gamma * self.W_sparse_Slim1 +\
                                  self.delta * self.W_sparse_Slim2 +\
                                  self.epsilon * self.W_sparse_alpha +\
                                  self.zeta * self.W_sparse_beta + \
                                  self.eta * self.W_sparse_elastic

            self.parameters = "alpha={}, beta={}, gamma={},delta={}, epsilon={}, zeta={}, eta={}, theta={}".format(
                self.alpha, self.beta, self.gamma, self.delta, self.epsilon,
                self.zeta, self.eta, self.theta)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)

    def recommend(self,
                  playlist_id_array,
                  cutoff=None,
                  remove_seen_flag=True,
                  remove_top_pop_flag=False,
                  remove_CustomItems_flag=False,
                  export=False):
        """Rank items for one or more rows of ``URM_train``.

        Args:
            playlist_id_array: a scalar id or an array-like of ids used to
                index rows of ``URM_train`` and of the user-KNN weights.
            cutoff: number of items to return per id; defaults to the number
                of columns of ``URM_train`` minus one.
            remove_seen_flag: when true, each score row is passed through
                ``self._remove_seen_on_scores`` (not defined in this class;
                presumably inherited) before ranking.
            remove_top_pop_flag: accepted but not used by this implementation.
            remove_CustomItems_flag: accepted but not used by this
                implementation.
            export: when true, return a string instead of a list.

        Returns:
            Without ``export``: a list with one ranking (list of item indices
            ordered by decreasing score) per requested id.
            NOTE(review): a scalar id still yields a one-element list of
            lists, although the export path unwraps it -- confirm which shape
            callers expect before changing it.
            With ``export``: ``str`` of the ranking (first ranking only for a
            scalar id) with leading/trailing ``[``, ``,`` and ``]`` stripped.
        """
        # A scalar id is promoted to a 1-element array.
        single_user = np.isscalar(playlist_id_array)
        if single_user:
            playlist_id_array = np.atleast_1d(playlist_id_array)
        if cutoff is None:
            cutoff = self.URM_train.shape[1] - 1

        scores_users = self.W_sparse_URM[playlist_id_array].dot(
            self.URM_train).toarray()
        scores_wo_user = self.URM_train[playlist_id_array].dot(
            self.matrix_wo_user).toarray()
        scores = self.theta * scores_users + scores_wo_user

        if self.normalize:
            # NOTE(review): this branch reads self.sparse_weights and
            # self.W_sparse / self.W, none of which this class assigns --
            # presumably set by the base class or loadModel; TODO confirm.
            user_profile = self.URM_train[playlist_id_array]
            rated = user_profile.copy()
            rated.data = np.ones_like(rated.data)
            if self.sparse_weights:
                den = rated.dot(self.W_sparse).toarray()
            else:
                den = rated.dot(self.W)
            den[np.abs(den) < 1e-6] = 1.0  # to avoid NaNs
            scores /= den

        if remove_seen_flag:
            for user_index, user_id in enumerate(playlist_id_array):
                scores[user_index, :] = self._remove_seen_on_scores(
                    user_id, scores[user_index, :])

        # argpartition isolates the top `cutoff` items per row without a full
        # sort; only that slice is then ordered by decreasing score.
        relevant_items_partition = (-scores).argpartition(cutoff,
                                                          axis=1)[:, 0:cutoff]
        # [:, None] turns the row indices into a column so that fancy
        # indexing pairs every row with its own selected columns.
        row_selector = np.arange(scores.shape[0])[:, None]
        relevant_items_partition_sorting = np.argsort(
            -scores[row_selector, relevant_items_partition], axis=1)
        ranking = relevant_items_partition[row_selector,
                                           relevant_items_partition_sorting]

        ranking_list = ranking.tolist()

        if not export:
            return ranking_list
        exported = ranking_list[0] if single_user else ranking_list
        return str(exported).strip("[,]")

    def saveModel(self, folder_path, file_name=None):
        """Pickle the weight matrices and blend weights to disk.

        Args:
            folder_path: destination folder; concatenated with ``file_name``
                as-is, so it must end with a path separator.
            file_name: target file name; defaults to ``RECOMMENDER_NAME``.
        """
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME,
                                                     folder_path + file_name))
        dictionary_to_save = {
            "W_sparse_URM": self.W_sparse_URM,
            "W_sparse_URM_T": self.W_sparse_URM_T,
            "W_sparse_ICM": self.W_sparse_ICM,
            "W_sparse_Slim1": self.W_sparse_Slim1,
            "W_sparse_Slim2": self.W_sparse_Slim2,
            "W_sparse_alpha": self.W_sparse_alpha,
            "W_sparse_beta": self.W_sparse_beta,
            "W_sparse_elastic": self.W_sparse_elastic,
            "matrix_wo_user": self.matrix_wo_user,
            "alpha": self.alpha,
            "beta": self.beta,
            "gamma": self.gamma,
            "delta": self.delta,
            "epsilon": self.epsilon,
            "zeta": self.zeta,
            "eta": self.eta,
            "theta": self.theta,
        }

        # The context manager guarantees the file is flushed and closed even
        # if pickling raises.
        with open(folder_path + file_name, "wb") as model_file:
            pickle.dump(dictionary_to_save,
                        model_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))
Пример #6
0
    def fit(
            self,
            alpha=1.3167219260598073,
            beta=15.939928536132701,
            gamma=0.6048873602128846,
            delta=1.0527588765188267,
            epsilon=2.08444591782293,
            zeta=1.2588273098979674,
            eta=18.41012777389885,
            theta=18.000293943452448,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            offline=False,
            location="submission"):
        """Load the component models and pre-compute the blended matrix.

        ``alpha`` .. ``eta`` weight, in order, the ItemKNNCF, ItemKNNCBF,
        Slim_mark1, Slim_mark2, P3alpha, RP3beta and SLIMElasticNet weight
        matrices inside ``matrix_wo_user``.  ``theta`` is only stored and
        reported in ``self.parameters`` here.

        With ``offline`` the whole model is restored through ``loadModel``
        and nothing else is rebuilt.  With ``best_parameters`` the weight
        arguments are ignored and ``loadModel`` is called on the file given
        by ``OfflineDataLoader.get_parameter``.  ``self.submission`` holds
        the negation of ``submission`` and is forwarded as ``training=`` to
        ``get_model``.  ``save_model`` persists the result under
        ``saved_models/<location>/``.
        """
        if offline:
            loader = OfflineDataLoader()
            model_folder, model_file = loader.get_model(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=model_folder, file_name=model_file)
        else:
            if best_parameters:
                loader = OfflineDataLoader()
                model_folder, model_file = loader.get_parameter(
                    self.RECOMMENDER_NAME)
                self.loadModel(folder_path=model_folder, file_name=model_file)
            else:
                self.alpha, self.beta = alpha, beta
                self.gamma, self.delta = gamma, delta
                self.epsilon, self.zeta = epsilon, zeta
                self.eta, self.theta = eta, theta

            self.normalize = normalize
            self.submission = not submission
            loader = OfflineDataLoader()

            def load_pretrained(recommender_class, *init_args):
                # Instantiate the component, then restore its saved state.
                component = recommender_class(*init_args)
                folder, name = loader.get_model(
                    recommender_class.RECOMMENDER_NAME,
                    training=self.submission)
                component.loadModel(folder_path=folder, file_name=name)
                return component

            def as_csr(weights):
                return check_matrix(weights, "csr", dtype=np.float32)

            self.m_user_knn_cf = load_pretrained(UserKNNCFRecommender,
                                                 self.URM_train)
            self.m_item_knn_cf = load_pretrained(ItemKNNCFRecommender,
                                                 self.URM_train)
            self.m_item_knn_cbf = load_pretrained(ItemKNNCBFRecommender,
                                                  self.URM_train, self.ICM)
            self.m_slim_mark1 = load_pretrained(Slim_mark1, self.URM_train)
            self.m_slim_mark2 = load_pretrained(Slim_mark2, self.URM_train)
            self.m_alpha = load_pretrained(P3alphaRecommender, self.URM_train)
            self.m_beta = load_pretrained(RP3betaRecommender, self.URM_train)
            self.m_slim_elastic = load_pretrained(SLIMElasticNetRecommender,
                                                  self.URM_train)
            # NOTE: a CFWBoostingRecommender term (weight ``psi``) used to be
            # part of the blend and is currently disabled.

            self.W_sparse_URM = as_csr(self.m_user_knn_cf.W_sparse)
            self.W_sparse_URM_T = as_csr(self.m_item_knn_cf.W_sparse)
            self.W_sparse_ICM = as_csr(self.m_item_knn_cbf.W_sparse)
            # Slim_mark1 exposes its weights as ``W`` rather than ``W_sparse``.
            self.W_sparse_Slim1 = as_csr(self.m_slim_mark1.W)
            self.W_sparse_Slim2 = as_csr(self.m_slim_mark2.W_sparse)
            self.W_sparse_alpha = as_csr(self.m_alpha.W_sparse)
            self.W_sparse_beta = as_csr(self.m_beta.W_sparse)
            self.W_sparse_elastic = as_csr(self.m_slim_elastic.W_sparse)

            # Everything except the user-KNN term is blended once, up front.
            self.matrix_wo_user = (self.alpha * self.W_sparse_URM_T
                                   + self.beta * self.W_sparse_ICM
                                   + self.gamma * self.W_sparse_Slim1
                                   + self.delta * self.W_sparse_Slim2
                                   + self.epsilon * self.W_sparse_alpha
                                   + self.zeta * self.W_sparse_beta
                                   + self.eta * self.W_sparse_elastic)

            summary = "alpha={}, beta={}, gamma={},delta={}, epsilon={}, zeta={}, eta={}, theta={}"
            self.parameters = summary.format(self.alpha, self.beta,
                                             self.gamma, self.delta,
                                             self.epsilon, self.zeta,
                                             self.eta, self.theta)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)
Пример #7
0
    def fit(self,
            alpha=0.0500226666668111,
            beta=0.9996482062853596,
            gamma=0.36595766622100967,
            theta=0.22879224932897924,
            omega=0.5940982982110466,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False):
        """Load the component models and pre-compute the blended matrices.

        ``alpha`` mixes the ItemKNNCBF weights (``alpha``) with the
        Slim_mark1 weights (``1 - alpha``); ``beta`` mixes that result with
        the ItemKNNCF weights; ``gamma`` mixes the P3alpha and RP3beta
        weights.  ``theta`` and ``omega`` are only stored and reported in
        ``self.parameters`` here.

        With ``best_parameters`` the weight arguments are ignored and
        ``loadModel`` is called on the file given by
        ``OfflineDataLoader.get_parameter``.  ``self.submission`` holds the
        negation of ``submission`` and is forwarded as ``training=`` to
        ``get_model``.  ``save_model`` persists the result under
        ``saved_models/submission/``.
        """
        if best_parameters:
            loader = OfflineDataLoader()
            model_folder, model_file = loader.get_parameter(
                self.RECOMMENDER_NAME)
            self.loadModel(folder_path=model_folder, file_name=model_file)
        else:
            self.alpha, self.beta, self.gamma = alpha, beta, gamma
            self.theta, self.omega = theta, omega

        self.normalize = normalize
        self.submission = not submission
        loader = OfflineDataLoader()

        def load_pretrained(recommender_class, *init_args):
            # Instantiate the component, then restore its saved state.
            component = recommender_class(*init_args)
            folder, name = loader.get_model(recommender_class.RECOMMENDER_NAME,
                                            training=self.submission)
            component.loadModel(folder_path=folder, file_name=name)
            return component

        def as_csr(weights):
            return check_matrix(weights, "csr", dtype=np.float32)

        self.m_user_knn_cf = load_pretrained(UserKNNCFRecommender,
                                             self.URM_train)
        self.m_item_knn_cf = load_pretrained(ItemKNNCFRecommender,
                                             self.URM_train)
        self.m_item_knn_cbf = load_pretrained(ItemKNNCBFRecommender,
                                              self.URM_train, self.ICM)
        self.m_slim_mark1 = load_pretrained(Slim_mark1, self.URM_train)
        self.m_alpha = load_pretrained(P3alphaRecommender, self.URM_train)
        self.m_beta = load_pretrained(RP3betaRecommender, self.URM_train)

        self.W_sparse_URM = as_csr(self.m_user_knn_cf.W_sparse)
        self.W_sparse_ICM = as_csr(self.m_item_knn_cbf.W_sparse)
        self.W_sparse_URM_T = as_csr(self.m_item_knn_cf.W_sparse)
        # Slim_mark1 exposes its weights as ``W`` rather than ``W_sparse``.
        self.W_sparse_Slim = as_csr(self.m_slim_mark1.W)
        self.W_sparse_alpha = as_csr(self.m_alpha.W_sparse)
        self.W_sparse_beta = as_csr(self.m_beta.W_sparse)

        # Each pair of models is blended once here as a convex combination.
        content_vs_slim = (self.alpha * self.W_sparse_ICM
                           + (1 - self.alpha) * self.W_sparse_Slim)
        self.matrix_first_branch = content_vs_slim
        self.matrix_right = (self.beta * self.matrix_first_branch
                             + (1 - self.beta) * self.W_sparse_URM_T)
        self.matrix_alpha_beta = (self.gamma * self.W_sparse_alpha
                                  + (1 - self.gamma) * self.W_sparse_beta)

        summary = "alpha={}, beta={}, gamma={}, omega={}, theta={}"
        self.parameters = summary.format(self.alpha, self.beta, self.gamma,
                                         self.omega, self.theta)
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name="ItemTreeRecommender_offline")
Пример #8
0
class ItemTreeRecommender_offline(RecommenderSystem):
    RECOMMENDER_NAME = "ItemTreeRecommender_offline"

    def __init__(self, URM_train, ICM):
        """Convert the input matrices to CSR and reset bookkeeping fields.

        Args:
            URM_train: training interaction matrix, stored as CSR/float32.
            ICM: item content matrix, stored as CSR (dtype left to
                ``check_matrix``).
        """
        super(ItemTreeRecommender_offline, self).__init__()

        # Bookkeeping attributes start empty; fit() fills ``parameters``.
        self.normalize = False
        self.dataset = None
        self.parameters = None

        urm_csr = check_matrix(URM_train, "csr", dtype=np.float32)
        icm_csr = check_matrix(ICM, "csr")
        self.URM_train = urm_csr
        self.ICM = icm_csr

    def __repr__(self):
        """Return the human-readable name of this recommender."""
        description = "Item Tree Hybrid Offline Recommender"
        return description

    #0.48932802125541863 #0.33816203568945447 # 0.7341780576036934
    def fit(self,
            alpha=0.0500226666668111,
            beta=0.9996482062853596,
            gamma=0.36595766622100967,
            theta=0.22879224932897924,
            omega=0.5940982982110466,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False):
        """Load the component models and pre-compute the blended matrices.

        ``alpha`` mixes the ItemKNNCBF weights (``alpha``) with the
        Slim_mark1 weights (``1 - alpha``); ``beta`` mixes that result with
        the ItemKNNCF weights; ``gamma`` mixes the P3alpha and RP3beta
        weights.  ``theta`` and ``omega`` are only stored and reported in
        ``self.parameters`` here.

        With ``best_parameters`` the weight arguments are ignored and
        ``loadModel`` is called on the file given by
        ``OfflineDataLoader.get_parameter``.  ``self.submission`` holds the
        negation of ``submission`` and is forwarded as ``training=`` to
        ``get_model``.  ``save_model`` persists the result under
        ``saved_models/submission/``.
        """
        if best_parameters:
            loader = OfflineDataLoader()
            model_folder, model_file = loader.get_parameter(
                self.RECOMMENDER_NAME)
            self.loadModel(folder_path=model_folder, file_name=model_file)
        else:
            self.alpha, self.beta, self.gamma = alpha, beta, gamma
            self.theta, self.omega = theta, omega

        self.normalize = normalize
        self.submission = not submission
        loader = OfflineDataLoader()

        def load_pretrained(recommender_class, *init_args):
            # Instantiate the component, then restore its saved state.
            component = recommender_class(*init_args)
            folder, name = loader.get_model(recommender_class.RECOMMENDER_NAME,
                                            training=self.submission)
            component.loadModel(folder_path=folder, file_name=name)
            return component

        def as_csr(weights):
            return check_matrix(weights, "csr", dtype=np.float32)

        self.m_user_knn_cf = load_pretrained(UserKNNCFRecommender,
                                             self.URM_train)
        self.m_item_knn_cf = load_pretrained(ItemKNNCFRecommender,
                                             self.URM_train)
        self.m_item_knn_cbf = load_pretrained(ItemKNNCBFRecommender,
                                              self.URM_train, self.ICM)
        self.m_slim_mark1 = load_pretrained(Slim_mark1, self.URM_train)
        self.m_alpha = load_pretrained(P3alphaRecommender, self.URM_train)
        self.m_beta = load_pretrained(RP3betaRecommender, self.URM_train)

        self.W_sparse_URM = as_csr(self.m_user_knn_cf.W_sparse)
        self.W_sparse_ICM = as_csr(self.m_item_knn_cbf.W_sparse)
        self.W_sparse_URM_T = as_csr(self.m_item_knn_cf.W_sparse)
        # Slim_mark1 exposes its weights as ``W`` rather than ``W_sparse``.
        self.W_sparse_Slim = as_csr(self.m_slim_mark1.W)
        self.W_sparse_alpha = as_csr(self.m_alpha.W_sparse)
        self.W_sparse_beta = as_csr(self.m_beta.W_sparse)

        # Each pair of models is blended once here as a convex combination.
        content_vs_slim = (self.alpha * self.W_sparse_ICM
                           + (1 - self.alpha) * self.W_sparse_Slim)
        self.matrix_first_branch = content_vs_slim
        self.matrix_right = (self.beta * self.matrix_first_branch
                             + (1 - self.beta) * self.W_sparse_URM_T)
        self.matrix_alpha_beta = (self.gamma * self.W_sparse_alpha
                                  + (1 - self.gamma) * self.W_sparse_beta)

        summary = "alpha={}, beta={}, gamma={}, omega={}, theta={}"
        self.parameters = summary.format(self.alpha, self.beta, self.gamma,
                                         self.omega, self.theta)
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name="ItemTreeRecommender_offline")

    def recommend(self,
                  playlist_id_array,
                  cutoff=None,
                  remove_seen_flag=True,
                  remove_top_pop_flag=False,
                  remove_CustomItems_flag=False,
                  export=False):
        """Rank items for one or more rows of ``URM_train``.

        The final score is
        ``omega * (theta * S_p3 + (1 - theta) * S_userknn)
        + (1 - omega) * S_right``, where ``S_p3`` and ``S_right`` come from
        the matrices pre-computed in ``fit`` (``matrix_alpha_beta`` and
        ``matrix_right``) and ``S_userknn`` from the user-KNN weights.

        Args:
            playlist_id_array: a scalar id or an array-like of ids used to
                index rows of ``URM_train`` and of the user-KNN weights.
            cutoff: number of items to return per id; defaults to the number
                of columns of ``URM_train`` minus one.
            remove_seen_flag: when true, each score row is passed through
                ``self._remove_seen_on_scores`` before ranking.
            remove_top_pop_flag: accepted but not used by this method.
            remove_CustomItems_flag: accepted but not used by this method.
            export: when true, return a string instead of a list.

        Returns:
            Without ``export``: a list with one ranking (item indices ordered
            by decreasing score) per requested id -- also for a scalar id.
            With ``export``: ``str`` of the ranking (first ranking only for a
            scalar id) with leading/trailing ``[``, ``,`` and ``]`` stripped.
        """
        # A scalar id is promoted to a 1-element array.
        single_user = bool(np.isscalar(playlist_id_array))
        if single_user:
            playlist_id_array = np.atleast_1d(playlist_id_array)
        if cutoff is None:
            cutoff = self.URM_train.shape[1] - 1

        # User-KNN scores come from the similarity rows of the requested ids;
        # the other two terms project the profiles through blended matrices.
        scores_URM = self.W_sparse_URM[playlist_id_array].dot(
            self.URM_train).toarray()
        profiles = self.URM_train[playlist_id_array]
        scores_right = profiles.dot(self.matrix_right).toarray()
        scores_alpha_beta = profiles.dot(self.matrix_alpha_beta).toarray()

        scores_left = self.theta * scores_alpha_beta + (
            1 - self.theta) * scores_URM
        scores = self.omega * scores_left + (1 - self.omega) * scores_right

        if self.normalize:
            # NOTE(review): reads self.sparse_weights and self.W_sparse /
            # self.W, which are not assigned in the visible part of this
            # class -- presumably set elsewhere; TODO confirm.
            rated = profiles.copy()
            rated.data = np.ones_like(rated.data)
            if self.sparse_weights:
                den = rated.dot(self.W_sparse).toarray()
            else:
                den = rated.dot(self.W)
            den[np.abs(den) < 1e-6] = 1.0  # to avoid NaNs
            scores /= den

        if remove_seen_flag:
            for row, user_id in enumerate(playlist_id_array):
                scores[row, :] = self._remove_seen_on_scores(
                    user_id, scores[row, :])

        # argpartition isolates the top `cutoff` items per row without a full
        # sort; only that slice is then ordered by decreasing score.
        top_unsorted = (-scores).argpartition(cutoff, axis=1)[:, 0:cutoff]
        # [:, None] turns the row indices into a column so that fancy
        # indexing pairs every row with its own selected columns.
        row_selector = np.arange(scores.shape[0])[:, None]
        order = np.argsort(-scores[row_selector, top_unsorted], axis=1)
        ranking_list = top_unsorted[row_selector, order].tolist()

        if not export:
            return ranking_list
        exported = ranking_list[0] if single_user else ranking_list
        return str(exported).strip("[,]")

    def saveModel(self, folder_path, file_name=None):
        """Pickle the similarity matrices and blending weights to a file.

        Args:
            folder_path: Prefix of the destination path. It is concatenated
                with ``file_name`` as-is, so it must already end with a path
                separator.
            file_name: Name of the output file. When ``None`` the value of
                ``self.RECOMMENDER_NAME`` is used.

        Raises:
            OSError: Propagated from ``open`` when the destination cannot be
                opened for writing.
        """
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        target_path = folder_path + file_name
        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME,
                                                     target_path))
        dictionary_to_save = {
            "W_sparse_URM": self.W_sparse_URM,
            "W_sparse_ICM": self.W_sparse_ICM,
            "W_sparse_URM_T": self.W_sparse_URM_T,
            "W_sparse_Slim": self.W_sparse_Slim,
            "W_sparse_alpha": self.W_sparse_alpha,
            "W_sparse_beta": self.W_sparse_beta,
            "matrix_first_branch": self.matrix_first_branch,
            "matrix_right": self.matrix_right,
            "matrix_alpha_beta": self.matrix_alpha_beta,
            "alpha": self.alpha,
            "beta": self.beta,
            "gamma": self.gamma,
            "theta": self.theta,
            "omega": self.omega
        }

        # The context manager guarantees the handle is flushed and closed even
        # if pickling fails; previously the file object was never closed.
        with open(target_path, "wb") as model_file:
            pickle.dump(dictionary_to_save,
                        model_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))
Пример #9
0
    def extract_models(self, dataReader, submission=False):
        """Instantiate one recommender per entry of ``self.configs.models``.

        Each entry is a mapping whose ``"model_name"`` key selects the
        recommender class; the remaining keys read from it depend on the
        selected model (see the builder table below).

        Args:
            dataReader: Data source. ``get_URM_train()`` is always called;
                when ``submission`` is true ``get_URM_all()`` is called as
                well and its result is used instead. The other ``get_*``
                accessors are only called by the models that need them.
            submission: When true, build the models on the matrix returned
                by ``dataReader.get_URM_all()``.

        Returns:
            list: The constructed recommenders, in configuration order.
            Entries with an unrecognised ``"model_name"`` are reported on
            stdout and skipped.

        Raises:
            KeyError: If an entry lacks ``"model_name"`` or a key required by
                the selected model.
        """
        print(
            "Configurator: The models are being extracted from the config file"
        )
        recsys = []
        models = list(self.configs.models)
        data = dataReader.get_URM_train()
        if submission:
            data = dataReader.get_URM_all()

        # Dispatch table: model name -> builder taking the model's config.
        # Builders are lambdas so that classes and dataReader accessors are
        # only touched for the models that are actually requested.
        builders = {
            # User Collaborative Filtering with KNN
            "user_knn_cf": lambda cfg: UserKNNCFRecommender(
                data, sparse_weights=cfg["sparse_weights"]),
            # Item Collaborative Filtering with KNN
            "item_knn_cf": lambda cfg: ItemKNNCFRecommender(
                data, sparse_weights=cfg["sparse_weights"]),
            # Item Content Based Filtering with KNN
            "item_knn_cbf": lambda cfg: ItemKNNCBFRecommender(
                data,
                dataReader.get_ICM(),
                sparse_weights=cfg["sparse_weights"]),
            # Slim BPR with Python
            "slim_bpr_python": lambda cfg: Slim_BPR_Recommender_Python(
                data,
                positive_threshold=cfg["positive_threshold"],
                sparse_weights=cfg["sparse_weights"]),
            # Slim BPR with Cython Extension
            "slim_bpr_mark1": lambda cfg: Slim_mark1(
                data,
                positive_threshold=cfg["positive_threshold"],
                recompile_cython=cfg["recompile_cython"],
                symmetric=cfg["symmetric"]),
            "slim_bpr_mark2": lambda cfg: Slim_mark2(
                data,
                positive_threshold=cfg["positive_threshold"],
                recompile_cython=cfg["recompile_cython"],
                symmetric=cfg["symmetric"]),
            # Funk SVD Recommender
            "funksvd": lambda cfg: FunkSVD(data),
            "asysvd": lambda cfg: AsySVD(data),
            "puresvd": lambda cfg: PureSVDRecommender(data),
            "mf_bpr_cython": lambda cfg: MF_BPR_Cython(
                data, recompile_cython=cfg["recompile_cython"]),
            "mf_cython": lambda cfg: MatrixFactorization_Cython(
                data,
                positive_threshold=cfg["positive_threshold"],
                URM_validation=dataReader.get_URM_test(),
                recompile_cython=cfg["recompile_cython"],
                algorithm=cfg["algorithm"]),
            # These two are constructed without any data matrix.
            "ials_numpy": lambda cfg: IALS_numpy(),
            "bprmf": lambda cfg: BPRMF(),
            "user_item_avg": lambda cfg: UserItemAvgRecommender(
                data,
                dataReader.get_UCM(),
                dataReader.get_ICM(),
                sparse_weights=cfg["sparse_weights"],
                verbose=cfg["verbose"],
                similarity_mode=cfg["similarity_mode"],
                normalize=cfg["normalize"],
                alpha=cfg["alpha"]),
            "2levelhybrid": lambda cfg: TwoLevelHybridRecommender(
                data,
                dataReader.get_UCM(),
                dataReader.get_ICM(),
                sparse_weights=cfg["sparse_weights"],
                verbose=cfg["verbose"],
                similarity_mode=cfg["similarity_mode"],
                normalize=cfg["normalize"],
                alpha=cfg["alpha"],
                avg=cfg["avg"]),
            "seqrand": lambda cfg: SeqRandRecommender(
                data,
                dataReader.get_URM_train_tfidf(),
                dataReader.get_UCM(),
                dataReader.get_ICM(),
                dataReader.get_target_playlists_seq(),
                sparse_weights=cfg["sparse_weights"],
                verbose=cfg["verbose"],
                similarity_mode=cfg["similarity_mode"],
                normalize=cfg["normalize"],
                alpha=cfg["alpha"],
                beta=cfg["beta"],
                gamma=cfg["gamma"]),
            "itemtree": lambda cfg: ItemTreeRecommender(
                data,
                dataReader.get_URM_train_okapi(),
                dataReader.get_ICM(),
                sparse_weights=cfg["sparse_weights"]),
            "itemtree_offline": lambda cfg: ItemTreeRecommender_offline(
                data, dataReader.get_ICM()),
            "slim": lambda cfg: Slim(
                data,
                sparse_weights=cfg["sparse_weights"],
                normalize=cfg["normalize"]),
            "p3alpha": lambda cfg: P3alphaRecommender(data),
            "rp3beta": lambda cfg: RP3betaRecommender(data),
            "slim_elastic": lambda cfg: SLIMElasticNetRecommender(data),
            "party": lambda cfg: PartyRecommender_offline(data),
            "pyramid": lambda cfg: PyramidRecommender_offline(data),
            "pyramid_item_tree":
                lambda cfg: PyramidItemTreeRecommender_offline(
                    data, dataReader.get_ICM()),
            "hybrid_eight": lambda cfg: HybridEightRecommender_offline(
                data, dataReader.get_ICM()),
            "combo": lambda cfg: ComboRecommender_offline(
                data, dataReader.get_ICM()),
            "neuron": lambda cfg: SingleNeuronRecommender_offline(
                data, dataReader.get_ICM()),
            # The Slim_mark2 *class* (not an instance) is handed over. The
            # loader and the Slim_mark2 instance that used to be created
            # here were never used, so they are no longer built.
            "cfw": lambda cfg: CFWBoostingRecommender(
                data, dataReader.get_ICM(), Slim_mark2),
        }

        for model in models:
            name = model["model_name"]
            # Non-string names can never match a key; treating them as
            # unknown keeps unhashable values from raising in the lookup.
            build = builders.get(name) if isinstance(name, str) else None
            if build is None:
                # Make configuration typos visible instead of dropping the
                # entry without a trace.
                print("Configurator: Unknown model name '{}' - skipped".format(
                    name))
                continue
            recsys.append(build(model))
        print("Configurator: Models are extracted")

        return recsys
    def fit(self,
            alpha=0.0029711141561171717,
            beta=0.9694720669481413,
            gamma=0.9635187725527589,
            theta=0.09930388487311004,
            omega=0.766047309541692,
            coeff=5.4055892529064735,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            location="submission"):
        """Load six pre-trained models and precompute the blended matrices.

        Args:
            alpha: Weight of ``W_sparse_alpha`` (versus ``W_sparse_beta``) in
                ``matrix_alpha_beta``.
            beta: Weight of ``W_sparse_Slim`` (versus ``W_sparse_URM_T``) in
                ``matrix_level1``.
            gamma, theta, omega, coeff: Stored on the instance and reported
                in ``self.parameters``; not used further inside this method.
            normalize: Stored as ``self.normalize``.
            save_model: When true, call ``saveModel`` on
                ``"saved_models/<location>/"`` once fitting is done.
            submission: Its negation is stored as ``self.submission`` and
                passed as ``training=`` to ``OfflineDataLoader.get_model``.
            best_parameters: When true the six weights above are ignored and
                ``loadModel`` is called on the location returned by
                ``OfflineDataLoader.get_parameter`` (presumably restoring
                them -- confirm against ``loadModel``).
            location: Sub-folder of ``saved_models/`` used by ``save_model``.
        """
        if not best_parameters:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.theta = theta
            self.omega = omega
            self.coeff = coeff
        else:
            parameter_loader = OfflineDataLoader()
            stored_folder, stored_file = parameter_loader.get_parameter(
                self.RECOMMENDER_NAME)
            self.loadModel(folder_path=stored_folder, file_name=stored_file)

        self.normalize = normalize
        # NOTE: holds the *negated* flag; it is what get_model receives as
        # ``training``.
        self.submission = not submission
        loader = OfflineDataLoader()

        # (attribute holding the model, model class, attribute holding its
        # similarity matrix)
        components = (
            ("m_user_knn_cf", UserKNNCFRecommender, "W_sparse_URM"),
            ("m_item_knn_cf", ItemKNNCFRecommender, "W_sparse_URM_T"),
            ("m_slim_mark2", Slim_mark2, "W_sparse_Slim"),
            ("m_alpha", P3alphaRecommender, "W_sparse_alpha"),
            ("m_beta", RP3betaRecommender, "W_sparse_beta"),
            ("m_slim_elastic", SLIMElasticNetRecommender, "W_sparse_elastic"),
        )

        # First pass: build every model on URM_train and load its weights.
        for model_attr, recommender_cls, _ in components:
            recommender = recommender_cls(self.URM_train)
            setattr(self, model_attr, recommender)
            model_folder, model_file = loader.get_model(
                recommender_cls.RECOMMENDER_NAME, training=self.submission)
            recommender.loadModel(folder_path=model_folder,
                                  file_name=model_file)

        # Second pass: pass each similarity matrix through check_matrix
        # ("csr", float32).
        for model_attr, _, weight_attr in components:
            weights = getattr(self, model_attr).W_sparse
            setattr(self, weight_attr,
                    check_matrix(weights, "csr", dtype=np.float32))

        # Precomputations
        self.matrix_alpha_beta = (self.alpha * self.W_sparse_alpha
                                  + (1 - self.alpha) * self.W_sparse_beta)
        self.matrix_level1 = (self.beta * self.W_sparse_Slim
                              + (1 - self.beta) * self.W_sparse_URM_T)

        summary_template = "alpha={}, beta={}, gamma={}, theta={}, omega={}, coeff={}"
        self.parameters = summary_template.format(
            self.alpha, self.beta, self.gamma,
            self.theta, self.omega, self.coeff)

        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)
class PartyRecommender_offline(RecommenderSystem):
    """Hybrid recommender that blends six pre-trained models loaded from disk.

    ``fit`` loads the user-KNN, item-KNN, SLIM-BPR, P3alpha, RP3beta and
    SLIM-ElasticNet models through ``OfflineDataLoader`` and precomputes two
    blended similarity matrices; ``recommend`` combines the resulting score
    matrices with the weights ``gamma``, ``theta``, ``omega`` and ``coeff``.
    """

    RECOMMENDER_NAME = "PartyTreeRecommender_offline"

    def __init__(self, URM_train):
        """Store ``URM_train`` after ``check_matrix(..., "csr", float32)``."""
        super(PartyRecommender_offline, self).__init__()
        self.URM_train = check_matrix(URM_train, "csr", dtype=np.float32)
        self.parameters = None
        self.dataset = None
        self.normalize = False

    def __repr__(self):
        return "Party 3 Level Hybrid Offline Recommender"

    def fit(self,
            alpha=0.0029711141561171717,
            beta=0.9694720669481413,
            gamma=0.9635187725527589,
            theta=0.09930388487311004,
            omega=0.766047309541692,
            coeff=5.4055892529064735,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            location="submission"):
        """Load the pre-trained models and precompute the blended matrices.

        Args:
            alpha: Weight of ``W_sparse_alpha`` (versus ``W_sparse_beta``) in
                ``matrix_alpha_beta``.
            beta: Weight of ``W_sparse_Slim`` (versus ``W_sparse_URM_T``) in
                ``matrix_level1``.
            gamma: Weight of the alpha/beta scores versus the user-KNN scores
                in ``recommend``.
            theta: Weight of that second-level blend versus the
                ``matrix_level1`` scores in ``recommend``.
            omega: Weight of the (``coeff``-scaled) ElasticNet scores versus
                the third-level blend in ``recommend``.
            coeff: Extra multiplier applied to the ElasticNet scores.
            normalize: Stored as ``self.normalize``; see ``recommend``.
            save_model: When true, call ``saveModel`` on
                ``"saved_models/<location>/"`` once fitting is done.
            submission: Its negation is stored as ``self.submission`` and
                passed as ``training=`` to ``OfflineDataLoader.get_model``.
            best_parameters: When true the six weights above are ignored and
                ``loadModel`` is called on the location returned by
                ``OfflineDataLoader.get_parameter`` (presumably restoring
                them -- confirm against ``loadModel``).
            location: Sub-folder of ``saved_models/`` used by ``save_model``.
        """
        if best_parameters:
            m = OfflineDataLoader()
            folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.theta = theta
            self.omega = omega
            self.coeff = coeff

        self.normalize = normalize
        # NOTE: despite its name this holds the *negated* flag; it is the
        # value handed to get_model as ``training``.
        self.submission = not submission
        m = OfflineDataLoader()

        # (attribute holding the model, model class, attribute holding its
        # similarity matrix). One table replaces six copy-pasted stanzas.
        components = (
            ("m_user_knn_cf", UserKNNCFRecommender, "W_sparse_URM"),
            ("m_item_knn_cf", ItemKNNCFRecommender, "W_sparse_URM_T"),
            ("m_slim_mark2", Slim_mark2, "W_sparse_Slim"),
            ("m_alpha", P3alphaRecommender, "W_sparse_alpha"),
            ("m_beta", RP3betaRecommender, "W_sparse_beta"),
            ("m_slim_elastic", SLIMElasticNetRecommender, "W_sparse_elastic"),
        )

        # Build every model on URM_train and load its stored weights.
        for model_attr, recommender_cls, _ in components:
            recommender = recommender_cls(self.URM_train)
            setattr(self, model_attr, recommender)
            folder_path, file_name = m.get_model(
                recommender_cls.RECOMMENDER_NAME, training=self.submission)
            recommender.loadModel(folder_path=folder_path, file_name=file_name)

        # Pass each similarity matrix through check_matrix ("csr", float32).
        for model_attr, _, weight_attr in components:
            weights = getattr(self, model_attr).W_sparse
            setattr(self, weight_attr,
                    check_matrix(weights, "csr", dtype=np.float32))

        # Precomputations
        self.matrix_alpha_beta = (self.alpha * self.W_sparse_alpha
                                  + (1 - self.alpha) * self.W_sparse_beta)
        self.matrix_level1 = (self.beta * self.W_sparse_Slim
                              + (1 - self.beta) * self.W_sparse_URM_T)

        self.parameters = "alpha={}, beta={}, gamma={}, theta={}, omega={}, coeff={}".format(
            self.alpha, self.beta, self.gamma,
            self.theta, self.omega, self.coeff)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)

    def recommend(self, playlist_id_array, cutoff=None, remove_seen_flag=True, remove_top_pop_flag=False,
                  remove_CustomItems_flag=False, export=False):
        """Rank items for one or more rows of ``URM_train``.

        Args:
            playlist_id_array: A scalar row index or a sequence of row
                indices into ``URM_train``.
            cutoff: Number of items to return per row. Defaults to
                ``URM_train.shape[1] - 1``.
            remove_seen_flag: When true each score row is passed through
                ``self._remove_seen_on_scores`` before ranking.
            remove_top_pop_flag: Accepted for interface compatibility; unused.
            remove_CustomItems_flag: Accepted for interface compatibility;
                unused.
            export: When true return a string instead of a list.

        Returns:
            When ``export`` is false, a list with one ranked list of item
            indices (best first) per requested row -- also for a scalar id,
            where the result is a one-element list of lists. When ``export``
            is true, ``str(...)`` of the first row's ranking (scalar id) or
            of the whole result (sequence), with leading and trailing
            ``[``, ``,`` and ``]`` characters stripped.
        """
        # If is a scalar transform it in a 1-cell array
        single_user = np.isscalar(playlist_id_array)
        if single_user:
            playlist_id_array = np.atleast_1d(playlist_id_array)
        if cutoff is None:
            cutoff = self.URM_train.shape[1] - 1

        # Slice the requested rows once and reuse them for every product.
        user_profile = self.URM_train[playlist_id_array]

        scores_URM = self.W_sparse_URM[playlist_id_array].dot(self.URM_train).toarray()
        scores_alphabeta = user_profile.dot(self.matrix_alpha_beta).toarray()
        scores_level1 = user_profile.dot(self.matrix_level1).toarray()
        scores_level2 = self.gamma * scores_alphabeta + (1 - self.gamma) * scores_URM
        scores_level3 = self.theta * scores_level2 + (1 - self.theta) * scores_level1
        scores_elastic = user_profile.dot(self.W_sparse_elastic).toarray()
        scores = (self.omega * self.coeff * scores_elastic) + (1 - self.omega) * scores_level3

        if self.normalize:
            # normalization will keep the scores in the same range
            # of value of the ratings in dataset
            # NOTE(review): sparse_weights, W_sparse and W are not assigned
            # anywhere in this class; presumably RecommenderSystem provides
            # them -- confirm, otherwise normalize=True raises AttributeError.
            rated = user_profile.copy()
            rated.data = np.ones_like(rated.data)
            if self.sparse_weights:
                den = rated.dot(self.W_sparse).toarray()
            else:
                den = rated.dot(self.W)
            den[np.abs(den) < 1e-6] = 1.0  # to avoid NaNs
            scores /= den

        # The flag does not change per row, so test it once outside the loop.
        if remove_seen_flag:
            for user_index, user_id in enumerate(playlist_id_array):
                scores[user_index, :] = self._remove_seen_on_scores(user_id, scores[user_index, :])

        # Partial sort: the ``cutoff`` best items per row, in arbitrary order.
        relevant_items_partition = (-scores).argpartition(cutoff, axis=1)[:, 0:cutoff]
        # Get original value and sort it
        # [:, None] adds 1 dimension to the array, from (block_size,) to (block_size,1)
        # This is done to correctly get scores_batch value as [row, relevant_items_partition[row,:]]
        row_selector = np.arange(scores.shape[0])[:, None]
        relevant_items_partition_original_value = scores[row_selector, relevant_items_partition]
        relevant_items_partition_sorting = np.argsort(-relevant_items_partition_original_value, axis=1)
        ranking = relevant_items_partition[row_selector, relevant_items_partition_sorting]

        ranking_list = ranking.tolist()

        # NOTE: a scalar id is only unwrapped in the export path; without
        # export the caller still receives a list of lists.
        if not export:
            return ranking_list
        exported = ranking_list[0] if single_user else ranking_list
        return str(exported).strip("[,]")

    def saveModel(self, folder_path, file_name=None):
        """Pickle the similarity matrices and blending weights to a file.

        Args:
            folder_path: Prefix of the destination path. It is concatenated
                with ``file_name`` as-is, so it must already end with a path
                separator.
            file_name: Name of the output file. When ``None`` the value of
                ``self.RECOMMENDER_NAME`` is used.

        Raises:
            OSError: Propagated from ``open`` when the destination cannot be
                opened for writing.
        """
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        target_path = folder_path + file_name
        print("{}: Saving model in file '{}'".format(self.RECOMMENDER_NAME, target_path))
        dictionary_to_save = {"W_sparse_URM": self.W_sparse_URM,
                              "W_sparse_URM_T": self.W_sparse_URM_T,
                              "W_sparse_Slim": self.W_sparse_Slim,
                              "W_sparse_alpha": self.W_sparse_alpha,
                              "W_sparse_beta": self.W_sparse_beta,
                              "W_sparse_elastic": self.W_sparse_elastic,
                              "matrix_level1": self.matrix_level1,
                              "matrix_alpha_beta": self.matrix_alpha_beta,
                              "alpha": self.alpha,
                              "beta": self.beta,
                              "gamma": self.gamma,
                              "theta": self.theta,
                              "omega": self.omega,
                              "coeff": self.coeff}

        # The context manager guarantees the handle is flushed and closed even
        # if pickling fails; previously the file object was never closed.
        with open(target_path, "wb") as model_file:
            pickle.dump(dictionary_to_save,
                        model_file,
                        protocol=pickle.HIGHEST_PROTOCOL)

        print("{}: Saving complete".format(self.RECOMMENDER_NAME))