Example #1
    def __init__(self, URM_train, ICM, target_model, training=True):

        super(CFWBoostingRecommender, self).__init__()
        if (URM_train.shape[1] != ICM.shape[0]):
            raise ValueError(
                "Number of items not consistent. URM contains {} but ICM contains {}"
                .format(URM_train.shape[1], ICM.shape[0]))
        # if(S_matrix_target.shape[0] != S_matrix_target.shape[1]):
        #     raise ValueError("Items imilarity matrix is not square: rows are {}, columns are {}".format(S_matrix_target.shape[0],
        #                                                                                                 S_matrix_target.shape[1]))
        # if(S_matrix_target.shape[0] != ICM.shape[0]):
        #     raise ValueError("Number of items not consistent. S_matrix contains {} but ICM contains {}".format(S_matrix_target.shape[0],
        #                                                                                                   ICM.shape[0]))

        self.URM_train = check_matrix(URM_train, 'csr')
        self.ICM = check_matrix(ICM, 'csr')
        m = OfflineDataLoader()
        fold, file = m.get_model(target_model.RECOMMENDER_NAME,
                                 training=training)
        m1 = target_model(self.URM_train)
        print(m1.RECOMMENDER_NAME)
        m1.loadModel(folder_path=fold, file_name=file)
        self.S_matrix_target = check_matrix(m1.W_sparse, 'csr')
        self.n_items = self.URM_train.shape[1]
        self.n_users = self.URM_train.shape[0]
        self.n_features = self.ICM.shape[1]
        self.sparse_weights = True
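
The constructor above validates that the URM and ICM agree on the number of items before loading the target model's similarity matrix. A minimal, self-contained sketch of that consistency check on toy scipy matrices (all names here are illustrative, not part of the framework):

import scipy.sparse as sps

n_users, n_items, n_features = 5, 4, 3
URM_train = sps.random(n_users, n_items, density=0.5, format='csr')
ICM = sps.random(n_items, n_features, density=0.5, format='csr')

# URM columns and ICM rows must index the same item set
if URM_train.shape[1] != ICM.shape[0]:
    raise ValueError("Number of items not consistent. URM contains {} but ICM contains {}".format(
        URM_train.shape[1], ICM.shape[0]))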
Example #2
    def fit(self,
            topK=600,
            shrink=1000,
            similarity='asymmetric',
            normalize=True,
            feature_weighting="BM25",
            save_model=False,
            best_parameters=False,
            **similarity_args):
        # NOTE: this hard-coded tuned value overrides any similarity_args
        # supplied by the caller
        similarity_args = {'asymmetric_alpha': 0.40273209903969387}
        if best_parameters:
            m = OfflineDataLoader()
            folder_path_icbf, file_name_icbf = m.get_parameter(
                self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path_icbf,
                           file_name=file_name_icbf)
            if self.feature_weighting == "none":
                pass
            if self.feature_weighting == "BM25":
                self.ICM = self.ICM.astype(np.float32)
                self.ICM = to_okapi(self.ICM)

            elif self.feature_weighting == "TF-IDF":
                self.ICM = self.ICM.astype(np.float32)
                self.ICM = to_tfidf(self.ICM)
            similarity = Compute_Similarity(self.ICM.T,
                                            shrink=shrink,
                                            topK=topK,
                                            normalize=normalize,
                                            similarity=similarity,
                                            **similarity_args)
        else:
            self.topK = topK
            self.shrink = shrink
            similarity = Compute_Similarity(self.ICM.T,
                                            shrink=shrink,
                                            topK=topK,
                                            normalize=normalize,
                                            similarity=similarity,
                                            **similarity_args)

        self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, " \
                          "normalize= {4}".format(
            self.sparse_weights, similarity, self.shrink, self.topK, normalize)

        if self.sparse_weights:
            self.W_sparse = similarity.compute_similarity()
        else:
            self.W = similarity.compute_similarity()
            self.W = self.W.toarray()

        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name="ItemKNNCBFRecommender_submission_model")
Example #3
    def fit(self,
            show_max_performance=False,
            loss_tolerance=1e-6,
            iteration_limit=50000,
            damp_coeff=0.0,
            topK=800,
            add_zeros_quota=0.9744535193088417,
            normalize_similarity=False,
            save_model=True,
            best_parameters=False,
            offline=False,
            location="training",
            submission=False):
        if offline:
            m = OfflineDataLoader()
            folder_path, file_name = m.get_model(self.RECOMMENDER_NAME,
                                                 training=not submission)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            if best_parameters:
                m = OfflineDataLoader()
                folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path, file_name=file_name)
            else:
                self.normalize_similarity = normalize_similarity

                self.add_zeros_quota = add_zeros_quota
                self.topK = topK

            self._generateTrainData_low_ram()

            commonFeatures = self.ICM[self.row_list].multiply(
                self.ICM[self.col_list])

            linalg_result = linalg.lsqr(commonFeatures,
                                        self.data_list,
                                        show=False,
                                        atol=loss_tolerance,
                                        btol=loss_tolerance,
                                        iter_lim=iteration_limit,
                                        damp=damp_coeff)

            # res = linalg.lsmr(commonFeatures, self.data_list, show = False, atol=loss_tolerance, btol=loss_tolerance,
            #                   maxiter = iteration_limit, damp=damp_coeff)

            self.D_incremental = linalg_result[0].copy()
            self.D_best = linalg_result[0].copy()
            self.epochs_best = 0

            self.loss = linalg_result[3]

            self._compute_W_sparse()
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=(self.RECOMMENDER_NAME + "_" + location +
                                      "_model"))
Example #4
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    clear()
    dataReader = PlaylistDataReader()
    dataReader.generate_datasets()
    URM_train = dataReader.get_URM_train()
    # URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()
    ICM = dataReader.get_ICM()
    output_root_path = "tuned_parameters"
    m = OfflineDataLoader()
    fold, fil = m.get_model(ItemKNNCFRecommender.RECOMMENDER_NAME,
                            training=True)
    m1 = ItemKNNCFRecommender(URM_train, ICM)
    m1.loadModel(folder_path=fold, file_name=fil)
    W_sparse_CF = m1.W_sparse

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    collaborative_algorithm_list = [
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # Slim_mark1,
        # Slim_mark2,
        # ItemTreeRecommender_offline
        # SLIMElasticNetRecommender,
        # PartyRecommender_offline
        # PyramidRecommender_offline
        #  ItemKNNCBFRecommender
        # PyramidItemTreeRecommender_offline
        #HybridEightRecommender_offline
        #ComboRecommender_offline
        SingleNeuronRecommender_offline
        # CFWBoostingRecommender
    ]

    from parameter_tuning.AbstractClassSearch import EvaluatorWrapper
    from base.evaluation.Evaluator import SequentialEvaluator

    evaluator_validation_earlystopping = SequentialEvaluator(URM_test,
                                                             cutoff_list=[10])
    evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])

    evaluator_validation = EvaluatorWrapper(evaluator_validation_earlystopping)
    evaluator_test = EvaluatorWrapper(evaluator_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        ICM=ICM,
        W_sparse_CF=W_sparse_CF,
        metric_to_optimize="MAP",
        evaluator_validation_earlystopping=evaluator_validation_earlystopping,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        n_cases=250,
        output_root_path=output_root_path)

    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()
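
Example 4 freezes the shared search arguments once with functools.partial and then varies only the recommender class. A minimal sketch of that pattern with a stand-in search function (not the framework's API):

from functools import partial

def run_search(recommender_class, URM_train, n_cases, metric_to_optimize):
    print("Tuning {} over {} cases, optimizing {}".format(
        recommender_class, n_cases, metric_to_optimize))

# Bind the shared keyword arguments once; only the class still varies
run_search_partial = partial(run_search, URM_train=None,
                             n_cases=250, metric_to_optimize="MAP")

for recommender_class in ["ItemKNNCFRecommender", "P3alphaRecommender"]:
    run_search_partial(recommender_class)

Example #5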
    def fit(self,
            topK=175,
            shrink=400,
            similarity="asymmetric",
            normalize=True,
            feature_weighting="BM25",
            save_model=False,
            best_parameters=False,
            location="training",
            submission=False,
            offline=False,
            **similarity_args):
        if offline:
            m = OfflineDataLoader()
            folder_path_icf, file_name_icf = m.get_model(
                self.RECOMMENDER_NAME, training=(not submission))
            self.loadModel(folder_path=folder_path_icf,
                           file_name=file_name_icf)
        else:
            if best_parameters:
                m = OfflineDataLoader()
                folder_path_ucf, file_name_ucf = m.get_parameter(
                    self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path_ucf,
                               file_name=file_name_ucf)
                if self.feature_weighting == "none":
                    similarity = Compute_Similarity(self.URM_train.T,
                                                    **similarity_args)
                else:
                    if feature_weighting == "BM25":
                        self.URM_train_copy = self.URM_train.astype(np.float32)
                        self.URM_train_copy = to_okapi(self.URM_train)

                    elif feature_weighting == "TF-IDF":
                        self.URM_train_copy = self.URM_train.astype(np.float32)
                        self.URM_train_copy = to_tfidf(self.URM_train)
                    # NOTE: hard-coded tuned values replace any caller-supplied
                    # similarity_args
                    similarity_args = {
                        'asymmetric_alpha': 0.11483114799990246,
                        'normalize': True,
                        'shrink': 450,
                        'similarity': 'asymmetric',
                        'topK': 200
                    }
                    similarity = Compute_Similarity(self.URM_train_copy.T,
                                                    **similarity_args)
            else:
                self.topK = topK
                self.shrink = shrink
                self.feature_weighting = feature_weighting
                # NOTE: hard-coded tuned value overrides caller-supplied args
                similarity_args = {'asymmetric_alpha': 0.0033404951135529437}
                if self.feature_weighting == "BM25":
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_okapi(self.URM_train)

                elif self.feature_weighting == "TF-IDF":
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_tfidf(self.URM_train)

                if self.feature_weighting == "none":
                    similarity = Compute_Similarity(self.URM_train.T,
                                                    shrink=shrink,
                                                    topK=topK,
                                                    normalize=normalize,
                                                    similarity=similarity,
                                                    **similarity_args)
                else:
                    similarity = Compute_Similarity(self.URM_train_copy.T,
                                                    shrink=shrink,
                                                    topK=topK,
                                                    normalize=normalize,
                                                    similarity=similarity,
                                                    **similarity_args)


            self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, " \
                            "normalize= {4}".format(self.sparse_weights, similarity, shrink, topK, normalize)

            if self.sparse_weights:
                self.W_sparse = similarity.compute_similarity()
            else:
                self.W = similarity.compute_similarity()
                self.W = self.W.toarray()
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name=self.RECOMMENDER_NAME + "_" + location +
                           "_model")
    def fit(self,
            epochs=50,
            URM_test=None,
            filterTopPop=False,
            minRatingsPerUser=1,
            batch_size=1000,
            validate_every_N_epochs=1,
            start_validation_after_N_epochs=0,
            lambda_i=1e-4,
            lambda_j=1e-4,
            learning_rate=0.020,
            topK=500,
            sgd_mode='adagrad',
            save_model=False,
            best_parameters=False,
            offline=True,
            submission=False):
        self.parameters = "positive_threshold= {0}, sparse_weights= {1}, symmetric= {2},sgd_mode= {3}, lambda_i={4}, " \
                          "lambda_j={5}, learning_rate={6}, topK={7}, epochs= {8}".format(
        self.positive_threshold,self.sparse_weights,self.symmetric,self.sgd_mode,lambda_i,lambda_j,learning_rate,topK,epochs)
        if offline:
            m = OfflineDataLoader()
            folder, file = m.get_model(self.RECOMMENDER_NAME, training=(not submission))
            self.loadModel(folder_path=folder,file_name=file)
        else:
            self.save_model = save_model
            # Select only positive interactions
            URM_train_positive = self.URM_train.copy()

            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            if best_parameters:
                m = OfflineDataLoader()
                folder_slim, file_slim = m.get_parameter(self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_slim,file_name=file_slim)
                self.cythonEpoch = Slim_BPR_Cython_Epoch(
                    self.URM_mask,
                    sparse_weights=self.sparse_weights,
                    learning_rate=learning_rate,
                    batch_size=1,
                    symmetric=self.symmetric)
                result = super(Slim_BPR_Recommender_Cython, self).fit_alreadyInitialized(
                    epochs=epochs,
                    URM_test=URM_test,
                    filterTopPop=filterTopPop,
                    minRatingsPerUser=minRatingsPerUser,
                    batch_size=batch_size,
                    validate_every_N_epochs=validate_every_N_epochs,
                    start_validation_after_N_epochs=start_validation_after_N_epochs)

            else:
                self.sgd_mode = sgd_mode
                self.cythonEpoch = Slim_BPR_Cython_Epoch(
                    self.URM_mask,
                    sparse_weights=self.sparse_weights,
                    topK=topK,
                    learning_rate=learning_rate,
                    li_reg=lambda_i,
                    lj_reg=lambda_j,
                    batch_size=1,
                    symmetric=self.symmetric,
                    sgd_mode=sgd_mode)
                result = super(Slim_BPR_Recommender_Cython, self).fit_alreadyInitialized(
                    epochs=epochs,
                    URM_test=URM_test,
                    filterTopPop=filterTopPop,
                    minRatingsPerUser=minRatingsPerUser,
                    batch_size=batch_size,
                    validate_every_N_epochs=validate_every_N_epochs,
                    start_validation_after_N_epochs=start_validation_after_N_epochs,
                    lambda_i=lambda_i,
                    lambda_j=lambda_j,
                    learning_rate=learning_rate,
                    topK=topK)
                # NOTE: returning here skips the save_model block below
                return result

        if save_model:
            self.saveModel("saved_models/submission/",file_name="SLIM_BPR_Recommender_mark1_submission_model")
        return self.W
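
The SLIM BPR fit above keeps only interactions at or above positive_threshold before training. A compact sketch of that filtering step on a toy matrix:

import numpy as np
import scipy.sparse as sps

URM = sps.csr_matrix(np.array([[5, 1, 0],
                               [2, 0, 4]], dtype=np.float32))
positive_threshold = 3

# Comparing .data yields booleans; eliminate_zeros() then drops the
# False entries, leaving only the positive interactions
URM_positive = URM.copy()
URM_positive.data = URM_positive.data >= positive_threshold
URM_positive.eliminate_zeros()

Example #7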
def printOutMapValues(modelList, URM, ICM, modelsSoFar):
    # Registry mapping the saved-model name strings to (class, needs_ICM);
    # disabled recommenders stay commented out
    model_registry = {
        "UserKNNCFRecommender": (UserKNNCFRecommender, False),
        "ItemKNNCFRecommender": (ItemKNNCFRecommender, False),
        "ItemKNNCBFRecommender": (ItemKNNCBFRecommender, True),
        "SLIM_BPR_Recommender_mark1": (Slim_mark1, False),
        "RP3_Beta_Recommender": (RP3betaRecommender, False),
        "P3_Alpha_Recommender": (P3alphaRecommender, False),
        "PureSVD": (PureSVDRecommender, False),
        "Slim_Elastic_Net_Recommender": (SLIMElasticNetRecommender, False),
        "SLIM_BPR_Recommender_mark2": (Slim_mark2, False),
        # "ItemTreeRecommender_offline": (ItemTreeRecommender_offline, True),
        # "PartyRecommender_offline": (PartyRecommender_offline, False),
        "SingleNeuronRecommender_offline": (SingleNeuronRecommender_offline, True),
    }
    map_dict = {name: dict() for name in modelsSoFar}
    for model in modelList:
        # model[1] is the saved-model path; split it into folder and file name
        folder = "/".join(model[1].split("/")[:-1]) + "/"
        file = model[1].split("/")[-1]
        if model[0] not in model_registry:
            continue
        recommender_class, needs_icm = model_registry[model[0]]
        mod = recommender_class(URM, ICM) if needs_icm else recommender_class(URM)
        mod.loadModel(folder_path=folder, file_name=file, verbose=False)
        map_dict[model[0]][model[2]] = mod.MAP

    return map_dict
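
A hypothetical call, assuming modelList entries are (name, path, tag) tuples as the indexing above implies, and that URM and ICM are already loaded:

# Hypothetical inputs; model[0] = recommender name, model[1] = saved path,
# model[2] = tag under which the MAP value is stored
modelList = [("ItemKNNCFRecommender", "saved_models/training/icf_model", "run1")]
map_dict = printOutMapValues(modelList, URM, ICM, ["ItemKNNCFRecommender"])
print(map_dict["ItemKNNCFRecommender"]["run1"])  # MAP of that saved model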
Example #8
    def fit(
            self,
            alpha=1.3167219260598073,
            beta=15.939928536132701,
            gamma=0.6048873602128846,
            delta=1.0527588765188267,
            epsilon=2.08444591782293,
            zeta=1.2588273098979674,
            eta=18.41012777389885,
            theta=18.000293943452448,
            #    psi = 0.00130805010990942,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            offline=False,
            location="submission"):
        if offline:
            m = OfflineDataLoader()
            folder_path, file_name = m.get_model(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            if best_parameters:
                m = OfflineDataLoader()
                folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path, file_name=file_name)
            else:
                self.alpha = alpha
                self.beta = beta
                self.gamma = gamma
                self.delta = delta
                self.epsilon = epsilon
                self.zeta = zeta
                self.eta = eta
                self.theta = theta
        #       self.psi = psi

            self.normalize = normalize
            # Inverted flag: passed below as training=, so a submission run
            # loads the models trained on the full URM
            self.submission = not submission
            m = OfflineDataLoader()
            self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
            folder_path_ucf, file_name_ucf = m.get_model(
                UserKNNCFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf,
                                         file_name=file_name_ucf)

            self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
            folder_path_icf, file_name_icf = m.get_model(
                ItemKNNCFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_item_knn_cf.loadModel(folder_path=folder_path_icf,
                                         file_name=file_name_icf)

            self.m_item_knn_cbf = ItemKNNCBFRecommender(
                self.URM_train, self.ICM)
            folder_path_icf, file_name_icf = m.get_model(
                ItemKNNCBFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_item_knn_cbf.loadModel(folder_path=folder_path_icf,
                                          file_name=file_name_icf)

            self.m_slim_mark1 = Slim_mark1(self.URM_train)
            folder_path_slim, file_name_slim = m.get_model(
                Slim_mark1.RECOMMENDER_NAME, training=self.submission)
            self.m_slim_mark1.loadModel(folder_path=folder_path_slim,
                                        file_name=file_name_slim)

            self.m_slim_mark2 = Slim_mark2(self.URM_train)
            folder_path_slim, file_name_slim = m.get_model(
                Slim_mark2.RECOMMENDER_NAME, training=self.submission)
            self.m_slim_mark2.loadModel(folder_path=folder_path_slim,
                                        file_name=file_name_slim)

            self.m_alpha = P3alphaRecommender(self.URM_train)
            folder_path_alpha, file_name_alpha = m.get_model(
                P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
            self.m_alpha.loadModel(folder_path=folder_path_alpha,
                                   file_name=file_name_alpha)

            self.m_beta = RP3betaRecommender(self.URM_train)
            folder_path_beta, file_name_beta = m.get_model(
                RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
            self.m_beta.loadModel(folder_path=folder_path_beta,
                                  file_name=file_name_beta)

            self.m_slim_elastic = SLIMElasticNetRecommender(self.URM_train)
            folder_path_elastic, file_name_elastic = m.get_model(
                SLIMElasticNetRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_slim_elastic.loadModel(folder_path=folder_path_elastic,
                                          file_name=file_name_elastic)

            # self.m_cfw = CFWBoostingRecommender(self.URM_train,self.ICM,Slim_mark2,training=self.submission)
            # fold, file = m.get_model(CFWBoostingRecommender.RECOMMENDER_NAME,training= self.submission)
            # self.m_cfw.loadModel(folder_path=fold,file_name=file)

            self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            #print(self.W_sparse_URM.getrow(0).data)
            self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_URM_T.getrow(0).data)
            self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            #print(self.W_sparse_ICM.getrow(0).data)
            self.W_sparse_Slim1 = check_matrix(self.m_slim_mark1.W,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_Slim1.getrow(0).data)
            self.W_sparse_Slim2 = check_matrix(self.m_slim_mark2.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_Slim2.getrow(0).data)
            self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_alpha.getrow(0).data)
            self.W_sparse_beta = check_matrix(self.m_beta.W_sparse,
                                              "csr",
                                              dtype=np.float32)
            #print(self.W_sparse_beta.getrow(0).data)
            self.W_sparse_elastic = check_matrix(self.m_slim_elastic.W_sparse,
                                                 "csr",
                                                 dtype=np.float32)
            #print(self.W_sparse_elastic.getrow(0).data)
            #self.W_sparse_cfw = check_matrix(self.m_cfw.W_sparse,"csr",dtype=np.float32)
            # Precomputations
            self.matrix_wo_user = self.alpha * self.W_sparse_URM_T +\
                                  self.beta * self.W_sparse_ICM +\
                                  self.gamma * self.W_sparse_Slim1 +\
                                  self.delta * self.W_sparse_Slim2 +\
                                  self.epsilon * self.W_sparse_alpha +\
                                  self.zeta * self.W_sparse_beta + \
                                  self.eta * self.W_sparse_elastic #+ \
            #self.psi * self.W_sparse_cfw

            self.parameters = "alpha={}, beta={}, gamma={},delta={}, epsilon={}, zeta={}, eta={}, theta={}".format(
                self.alpha, self.beta, self.gamma, self.delta, self.epsilon,
                self.zeta, self.eta, self.theta)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)
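
Example 8 precomputes a single item-item matrix as a weighted sum of the component similarities, so scoring later reduces to one sparse product. A sketch with toy matrices standing in for the W_sparse_* members:

import numpy as np
import scipy.sparse as sps

# Toy similarity matrices standing in for W_sparse_URM_T, W_sparse_ICM, ...
W_a = sps.random(4, 4, density=0.5, format='csr', dtype=np.float32)
W_b = sps.random(4, 4, density=0.5, format='csr', dtype=np.float32)

alpha, beta = 1.3, 15.9
W_blend = alpha * W_a + beta * W_b  # the sum stays sparse

# Scoring: a user profile row times the blended similarity matrix
user_profile = sps.csr_matrix(np.ones((1, 4), dtype=np.float32))
item_scores = user_profile.dot(W_blend)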
Example #9
    def fit(self,
            alpha=0.0500226666668111,
            beta=0.9996482062853596,
            gamma=0.36595766622100967,
            theta=0.22879224932897924,
            omega=0.5940982982110466,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False):
        if best_parameters:
            m = OfflineDataLoader()
            folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.theta = theta
            self.omega = omega
        self.normalize = normalize
        self.submission = not submission
        m = OfflineDataLoader()
        self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
        folder_path_ucf, file_name_ucf = m.get_model(
            UserKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf,
                                     file_name=file_name_ucf)

        self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
        folder_path_icf, file_name_icf = m.get_model(
            ItemKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_item_knn_cf.loadModel(folder_path=folder_path_icf,
                                     file_name=file_name_icf)

        self.m_item_knn_cbf = ItemKNNCBFRecommender(self.URM_train, self.ICM)
        folder_path_icbf, file_name_icbf = m.get_model(
            ItemKNNCBFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_item_knn_cbf.loadModel(folder_path=folder_path_icbf,
                                      file_name=file_name_icbf)

        self.m_slim_mark1 = Slim_mark1(self.URM_train)
        folder_path_slim, file_name_slim = m.get_model(
            Slim_mark1.RECOMMENDER_NAME, training=self.submission)
        self.m_slim_mark1.loadModel(folder_path=folder_path_slim,
                                    file_name=file_name_slim)

        self.m_alpha = P3alphaRecommender(self.URM_train)
        folder_path_alpha, file_name_alpha = m.get_model(
            P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_alpha.loadModel(folder_path=folder_path_alpha,
                               file_name=file_name_alpha)

        self.m_beta = RP3betaRecommender(self.URM_train)
        folder_path_beta, file_name_beta = m.get_model(
            RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_beta.loadModel(folder_path=folder_path_beta,
                              file_name=file_name_beta)

        self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse,
                                         "csr",
                                         dtype=np.float32)
        self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse,
                                         "csr",
                                         dtype=np.float32)
        self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse,
                                           "csr",
                                           dtype=np.float32)
        self.W_sparse_Slim = check_matrix(self.m_slim_mark1.W,
                                          "csr",
                                          dtype=np.float32)
        self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse,
                                           "csr",
                                           dtype=np.float32)
        self.W_sparse_beta = check_matrix(self.m_beta.W_sparse,
                                          "csr",
                                          dtype=np.float32)
        # Precomputations
        self.matrix_first_branch = self.alpha * self.W_sparse_ICM + (
            1 - self.alpha) * self.W_sparse_Slim
        self.matrix_right = self.beta * self.matrix_first_branch + (
            1 - self.beta) * self.W_sparse_URM_T
        self.matrix_alpha_beta = self.gamma * self.W_sparse_alpha + (
            1 - self.gamma) * self.W_sparse_beta

        self.parameters = "alpha={}, beta={}, gamma={}, omega={}, theta={}".format(
            self.alpha, self.beta, self.gamma, self.omega, self.theta)
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name="ItemTreeRecommender_offline")
    def fit(self,
            alpha=0.1,
            beta=0.1,
            gamma=0.1,
            theta=0.1,
            delta=0.1,
            epsilon=0.1,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            location="submission"):
        if best_parameters:
            m = OfflineDataLoader()
            folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.theta = theta
            self.delta = delta
            self.epsilon = epsilon

        self.normalize = normalize
        self.submission = not submission
        m = OfflineDataLoader()
        self.m_party = PartyRecommender_offline(self.URM_train)
        folder_path_ucf, file_name_ucf = m.get_model(
            PartyRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_party.loadModel(folder_path=folder_path_ucf,
                               file_name=file_name_ucf)

        self.m_pyramid = PyramidRecommender_offline(self.URM_train)
        folder_path_icf, file_name_icf = m.get_model(
            PyramidRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_pyramid.loadModel(folder_path=folder_path_icf,
                                 file_name=file_name_icf)

        self.m_pyitem = PyramidItemTreeRecommender_offline(
            self.URM_train, self.ICM)
        folder_path_slim, file_name_slim = m.get_model(
            PyramidItemTreeRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_pyitem.loadModel(folder_path=folder_path_slim,
                                file_name=file_name_slim)

        self.m_8 = HybridEightRecommender_offline(self.URM_train, self.ICM)
        folder_path_alpha, file_name_alpha = m.get_model(
            HybridEightRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_8.loadModel(folder_path=folder_path_alpha,
                           file_name=file_name_alpha)

        self.m_sn = SingleNeuronRecommender_offline(self.URM_train, self.ICM)
        folder_path_alpha, file_name_alpha = m.get_model(
            SingleNeuronRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_sn.loadModel(folder_path=folder_path_alpha,
                            file_name=file_name_alpha)

        self.m_cfw = CFWBoostingRecommender(self.URM_train,
                                            self.ICM,
                                            Slim_mark2,
                                            training=self.submission)
        fold, file = m.get_model(CFWBoostingRecommender.RECOMMENDER_NAME,
                                 training=self.submission)
        self.m_cfw.loadModel(folder_path=fold, file_name=file)

        self.parameters = "alpha={}, beta={}, gamma={}, theta={},delta={} ".format(
            self.alpha, self.beta, self.gamma, self.theta, self.delta)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)
Example #11
    def extract_models(self, dataReader, submission=False):
        print(
            "Configurator: The models are being extracted from the config file"
        )
        recsys = list()
        models = list(self.configs.models)
        data = dataReader.get_URM_train()
        if submission:
            data = dataReader.get_URM_all()
        for model in models:
            # User Collaborative Filtering with KNN
            if model["model_name"] == "user_knn_cf":
                recsys.append(
                    UserKNNCFRecommender(
                        data, sparse_weights=model["sparse_weights"]))
            # Item Collaborative Filtering with KNN
            elif model["model_name"] == "item_knn_cf":
                recsys.append(
                    ItemKNNCFRecommender(
                        data, sparse_weights=model["sparse_weights"]))
            # Item Content Based Filtering with KNN
            elif model["model_name"] == "item_knn_cbf":
                recsys.append(
                    ItemKNNCBFRecommender(
                        data,
                        dataReader.get_ICM(),
                        sparse_weights=model["sparse_weights"]))
            # Slim BPR with Python
            elif model["model_name"] == "slim_bpr_python":
                recsys.append(
                    Slim_BPR_Recommender_Python(
                        data,
                        positive_threshold=model["positive_threshold"],
                        sparse_weights=model["sparse_weights"]))
            # Slim BPR with Cython Extension
            elif model["model_name"] == "slim_bpr_mark1":
                recsys.append(
                    Slim_mark1(data,
                               positive_threshold=model["positive_threshold"],
                               recompile_cython=model["recompile_cython"],
                               symmetric=model["symmetric"]))
            elif model["model_name"] == "slim_bpr_mark2":
                recsys.append(
                    Slim_mark2(data,
                               positive_threshold=model["positive_threshold"],
                               recompile_cython=model["recompile_cython"],
                               symmetric=model["symmetric"]))
            # Funk SVD Recommender
            elif model["model_name"] == "funksvd":
                recsys.append(FunkSVD(data))

            elif model["model_name"] == "asysvd":
                recsys.append(AsySVD(data))
            elif model["model_name"] == "puresvd":
                recsys.append(PureSVDRecommender(data))

            elif model["model_name"] == "mf_bpr_cython":
                recsys.append(
                    MF_BPR_Cython(data,
                                  recompile_cython=model["recompile_cython"]))
            elif model["model_name"] == "mf_cython":
                recsys.append(
                    MatrixFactorization_Cython(
                        data,
                        positive_threshold=model["positive_threshold"],
                        URM_validation=dataReader.get_URM_test(),
                        recompile_cython=model["recompile_cython"],
                        algorithm=model["algorithm"]))
            elif model["model_name"] == "ials_numpy":
                recsys.append(IALS_numpy())
            elif model["model_name"] == "bprmf":
                recsys.append(BPRMF())
            elif model["model_name"] == "user_item_avg":
                recsys.append(
                    UserItemAvgRecommender(
                        data,
                        dataReader.get_UCM(),
                        dataReader.get_ICM(),
                        sparse_weights=model["sparse_weights"],
                        verbose=model["verbose"],
                        similarity_mode=model["similarity_mode"],
                        normalize=model["normalize"],
                        alpha=model["alpha"]))

            elif model["model_name"] == "2levelhybrid":
                recsys.append(
                    TwoLevelHybridRecommender(
                        data,
                        dataReader.get_UCM(),
                        dataReader.get_ICM(),
                        sparse_weights=model["sparse_weights"],
                        verbose=model["verbose"],
                        similarity_mode=model["similarity_mode"],
                        normalize=model["normalize"],
                        alpha=model["alpha"],
                        avg=model["avg"]))

            elif model["model_name"] == "seqrand":
                recsys.append(
                    SeqRandRecommender(
                        data,
                        dataReader.get_URM_train_tfidf(),
                        dataReader.get_UCM(),
                        dataReader.get_ICM(),
                        dataReader.get_target_playlists_seq(),
                        sparse_weights=model["sparse_weights"],
                        verbose=model["verbose"],
                        similarity_mode=model["similarity_mode"],
                        normalize=model["normalize"],
                        alpha=model["alpha"],
                        beta=model["beta"],
                        gamma=model["gamma"]))

            elif model["model_name"] == "itemtree":
                recsys.append(
                    ItemTreeRecommender(
                        data,
                        dataReader.get_URM_train_okapi(),
                        dataReader.get_ICM(),
                        sparse_weights=model["sparse_weights"]))

            elif model["model_name"] == "itemtree_offline":
                recsys.append(
                    ItemTreeRecommender_offline(data, dataReader.get_ICM()))

            elif model["model_name"] == "slim":
                recsys.append(
                    Slim(data,
                         sparse_weights=model["sparse_weights"],
                         normalize=model["normalize"]))

            elif model["model_name"] == "p3alpha":
                recsys.append(P3alphaRecommender(data))
            elif model["model_name"] == "rp3beta":
                recsys.append(RP3betaRecommender(data))
            elif model["model_name"] == "slim_elastic":
                recsys.append(SLIMElasticNetRecommender(data))
            elif model["model_name"] == "party":
                recsys.append(PartyRecommender_offline(data))
            elif model["model_name"] == "pyramid":
                recsys.append(PyramidRecommender_offline(data))
            elif model["model_name"] == "pyramid_item_tree":
                recsys.append(
                    PyramidItemTreeRecommender_offline(data,
                                                       dataReader.get_ICM()))
            elif model["model_name"] == "hybrid_eight":
                recsys.append(
                    HybridEightRecommender_offline(data, dataReader.get_ICM()))
            elif model["model_name"] == "combo":
                recsys.append(
                    ComboRecommender_offline(data, dataReader.get_ICM()))
            elif model["model_name"] == "neuron":
                recsys.append(
                    SingleNeuronRecommender_offline(data,
                                                    dataReader.get_ICM()))
            elif model["model_name"] == "cfw":
                m = OfflineDataLoader()
                #fold,file = m.get_model(Slim_mark2.RECOMMENDER_NAME,training=True)
                m1 = Slim_mark2(data)
                #m1.loadModel(folder_path=fold,file_name=file)
                recsys.append(
                    CFWBoostingRecommender(data, dataReader.get_ICM(),
                                           Slim_mark2))
        print("Configurator: Models are extracted")

        return recsys
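
extract_models dispatches on the model_name string of each config entry. A minimal sketch of the same idea as a registry (illustrative only; the original keeps the explicit if/elif chain because constructor signatures differ):

# Registry-style dispatch; the keys mirror the model_name strings above
registry = {
    "p3alpha": lambda data, reader: P3alphaRecommender(data),
    "rp3beta": lambda data, reader: RP3betaRecommender(data),
    "hybrid_eight": lambda data, reader: HybridEightRecommender_offline(
        data, reader.get_ICM()),
}

def extract(models, data, dataReader):
    return [registry[m["model_name"]](data, dataReader)
            for m in models if m["model_name"] in registry]

Example #12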
    def fit(self,
            alpha=0.0029711141561171717,
            beta=0.9694720669481413,
            gamma=0.9635187725527589,
            theta=0.09930388487311004,
            omega=0.766047309541692,
            coeff=5.4055892529064735,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            location="submission"):
        if best_parameters:
            m = OfflineDataLoader()
            folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.theta = theta
            self.omega = omega
            self.coeff = coeff


        self.normalize = normalize
        self.submission = not submission
        m = OfflineDataLoader()
        self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
        folder_path_ucf, file_name_ucf = m.get_model(UserKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf, file_name=file_name_ucf)

        self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
        folder_path_icf, file_name_icf = m.get_model(ItemKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_item_knn_cf.loadModel(folder_path=folder_path_icf, file_name=file_name_icf)

        self.m_slim_mark2 = Slim_mark2(self.URM_train)
        folder_path_slim, file_name_slim = m.get_model(Slim_mark2.RECOMMENDER_NAME, training=self.submission)
        self.m_slim_mark2.loadModel(folder_path=folder_path_slim, file_name=file_name_slim)

        self.m_alpha = P3alphaRecommender(self.URM_train)
        folder_path_alpha, file_name_alpha = m.get_model(P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_alpha.loadModel(folder_path=folder_path_alpha, file_name=file_name_alpha)

        self.m_beta = RP3betaRecommender(self.URM_train)
        folder_path_beta, file_name_beta = m.get_model(RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_beta.loadModel(folder_path=folder_path_beta, file_name=file_name_beta)

        self.m_slim_elastic = SLIMElasticNetRecommender(self.URM_train)
        folder_path_elastic, file_name_elastic = m.get_model(SLIMElasticNetRecommender.RECOMMENDER_NAME,
                                                             training=self.submission)
        self.m_slim_elastic.loadModel(folder_path=folder_path_elastic, file_name=file_name_elastic)

        self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_Slim = check_matrix(self.m_slim_mark2.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_beta = check_matrix(self.m_beta.W_sparse, "csr", dtype=np.float32)
        self.W_sparse_elastic = check_matrix(self.m_slim_elastic.W_sparse, "csr", dtype=np.float32)
        # Precomputations
        self.matrix_alpha_beta = self.alpha * self.W_sparse_alpha + (1 - self.alpha) * self.W_sparse_beta
        self.matrix_level1 = self.beta * self.W_sparse_Slim + (1 - self.beta) * self.W_sparse_URM_T

        self.parameters = "alpha={}, beta={}, gamma={}, theta={}, omega={}, coeff={}".format(self.alpha, self.beta, self.gamma,
                                                                                   self.theta, self.omega, self.coeff)
        if save_model:
            self.saveModel("saved_models/"+location+"/", file_name=self.RECOMMENDER_NAME)
    def fit(self,
            epochs=100,
            logFile=None,
            batch_size=1000,
            lambda_i=1e-4,
            lambda_j=1e-4,
            learning_rate=0.025,
            topK=200,
            sgd_mode='adagrad',
            gamma=0.995,
            beta_1=0.9,
            beta_2=0.999,
            stop_on_validation=False,
            lower_validatons_allowed=5,
            validation_metric="MAP",
            evaluator_object=None,
            validation_every_n=1,
            save_model=False,
            best_parameters=False,
            offline=True,
            submission=False):
        self.parameters = "epochs={0}, batch_size={1}, lambda_i={2}, lambda_j={3}, learning_rate={4}, topK={5}, sgd_mode={6" \
                            "}, gamma={7}, beta_1={8}, beta_2={9},".format(epochs,batch_size,lambda_i,lambda_j,
                                                                        learning_rate,topK,sgd_mode,gamma,beta_1,beta_2)
        if offline:
            m = OfflineDataLoader()
            folder, file = m.get_model(self.RECOMMENDER_NAME,
                                       training=(not submission))
            self.loadModel(folder_path=folder, file_name=file)
        else:
            # Import compiled module
            from models.Slim_mark2.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch
            # Select only positive interactions
            URM_train_positive = self.URM_train.copy()
            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            self.sgd_mode = sgd_mode
            self.epochs = epochs

            self.cythonEpoch = SLIM_BPR_Cython_Epoch(
                self.URM_mask,
                train_with_sparse_weights=self.train_with_sparse_weights,
                final_model_sparse_weights=self.sparse_weights,
                topK=topK,
                learning_rate=learning_rate,
                li_reg=lambda_i,
                lj_reg=lambda_j,
                batch_size=1,
                symmetric=self.symmetric,
                sgd_mode=sgd_mode,
                gamma=gamma,
                beta_1=beta_1,
                beta_2=beta_2)

            if topK is not False and topK < 1:
                raise ValueError(
                    "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                    .format(topK))
            self.topK = topK

            if validation_every_n is not None:
                self.validation_every_n = validation_every_n
            else:
                self.validation_every_n = np.inf

            if evaluator_object is None and stop_on_validation:
                evaluator_object = SequentialEvaluator(self.URM_validation,
                                                       [10])

            self.batch_size = batch_size
            self.lambda_i = lambda_i
            self.lambda_j = lambda_j
            self.learning_rate = learning_rate

            self._train_with_early_stopping(
                epochs,
                validation_every_n,
                stop_on_validation,
                validation_metric,
                lower_validatons_allowed,
                evaluator_object,
                algorithm_name=self.RECOMMENDER_NAME)

            self.get_S_incremental_and_set_W()

            sys.stdout.flush()
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name=self.RECOMMENDER_NAME)
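
Example 13 delegates training to _train_with_early_stopping: evaluate every validation_every_n epochs and stop once the validation metric fails to improve too many times in a row. A generic sketch of that loop (not the framework's implementation):

def train_with_early_stopping(train_one_epoch, evaluate, epochs,
                              validation_every_n, lower_validations_allowed):
    best_metric, worse_count = -1.0, 0
    for epoch in range(epochs):
        train_one_epoch()
        if (epoch + 1) % validation_every_n == 0:
            metric = evaluate()
            if metric > best_metric:
                best_metric, worse_count = metric, 0  # new best, reset counter
            else:
                worse_count += 1
                if worse_count >= lower_validations_allowed:
                    break  # validation stopped improving
    return best_metric

Example #14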
    def fit(self,
            l1_ratio=0.1,
            positive_only=True,
            topK=400,
            save_model=False,
            best_parameters=False,
            offline=False,
            submission=False):
        self.parameters = "l1_ratio= {}, topK= {},alpha= {},tol= {},max_iter= {}".format(
            l1_ratio, topK, 0.0001, 1e-4, 100)
        if offline:
            m = OfflineDataLoader()
            folder, file = m.get_model(self.RECOMMENDER_NAME,
                                       training=(not submission))
            self.loadModel(folder_path=folder, file_name=file)
        else:

            assert l1_ratio >= 0 and l1_ratio <= 1, "SLIM_ElasticNet: l1_ratio must be between 0 and 1, provided value was {}".format(
                l1_ratio)

            self.l1_ratio = l1_ratio
            self.positive_only = positive_only
            self.topK = topK

            # initialize the ElasticNet model
            self.model = ElasticNet(alpha=0.0001,
                                    l1_ratio=self.l1_ratio,
                                    positive=self.positive_only,
                                    fit_intercept=False,
                                    copy_X=False,
                                    precompute=True,
                                    selection='random',
                                    max_iter=100,
                                    tol=1e-4)

            URM_train = check_matrix(self.URM_train, 'csc', dtype=np.float32)
            n_items = URM_train.shape[1]
            # Use array as it reduces memory requirements compared to lists
            dataBlock = 10000000
            rows = np.zeros(dataBlock, dtype=np.int32)
            cols = np.zeros(dataBlock, dtype=np.int32)
            values = np.zeros(dataBlock, dtype=np.float32)
            numCells = 0
            start_time = time.time()
            start_time_printBatch = start_time

            # fit each item's factors sequentially (not in parallel)
            for currentItem in tqdm(range(n_items)):
                # get the target column
                y = URM_train[:, currentItem].toarray()
                # set the j-th column of X to zero
                start_pos = URM_train.indptr[currentItem]
                end_pos = URM_train.indptr[currentItem + 1]
                current_item_data_backup = URM_train.data[
                    start_pos:end_pos].copy()
                URM_train.data[start_pos:end_pos] = 0.0
                # fit one ElasticNet model per column
                self.model.fit(URM_train, y)
                nonzero_model_coef_index = self.model.sparse_coef_.indices
                nonzero_model_coef_value = self.model.sparse_coef_.data
                local_topK = min(len(nonzero_model_coef_value) - 1, self.topK)
                relevant_items_partition = (
                    -nonzero_model_coef_value
                ).argpartition(local_topK)[0:local_topK]
                relevant_items_partition_sorting = np.argsort(
                    -nonzero_model_coef_value[relevant_items_partition])
                ranking = relevant_items_partition[
                    relevant_items_partition_sorting]

                for index in range(len(ranking)):
                    if numCells == len(rows):
                        rows = np.concatenate(
                            (rows, np.zeros(dataBlock, dtype=np.int32)))
                        cols = np.concatenate(
                            (cols, np.zeros(dataBlock, dtype=np.int32)))
                        values = np.concatenate(
                            (values, np.zeros(dataBlock, dtype=np.float32)))
                    rows[numCells] = nonzero_model_coef_index[ranking[index]]
                    cols[numCells] = currentItem
                    values[numCells] = nonzero_model_coef_value[ranking[index]]
                    numCells += 1
                # finally, replace the original values of the j-th column
                URM_train.data[start_pos:end_pos] = current_item_data_backup

                if time.time() - start_time_printBatch > 300 or currentItem == n_items - 1:
                    print(
                        "Processed {} ( {:.2f}% ) in {:.2f} minutes. Items per second: {:.0f}"
                        .format(
                            currentItem + 1,
                            100.0 * float(currentItem + 1) / n_items,
                            (time.time() - start_time) / 60,
                            float(currentItem) / (time.time() - start_time)))
                    sys.stdout.flush()
                    sys.stderr.flush()
                    start_time_printBatch = time.time()

            # generate the sparse weight matrix
            self.W_sparse = sps.csr_matrix(
                (values[:numCells], (rows[:numCells], cols[:numCells])),
                shape=(n_items, n_items),
                dtype=np.float32)
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name=self.RECOMMENDER_NAME)
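
The loop above fits one ElasticNet regression per item column, masking that column out of the predictors so an item cannot predict itself. A compact sketch of a single iteration on a toy matrix:

import numpy as np
import scipy.sparse as sps
from sklearn.linear_model import ElasticNet

URM = sps.random(50, 10, density=0.3, format='csc', dtype=np.float32,
                 random_state=1)
model = ElasticNet(alpha=0.0001, l1_ratio=0.1, positive=True,
                   fit_intercept=False, copy_X=False, max_iter=100, tol=1e-4)

j = 0                                  # target item column
y = URM[:, j].toarray().ravel()        # interactions with item j
start, end = URM.indptr[j], URM.indptr[j + 1]
backup = URM.data[start:end].copy()
URM.data[start:end] = 0.0              # mask item j out of the predictors
model.fit(URM, y)                      # regress item j on all other items
URM.data[start:end] = backup           # restore the masked column
weights_j = model.sparse_coef_         # becomes column j of W_sparse

Example #15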
    def fit(self,
            topK=400,
            shrink=200,
            similarity='cosine',
            feature_weighting="BM25",
            normalize=True,
            save_model=False,
            best_parameters=False,
            offline=False,
            submission=False,
            location="submission",
            **similarity_args):
        #similarity_args = {'tversky_alpha': 0.8047100184165605, 'tversky_beta': 1.9775806370926445}
        #self.feature_weighting = feature_weighting
        if offline:
            m = OfflineDataLoader()
            folder_path_icf, file_name_icf = m.get_model(
                self.RECOMMENDER_NAME, training=(not submission))
            self.loadModel(folder_path=folder_path_icf,
                           file_name=file_name_icf)
        else:
            if best_parameters:
                m = OfflineDataLoader()
                folder_path_icf, file_name_icf = m.get_parameter(
                    self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path_icf,
                               file_name=file_name_icf)
                #similarity_args = {'normalize': True, 'shrink': 0, 'similarity': 'tversky', 'topK': 20, 'tversky_alpha': 0.18872151621891953, 'tversky_beta': 1.99102432161935}
                # parameters restored from the offline tuning run; the feature
                # weighting is popped out of the dict so that only similarity
                # arguments are forwarded to Compute_Similarity
                similarity_args = {
                    'feature_weighting': 'BM25',
                    'normalize': True,
                    'shrink': 200,
                    'similarity': 'cosine',
                    'topK': 400
                }
                self.feature_weighting = similarity_args.pop(
                    'feature_weighting')
                self.URM_train_copy = self.URM_train
                if self.feature_weighting == "BM25":
                    self.URM_train_copy = to_okapi(
                        self.URM_train.astype(np.float32))
                elif self.feature_weighting == "TF-IDF":
                    self.URM_train_copy = to_tfidf(
                        self.URM_train.astype(np.float32))
                similarity = Compute_Similarity(self.URM_train_copy,
                                                **similarity_args)
            else:
                self.topK = topK
                self.shrink = shrink
                self.feature_weighting = feature_weighting
                if self.feature_weighting == "BM25":
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_okapi(self.URM_train)

                elif self.feature_weighting == "TF-IDF":
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_tfidf(self.URM_train)
                if self.feature_weighting == "none":
                    similarity = Compute_Similarity(self.URM_train,
                                                    shrink=shrink,
                                                    topK=topK,
                                                    normalize=normalize,
                                                    similarity=similarity,
                                                    **similarity_args)
                else:
                    similarity = Compute_Similarity(self.URM_train_copy,
                                                    shrink=shrink,
                                                    topK=topK,
                                                    normalize=normalize,
                                                    similarity=similarity,
                                                    **similarity_args)
            self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, normalize={4}".format(
                self.sparse_weights, similarity, shrink, topK, normalize)
            if self.sparse_weights:
                self.W_sparse = similarity.compute_similarity()
            else:
                self.W = similarity.compute_similarity()
                self.W = self.W.toarray()
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME + "_" + location +
                           "_model")
Exemplo n.º 16
0
    def fit(self,
            alpha=0.80849266253816,
            beta=0.7286503831547066,
            gamma=0.02895704968752022,
            sigma=0.453342,
            tau=0.542421,
            chi=1.8070865821028037,
            psi=4.256005405227253,
            omega=5.096018341419944,
            coeff=39.966898886531645,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            offline=False,
            location="submission"):
        if offline:
            m = OfflineDataLoader()
            folder_path, file_name = m.get_model(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            if best_parameters:
                m = OfflineDataLoader()
                folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path, file_name=file_name)
            else:
                self.alpha = alpha
                self.beta = beta
                self.gamma = gamma
                self.sigma = sigma
                self.tau = tau
                self.chi = chi
                self.psi = psi
                self.omega = omega
                self.coeff = coeff

            self.normalize = normalize
            # the loader returns training-split models unless a submission
            # model is explicitly requested
            self.submission = not submission
            m = OfflineDataLoader()
            self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
            folder_path_ucf, file_name_ucf = m.get_model(
                UserKNNCFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf,
                                         file_name=file_name_ucf)

            self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
            folder_path_icf, file_name_icf = m.get_model(
                ItemKNNCFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_item_knn_cf.loadModel(folder_path=folder_path_icf,
                                         file_name=file_name_icf)

            self.m_item_knn_cbf = ItemKNNCBFRecommender(
                self.URM_train, self.ICM)
            folder_path_icf, file_name_icf = m.get_model(
                ItemKNNCBFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_item_knn_cbf.loadModel(folder_path=folder_path_icf,
                                          file_name=file_name_icf)

            self.m_slim_mark1 = Slim_mark1(self.URM_train)
            folder_path_slim, file_name_slim = m.get_model(
                Slim_mark1.RECOMMENDER_NAME, training=self.submission)
            self.m_slim_mark1.loadModel(folder_path=folder_path_slim,
                                        file_name=file_name_slim)

            self.m_slim_mark2 = Slim_mark2(self.URM_train)
            folder_path_slim, file_name_slim = m.get_model(
                Slim_mark2.RECOMMENDER_NAME, training=self.submission)
            self.m_slim_mark2.loadModel(folder_path=folder_path_slim,
                                        file_name=file_name_slim)

            self.m_alpha = P3alphaRecommender(self.URM_train)
            folder_path_alpha, file_name_alpha = m.get_model(
                P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
            self.m_alpha.loadModel(folder_path=folder_path_alpha,
                                   file_name=file_name_alpha)

            self.m_beta = RP3betaRecommender(self.URM_train)
            folder_path_beta, file_name_beta = m.get_model(
                RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
            self.m_beta.loadModel(folder_path=folder_path_beta,
                                  file_name=file_name_beta)

            self.m_slim_elastic = SLIMElasticNetRecommender(self.URM_train)
            folder_path_elastic, file_name_elastic = m.get_model(
                SLIMElasticNetRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_slim_elastic.loadModel(folder_path=folder_path_elastic,
                                          file_name=file_name_elastic)

            self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            self.W_sparse_Slim1 = check_matrix(self.m_slim_mark1.W,
                                               "csr",
                                               dtype=np.float32)
            self.W_sparse_Slim2 = check_matrix(self.m_slim_mark2.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            self.W_sparse_beta = check_matrix(self.m_beta.W_sparse,
                                              "csr",
                                              dtype=np.float32)
            self.W_sparse_elastic = check_matrix(self.m_slim_elastic.W_sparse,
                                                 "csr",
                                                 dtype=np.float32)
            # Precompute the blended similarity matrices used at scoring time
            self.matrix_alpha_beta = self.alpha * self.W_sparse_alpha + (
                1 - self.alpha) * self.W_sparse_beta
            self.matrix_slim = self.beta * self.W_sparse_Slim2 + (
                (1 - self.beta) * self.W_sparse_elastic *
                self.coeff) + self.sigma * self.W_sparse_Slim1

            self.parameters = "alpha={}, beta={}, gamma={},sigma={}, tau={}, chi={}, psi={}, omega={}, coeff={}".format(
                self.alpha, self.beta, self.gamma, self.sigma, self.tau,
                self.chi, self.psi, self.omega, self.coeff)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)
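The scoring side of this hybrid falls outside this excerpt; what follows is only a plausible sketch of how the precomputed blends could be combined into user scores. The use of gamma, tau, chi, psi and omega as the mixing weights here is an assumption made for illustration, not the repository's actual compute_item_score.

import numpy as np

def hybrid_scores_sketch(self, user_ids):
    # hypothetical scoring: the exact combination is an assumption,
    # since only fit() appears in this excerpt
    profiles = self.URM_train[user_ids]
    # item-based contributions: propagate each profile through a blend
    scores = profiles.dot(self.matrix_alpha_beta) * self.gamma \
        + profiles.dot(self.matrix_slim) * self.tau \
        + profiles.dot(self.W_sparse_URM_T) * self.chi \
        + profiles.dot(self.W_sparse_ICM) * self.psi
    # user-based CF walks through user-user similarities instead
    scores = scores + self.W_sparse_URM[user_ids].dot(self.URM_train) * self.omega
    return np.asarray(scores.todense())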
    def fit(self,
            topK=100,
            alpha=1.,
            min_rating=0,
            implicit=False,
            normalize_similarity=False,
            save_model=False,
            best_parameters=False,
            location="training"):
        if best_parameters:
            m = OfflineDataLoader()
            folder_alpha, file_alpha = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_alpha,file_name=file_alpha)
        else:
            self.topK = topK
            self.alpha = alpha
            self.normalize_similarity = normalize_similarity

        self.min_rating = min_rating
        self.implicit = implicit
        self.parameters = "alpha={}, min_rating={}, topk={}, implicit={}, normalize_similarity={})".format(self.alpha,
                                                                            self.min_rating, self.topK, self.implicit,
                                                                            self.normalize_similarity)

        if self.min_rating > 0:
            self.URM_train.data[self.URM_train.data < self.min_rating] = 0
            self.URM_train.eliminate_zeros()
            if self.implicit:
                self.URM_train.data = np.ones(self.URM_train.data.size, dtype=np.float32)

        # Pui is the row-normalized URM
        Pui = normalize(self.URM_train, norm='l1', axis=1)

        # Piu is the column-normalized, "boolean" URM, transposed
        X_bool = self.URM_train.transpose(copy=True)
        X_bool.data = np.ones(X_bool.data.size, np.float32)
        # ATTENTION: axis is still 1 because the matrix was transposed
        # before normalization
        Piu = normalize(X_bool, norm='l1', axis=1)
        del X_bool

        # Alpha power
        if self.alpha != 1.:
            Pui = Pui.power(self.alpha)
            Piu = Piu.power(self.alpha)

        # Final matrix is computed as Pui * Piu * Pui
        # Multiplication unpacked for memory usage reasons
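        # only W = Piu * Pui (the item-to-item transition) is materialized
        # here, block by block; the leading Pui factor is applied later at
        # recommendation time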
        block_dim = 200
        d_t = Piu

        # Use array as it reduces memory requirements compared to lists
        dataBlock = 10000000

        rows = np.zeros(dataBlock, dtype=np.int32)
        cols = np.zeros(dataBlock, dtype=np.int32)
        values = np.zeros(dataBlock, dtype=np.float32)

        numCells = 0


        start_time = time.time()
        start_time_printBatch = start_time

        for current_block_start_row in range(0, Pui.shape[1], block_dim):

            if current_block_start_row + block_dim > Pui.shape[1]:
                block_dim = Pui.shape[1] - current_block_start_row

            similarity_block = d_t[current_block_start_row:current_block_start_row + block_dim, :] * Pui
            similarity_block = similarity_block.toarray()

            for row_in_block in range(block_dim):
                row_data = similarity_block[row_in_block, :]
                row_data[current_block_start_row + row_in_block] = 0

                best = row_data.argsort()[::-1][:self.topK]

                notZerosMask = row_data[best] != 0.0

                values_to_add = row_data[best][notZerosMask]
                cols_to_add = best[notZerosMask]

                for index in range(len(values_to_add)):

                    if numCells == len(rows):
                        rows = np.concatenate((rows, np.zeros(dataBlock, dtype=np.int32)))
                        cols = np.concatenate((cols, np.zeros(dataBlock, dtype=np.int32)))
                        values = np.concatenate((values, np.zeros(dataBlock, dtype=np.float32)))


                    rows[numCells] = current_block_start_row + row_in_block
                    cols[numCells] = cols_to_add[index]
                    values[numCells] = values_to_add[index]

                    numCells += 1


            if time.time() - start_time_printBatch > 60:
                print("Processed {} ( {:.2f}% ) in {:.2f} minutes. Rows per second: {:.0f}".format(
                    current_block_start_row,
                    100.0 * float(current_block_start_row) / Pui.shape[1],
                    (time.time() - start_time) / 60,
                    float(current_block_start_row) / (time.time() - start_time)))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_printBatch = time.time()

        self.W_sparse = sps.coo_matrix((values[:numCells], (rows[:numCells], cols[:numCells])), shape=(Pui.shape[1], Pui.shape[1]))
        self.W_sparse = check_matrix(self.W_sparse,"csr",dtype=np.float32)


        if self.normalize_similarity:
            self.W_sparse = normalize(self.W_sparse, norm='l1', axis=1)


        if self.topK:
            self.W_sparse = similarityMatrixTopK(self.W_sparse,
                                                 forceSparseOutput=True,
                                                 k=self.topK)
            self.sparse_weights = True

        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME + "_" + location +
                           "_model")