示例#1
0
    def __init__(self, URM_train, ICM, target_model, training=True):
        """Build the recommender from a URM/ICM pair and a pre-trained target model.

        The target model's saved state is located through OfflineDataLoader,
        loaded, and its item-item similarity becomes the boosting target.
        """
        super(CFWBoostingRecommender, self).__init__()

        # URM columns (items) must line up with ICM rows (items).
        if (URM_train.shape[1] != ICM.shape[0]):
            raise ValueError(
                "Number of items not consistent. URM contains {} but ICM contains {}"
                .format(URM_train.shape[1], ICM.shape[0]))

        self.URM_train = check_matrix(URM_train, 'csr')
        self.ICM = check_matrix(ICM, 'csr')

        # Locate and load the previously saved target model, then grab its
        # item-item similarity matrix as the target for the boosting fit.
        loader = OfflineDataLoader()
        folder_path, file_name = loader.get_model(target_model.RECOMMENDER_NAME,
                                                  training=training)
        target_instance = target_model(self.URM_train)
        print(target_instance.RECOMMENDER_NAME)
        target_instance.loadModel(folder_path=folder_path, file_name=file_name)
        self.S_matrix_target = check_matrix(target_instance.W_sparse, 'csr')

        self.n_users, self.n_items = self.URM_train.shape
        self.n_features = self.ICM.shape[1]
        self.sparse_weights = True
示例#2
0
    def fit(self,
            show_max_performance=False,
            loss_tolerance=1e-6,
            iteration_limit=50000,
            damp_coeff=0.0,
            topK=800,
            add_zeros_quota=0.9744535193088417,
            normalize_similarity=False,
            save_model=True,
            best_parameters=False,
            offline=False,
            location="training",
            submission=False):
        """Fit the feature weights, either from a saved model or via lsqr.

        With ``offline`` the whole model is loaded from disk; otherwise the
        per-feature weight vector D is obtained as the least-squares solution
        of the common-feature system built from the training pairs.
        """
        if offline:
            # Reuse a fully trained model saved earlier.
            loader = OfflineDataLoader()
            folder_path, file_name = loader.get_model(self.RECOMMENDER_NAME,
                                                      training=not submission)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            if best_parameters:
                # Hyper-parameters come from a previously tuned configuration.
                loader = OfflineDataLoader()
                folder_path, file_name = loader.get_parameter(self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path, file_name=file_name)
            else:
                self.normalize_similarity = normalize_similarity
                self.add_zeros_quota = add_zeros_quota
                self.topK = topK

            self._generateTrainData_low_ram()

            # Feature co-occurrence of every (row, col) training pair.
            commonFeatures = self.ICM[self.row_list].multiply(
                self.ICM[self.col_list])

            # Damped least-squares fit of the feature weights.
            lsqr_result = linalg.lsqr(commonFeatures,
                                      self.data_list,
                                      show=False,
                                      atol=loss_tolerance,
                                      btol=loss_tolerance,
                                      iter_lim=iteration_limit,
                                      damp=damp_coeff)

            self.D_incremental = lsqr_result[0].copy()
            self.D_best = lsqr_result[0].copy()
            self.epochs_best = 0
            self.loss = lsqr_result[3]  # r1norm reported by lsqr

            self._compute_W_sparse()

        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=(self.RECOMMENDER_NAME + "_" + location +
                                      "_model"))
示例#3
0
    def fit(self,
            alpha=0.80849266253816,
            beta=0.7286503831547066,
            gamma=0.02895704968752022,
            sigma=0.453342,
            tau=0.542421,
            chi=1.8070865821028037,
            psi=4.256005405227253,
            omega=5.096018341419944,
            coeff=39.966898886531645,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            offline=False,
            location="submission"):
        """Fit this hybrid recommender.

        Either loads the whole hybrid from disk (``offline``), or loads all
        pre-trained base recommenders, converts their similarity matrices to
        float32 CSR, and precomputes two weighted combinations of them.

        The Greek-letter parameters (alpha..omega) plus ``coeff`` are the
        mixing weights; ``best_parameters`` replaces them with a previously
        tuned set loaded from disk.
        """
        if offline:
            # Reuse the fully trained hybrid saved earlier.
            m = OfflineDataLoader()
            folder_path, file_name = m.get_model(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            if best_parameters:
                # Load tuned mixing weights instead of the arguments above.
                m = OfflineDataLoader()
                folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path, file_name=file_name)
            else:
                self.alpha = alpha
                self.beta = beta
                self.gamma = gamma
                self.sigma = sigma
                self.tau = tau
                self.chi = chi
                self.psi = psi
                self.omega = omega
                self.coeff = coeff

            self.normalize = normalize
            # NOTE(review): the stored flag is the NEGATION of the argument;
            # every get_model(training=self.submission) below relies on it —
            # confirm the naming is intended.
            self.submission = not submission
            # Load every pre-trained base recommender from disk.
            m = OfflineDataLoader()
            self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
            folder_path_ucf, file_name_ucf = m.get_model(
                UserKNNCFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf,
                                         file_name=file_name_ucf)

            self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
            folder_path_icf, file_name_icf = m.get_model(
                ItemKNNCFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_item_knn_cf.loadModel(folder_path=folder_path_icf,
                                         file_name=file_name_icf)

            # (reuses the *_icf variable names from the CF load above)
            self.m_item_knn_cbf = ItemKNNCBFRecommender(
                self.URM_train, self.ICM)
            folder_path_icf, file_name_icf = m.get_model(
                ItemKNNCBFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_item_knn_cbf.loadModel(folder_path=folder_path_icf,
                                          file_name=file_name_icf)

            self.m_slim_mark1 = Slim_mark1(self.URM_train)
            folder_path_slim, file_name_slim = m.get_model(
                Slim_mark1.RECOMMENDER_NAME, training=self.submission)
            self.m_slim_mark1.loadModel(folder_path=folder_path_slim,
                                        file_name=file_name_slim)

            self.m_slim_mark2 = Slim_mark2(self.URM_train)
            folder_path_slim, file_name_slim = m.get_model(
                Slim_mark2.RECOMMENDER_NAME, training=self.submission)
            self.m_slim_mark2.loadModel(folder_path=folder_path_slim,
                                        file_name=file_name_slim)

            self.m_alpha = P3alphaRecommender(self.URM_train)
            folder_path_alpha, file_name_alpha = m.get_model(
                P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
            self.m_alpha.loadModel(folder_path=folder_path_alpha,
                                   file_name=file_name_alpha)

            self.m_beta = RP3betaRecommender(self.URM_train)
            folder_path_beta, file_name_beta = m.get_model(
                RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
            self.m_beta.loadModel(folder_path=folder_path_beta,
                                  file_name=file_name_beta)

            self.m_slim_elastic = SLIMElasticNetRecommender(self.URM_train)
            folder_path_elastic, file_name_elastic = m.get_model(
                SLIMElasticNetRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_slim_elastic.loadModel(folder_path=folder_path_elastic,
                                          file_name=file_name_elastic)

            # Normalize every similarity matrix to float32 CSR before mixing.
            self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            #print(self.W_sparse_URM.getrow(0).data)
            self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_URM_T.getrow(0).data)
            self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            #print(self.W_sparse_ICM.getrow(0).data)
            # NOTE(review): Slim_mark1 exposes `W` while every other model
            # exposes `W_sparse` — confirm the attribute is correct here.
            self.W_sparse_Slim1 = check_matrix(self.m_slim_mark1.W,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_Slim1.getrow(0).data)
            self.W_sparse_Slim2 = check_matrix(self.m_slim_mark2.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_Slim2.getrow(0).data)
            self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_alpha.getrow(0).data)
            self.W_sparse_beta = check_matrix(self.m_beta.W_sparse,
                                              "csr",
                                              dtype=np.float32)
            #print(self.W_sparse_beta.getrow(0).data)
            self.W_sparse_elastic = check_matrix(self.m_slim_elastic.W_sparse,
                                                 "csr",
                                                 dtype=np.float32)
            #print(self.W_sparse_elastic.getrow(0).data)
            # Precomputations
            #TODO
            # Convex combination of the two graph-based similarities.
            self.matrix_alpha_beta = self.alpha * self.W_sparse_alpha + (
                1 - self.alpha) * self.W_sparse_beta
            # Blend of the three SLIM variants (elastic net scaled by coeff).
            self.matrix_slim = self.beta * self.W_sparse_Slim2 + (
                (1 - self.beta) * self.W_sparse_elastic *
                self.coeff) + self.sigma * self.W_sparse_Slim1

            self.parameters = "alpha={}, beta={}, gamma={},sigma={}, tau={}, chi={}, psi={}, omega={}, coeff={}".format(
                self.alpha, self.beta, self.gamma, self.sigma, self.tau,
                self.chi, self.psi, self.omega, self.coeff)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)
    def fit(self,
            l1_ratio=0.1,
            positive_only=True,
            topK=400,
            save_model=False,
            best_parameters=False,
            offline=False,
            submission=False):
        """Fit a SLIM model with one ElasticNet regression per item.

        For every item, its own column is zeroed out of the URM and an
        ElasticNet model predicts it from the remaining columns; the top-K
        largest coefficients become that item's column in ``self.W_sparse``.
        With ``offline`` the previously trained matrix is loaded instead.
        """
        # alpha / tol / max_iter in this string mirror the hard-coded
        # ElasticNet settings used below.
        self.parameters = "l1_ratio= {}, topK= {},alpha= {},tol= {},max_iter= {}".format(
            l1_ratio, topK, 0.0001, 1e-4, 100)
        if offline:
            m = OfflineDataLoader()
            folder, file = m.get_model(self.RECOMMENDER_NAME,
                                       training=(not submission))
            self.loadModel(folder_path=folder, file_name=file)
        else:

            assert l1_ratio >= 0 and l1_ratio <= 1, "SLIM_ElasticNet: l1_ratio must be between 0 and 1, provided value was {}".format(
                l1_ratio)

            self.l1_ratio = l1_ratio
            self.positive_only = positive_only
            self.topK = topK

            # initialize the ElasticNet model
            self.model = ElasticNet(alpha=0.0001,
                                    l1_ratio=self.l1_ratio,
                                    positive=self.positive_only,
                                    fit_intercept=False,
                                    copy_X=False,
                                    precompute=True,
                                    selection='random',
                                    max_iter=100,
                                    tol=1e-4)

            # CSC layout gives cheap access to single item columns below.
            URM_train = check_matrix(self.URM_train, 'csc', dtype=np.float32)
            n_items = URM_train.shape[1]
            # Use array as it reduces memory requirements compared to lists
            dataBlock = 10000000
            rows = np.zeros(dataBlock, dtype=np.int32)
            cols = np.zeros(dataBlock, dtype=np.int32)
            values = np.zeros(dataBlock, dtype=np.float32)
            numCells = 0
            start_time = time.time()
            start_time_printBatch = start_time

            # fit each item's factors sequentially (not in parallel)
            for currentItem in tqdm(range(n_items)):
                # get the target column
                y = URM_train[:, currentItem].toarray()
                # set the j-th column of X to zero
                start_pos = URM_train.indptr[currentItem]
                end_pos = URM_train.indptr[currentItem + 1]
                current_item_data_backup = URM_train.data[
                    start_pos:end_pos].copy()
                URM_train.data[start_pos:end_pos] = 0.0
                # fit one ElasticNet model per column
                self.model.fit(URM_train, y)
                nonzero_model_coef_index = self.model.sparse_coef_.indices
                nonzero_model_coef_value = self.model.sparse_coef_.data
                # NOTE(review): when exactly one coefficient is nonzero,
                # local_topK becomes 0 and that coefficient is dropped —
                # confirm this off-by-one is intended.
                local_topK = min(len(nonzero_model_coef_value) - 1, self.topK)
                # partial sort: positions of the local_topK largest coefficients
                relevant_items_partition = (
                    -nonzero_model_coef_value
                ).argpartition(local_topK)[0:local_topK]
                relevant_items_partition_sorting = np.argsort(
                    -nonzero_model_coef_value[relevant_items_partition])
                ranking = relevant_items_partition[
                    relevant_items_partition_sorting]

                for index in range(len(ranking)):
                    # grow the COO buffers by another block when full
                    if numCells == len(rows):
                        rows = np.concatenate(
                            (rows, np.zeros(dataBlock, dtype=np.int32)))
                        cols = np.concatenate(
                            (cols, np.zeros(dataBlock, dtype=np.int32)))
                        values = np.concatenate(
                            (values, np.zeros(dataBlock, dtype=np.float32)))
                    rows[numCells] = nonzero_model_coef_index[ranking[index]]
                    cols[numCells] = currentItem
                    values[numCells] = nonzero_model_coef_value[ranking[index]]
                    numCells += 1
                # finally, replace the original values of the j-th column
                URM_train.data[start_pos:end_pos] = current_item_data_backup

                # progress report at most every 5 minutes (and on the last item)
                if time.time(
                ) - start_time_printBatch > 300 or currentItem == n_items - 1:
                    print(
                        "Processed {} ( {:.2f}% ) in {:.2f} minutes. Items per second: {:.0f}"
                        .format(
                            currentItem + 1,
                            100.0 * float(currentItem + 1) / n_items,
                            (time.time() - start_time) / 60,
                            float(currentItem) / (time.time() - start_time)))
                    sys.stdout.flush()
                    sys.stderr.flush()
                    start_time_printBatch = time.time()

            # generate the sparse weight matrix
            self.W_sparse = sps.csr_matrix(
                (values[:numCells], (rows[:numCells], cols[:numCells])),
                shape=(n_items, n_items),
                dtype=np.float32)
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name=self.RECOMMENDER_NAME)
示例#5
0
def read_data_split_and_search():
    """Run Bayesian parameter search over the configured recommenders.

    Loads the train/test URM split and the ICM, fetches a pre-trained
    ItemKNN-CF similarity matrix to feed into the search, then tunes every
    recommender listed in ``collaborative_algorithm_list``, writing all
    search artifacts (explored cases, best model/parameters/results) under
    ``tuned_parameters``.
    """
    clear()

    data_reader = PlaylistDataReader()
    data_reader.generate_datasets()
    URM_train = data_reader.get_URM_train()
    URM_test = data_reader.get_URM_test()
    ICM = data_reader.get_ICM()

    output_root_path = "tuned_parameters"
    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    # Pre-trained ItemKNN-CF similarity consumed by the search routine.
    loader = OfflineDataLoader()
    folder, file_name = loader.get_model(ItemKNNCFRecommender.RECOMMENDER_NAME,
                                         training=True)
    knn_cf = ItemKNNCFRecommender(URM_train, ICM)
    knn_cf.loadModel(folder_path=folder, file_name=file_name)
    W_sparse_CF = knn_cf.W_sparse

    # Recommenders to tune in this run.
    collaborative_algorithm_list = [
        SingleNeuronRecommender_offline,
    ]

    from parameter_tuning.AbstractClassSearch import EvaluatorWrapper
    from base.evaluation.Evaluator import SequentialEvaluator

    # NOTE(review): the validation evaluator runs on the test URM — confirm
    # this is intended (a dedicated validation split is not loaded here).
    evaluator_validation_earlystopping = SequentialEvaluator(URM_test,
                                                             cutoff_list=[10])
    evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])

    evaluator_validation = EvaluatorWrapper(evaluator_validation_earlystopping)
    evaluator_test = EvaluatorWrapper(evaluator_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        ICM=ICM,
        W_sparse_CF=W_sparse_CF,
        metric_to_optimize="MAP",
        evaluator_validation_earlystopping=evaluator_validation_earlystopping,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        n_cases=250,
        output_root_path=output_root_path)

    # Keep going even if one recommender's search fails.
    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()
    def fit(self,
            topK=175,
            shrink=400,
            similarity="asymmetric",
            normalize=True,
            feature_weighting="BM25",
            save_model=False,
            best_parameters=False,
            location="training",
            submission=False,
            offline=False,
            **similarity_args):
        """Build the KNN similarity matrix.

        Depending on the flags the model is loaded from disk (``offline``),
        rebuilt from previously tuned parameters (``best_parameters``), or
        fitted with the arguments given here. ``feature_weighting`` selects
        an optional BM25 / TF-IDF re-weighting of the URM before computing
        similarities.

        Fixes vs. previous revision:
        - BM25/TF-IDF weighting is now applied to the float32 copy of the
          URM; before, the ``astype(np.float32)`` result was discarded and
          the weighting ran on the original matrix.
        - The ``best_parameters`` branch now consistently dispatches on the
          loaded ``self.feature_weighting`` (it previously mixed it with the
          caller's ``feature_weighting`` argument).
        """
        if offline:
            # Reuse the fully trained model from disk.
            m = OfflineDataLoader()
            folder_path_icf, file_name_icf = m.get_model(
                self.RECOMMENDER_NAME, training=(not submission))
            self.loadModel(folder_path=folder_path_icf,
                           file_name=file_name_icf)
        else:
            if best_parameters:
                # Load tuned hyper-parameters (incl. feature_weighting).
                m = OfflineDataLoader()
                folder_path_ucf, file_name_ucf = m.get_parameter(
                    self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path_ucf,
                               file_name=file_name_ucf)
                if self.feature_weighting == "none":
                    similarity = Compute_Similarity(self.URM_train.T,
                                                    **similarity_args)
                else:
                    if self.feature_weighting == "BM25":
                        self.URM_train_copy = to_okapi(
                            self.URM_train.astype(np.float32))
                    elif self.feature_weighting == "TF-IDF":
                        self.URM_train_copy = to_tfidf(
                            self.URM_train.astype(np.float32))
                    # NOTE(review): these values are hard-coded and override
                    # anything passed via **similarity_args — confirm intended.
                    similarity_args = {
                        'asymmetric_alpha': 0.11483114799990246,
                        'normalize': True,
                        'shrink': 450,
                        'similarity': 'asymmetric',
                        'topK': 200
                    }
                    similarity = Compute_Similarity(self.URM_train_copy.T,
                                                    **similarity_args)
            else:
                self.topK = topK
                self.shrink = shrink
                self.feature_weighting = feature_weighting
                similarity_args = {'asymmetric_alpha': 0.0033404951135529437}
                if self.feature_weighting == "BM25":
                    self.URM_train_copy = to_okapi(
                        self.URM_train.astype(np.float32))
                elif self.feature_weighting == "TF-IDF":
                    self.URM_train_copy = to_tfidf(
                        self.URM_train.astype(np.float32))

                if self.feature_weighting == "none":
                    similarity = Compute_Similarity(self.URM_train.T,
                                                    shrink=shrink,
                                                    topK=topK,
                                                    normalize=normalize,
                                                    similarity=similarity,
                                                    **similarity_args)
                else:
                    similarity = Compute_Similarity(self.URM_train_copy.T,
                                                    shrink=shrink,
                                                    topK=topK,
                                                    normalize=normalize,
                                                    similarity=similarity,
                                                    **similarity_args)

            self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, " \
                            "normalize= {4}".format(self.sparse_weights, similarity, shrink, topK, normalize)

            if self.sparse_weights:
                self.W_sparse = similarity.compute_similarity()
            else:
                self.W = similarity.compute_similarity()
                self.W = self.W.toarray()
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name=self.RECOMMENDER_NAME + "_" + location +
                           "_model")
    def fit(self, epochs=50,
            URM_test=None,
            filterTopPop=False,
            minRatingsPerUser=1,
            batch_size=1000,
            validate_every_N_epochs=1,
            start_validation_after_N_epochs=0,
            lambda_i=1e-4,
            lambda_j=1e-4,
            learning_rate=0.020,
            topK=500,
            sgd_mode='adagrad',
            save_model=False,
            best_parameters=False,
            offline=True, submission=False):
        """Train (or load) the SLIM BPR model via the Cython epoch routine.

        ``offline`` loads a fully trained model; ``best_parameters`` loads
        tuned hyper-parameters first and trains with them; otherwise the
        arguments given here are used.

        Bug fix vs. previous revision: the freshly-trained path used to
        ``return`` BEFORE the ``save_model`` block, so a newly trained model
        was never written to disk; saving now happens on every path, while
        each path's return value is preserved.
        """
        self.parameters = "positive_threshold= {0}, sparse_weights= {1}, symmetric= {2},sgd_mode= {3}, lambda_i={4}, " \
                          "lambda_j={5}, learning_rate={6}, topK={7}, epochs= {8}".format(
            self.positive_threshold, self.sparse_weights, self.symmetric,
            self.sgd_mode, lambda_i, lambda_j, learning_rate, topK, epochs)

        result = None
        if offline:
            # Load the fully trained model from disk.
            m = OfflineDataLoader()
            folder, file = m.get_model(self.RECOMMENDER_NAME,
                                       training=(not submission))
            self.loadModel(folder_path=folder, file_name=file)
        else:
            self.save_model = save_model
            # Select only positive interactions.
            # NOTE(review): URM_train_positive is built but never used below
            # (training runs on self.URM_mask) — confirm whether this is
            # intentionally dead code.
            URM_train_positive = self.URM_train.copy()
            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            if best_parameters:
                # Load tuned hyper-parameters, then train with them.
                m = OfflineDataLoader()
                folder_slim, file_slim = m.get_parameter(self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_slim, file_name=file_slim)
                self.cythonEpoch = Slim_BPR_Cython_Epoch(
                    self.URM_mask,
                    sparse_weights=self.sparse_weights,
                    learning_rate=learning_rate,
                    batch_size=1,
                    symmetric=self.symmetric)
                result = super(Slim_BPR_Recommender_Cython,
                               self).fit_alreadyInitialized(
                    epochs=epochs,
                    URM_test=URM_test,
                    filterTopPop=filterTopPop,
                    minRatingsPerUser=minRatingsPerUser,
                    batch_size=batch_size,
                    validate_every_N_epochs=validate_every_N_epochs,
                    start_validation_after_N_epochs=start_validation_after_N_epochs)
            else:
                self.sgd_mode = sgd_mode
                self.cythonEpoch = Slim_BPR_Cython_Epoch(
                    self.URM_mask,
                    sparse_weights=self.sparse_weights,
                    topK=topK,
                    learning_rate=learning_rate,
                    li_reg=lambda_i,
                    lj_reg=lambda_j,
                    batch_size=1,
                    symmetric=self.symmetric,
                    sgd_mode=sgd_mode)
                result = super(Slim_BPR_Recommender_Cython,
                               self).fit_alreadyInitialized(
                    epochs=epochs,
                    URM_test=URM_test,
                    filterTopPop=filterTopPop,
                    minRatingsPerUser=minRatingsPerUser,
                    batch_size=batch_size,
                    validate_every_N_epochs=validate_every_N_epochs,
                    start_validation_after_N_epochs=start_validation_after_N_epochs,
                    lambda_i=lambda_i,
                    lambda_j=lambda_j,
                    learning_rate=learning_rate,
                    topK=topK)

        # Save BEFORE returning so a freshly trained model is persisted too.
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name="SLIM_BPR_Recommender_mark1_submission_model")

        if not offline and not best_parameters:
            # Preserve the historical return value of a fresh training run.
            return result
        return self.W
示例#8
0
    def fit(
            self,
            alpha=1.3167219260598073,
            beta=15.939928536132701,
            gamma=0.6048873602128846,
            delta=1.0527588765188267,
            epsilon=2.08444591782293,
            zeta=1.2588273098979674,
            eta=18.41012777389885,
            theta=18.000293943452448,
            #    psi = 0.00130805010990942,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            offline=False,
            location="submission"):
        if offline:
            m = OfflineDataLoader()
            folder_path, file_name = m.get_model(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            if best_parameters:
                m = OfflineDataLoader()
                folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path, file_name=file_name)
            else:
                self.alpha = alpha
                self.beta = beta
                self.gamma = gamma
                self.delta = delta
                self.epsilon = epsilon
                self.zeta = zeta
                self.eta = eta
                self.theta = theta
        #       self.psi = psi

            self.normalize = normalize
            self.submission = not submission
            m = OfflineDataLoader()
            self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
            folder_path_ucf, file_name_ucf = m.get_model(
                UserKNNCFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf,
                                         file_name=file_name_ucf)

            self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
            folder_path_icf, file_name_icf = m.get_model(
                ItemKNNCFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_item_knn_cf.loadModel(folder_path=folder_path_icf,
                                         file_name=file_name_icf)

            self.m_item_knn_cbf = ItemKNNCBFRecommender(
                self.URM_train, self.ICM)
            folder_path_icf, file_name_icf = m.get_model(
                ItemKNNCBFRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_item_knn_cbf.loadModel(folder_path=folder_path_icf,
                                          file_name=file_name_icf)

            self.m_slim_mark1 = Slim_mark1(self.URM_train)
            folder_path_slim, file_name_slim = m.get_model(
                Slim_mark1.RECOMMENDER_NAME, training=self.submission)
            self.m_slim_mark1.loadModel(folder_path=folder_path_slim,
                                        file_name=file_name_slim)

            self.m_slim_mark2 = Slim_mark2(self.URM_train)
            folder_path_slim, file_name_slim = m.get_model(
                Slim_mark2.RECOMMENDER_NAME, training=self.submission)
            self.m_slim_mark2.loadModel(folder_path=folder_path_slim,
                                        file_name=file_name_slim)

            self.m_alpha = P3alphaRecommender(self.URM_train)
            folder_path_alpha, file_name_alpha = m.get_model(
                P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
            self.m_alpha.loadModel(folder_path=folder_path_alpha,
                                   file_name=file_name_alpha)

            self.m_beta = RP3betaRecommender(self.URM_train)
            folder_path_beta, file_name_beta = m.get_model(
                RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
            self.m_beta.loadModel(folder_path=folder_path_beta,
                                  file_name=file_name_beta)

            self.m_slim_elastic = SLIMElasticNetRecommender(self.URM_train)
            folder_path_elastic, file_name_elastic = m.get_model(
                SLIMElasticNetRecommender.RECOMMENDER_NAME,
                training=self.submission)
            self.m_slim_elastic.loadModel(folder_path=folder_path_elastic,
                                          file_name=file_name_elastic)

            # self.m_cfw = CFWBoostingRecommender(self.URM_train,self.ICM,Slim_mark2,training=self.submission)
            # fold, file = m.get_model(CFWBoostingRecommender.RECOMMENDER_NAME,training= self.submission)
            # self.m_cfw.loadModel(folder_path=fold,file_name=file)

            self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            #print(self.W_sparse_URM.getrow(0).data)
            self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_URM_T.getrow(0).data)
            self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse,
                                             "csr",
                                             dtype=np.float32)
            #print(self.W_sparse_ICM.getrow(0).data)
            self.W_sparse_Slim1 = check_matrix(self.m_slim_mark1.W,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_Slim1.getrow(0).data)
            self.W_sparse_Slim2 = check_matrix(self.m_slim_mark2.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_Slim2.getrow(0).data)
            self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse,
                                               "csr",
                                               dtype=np.float32)
            #print(self.W_sparse_alpha.getrow(0).data)
            self.W_sparse_beta = check_matrix(self.m_beta.W_sparse,
                                              "csr",
                                              dtype=np.float32)
            #print(self.W_sparse_beta.getrow(0).data)
            self.W_sparse_elastic = check_matrix(self.m_slim_elastic.W_sparse,
                                                 "csr",
                                                 dtype=np.float32)
            #print(self.W_sparse_elastic.getrow(0).data)
            #self.W_sparse_cfw = check_matrix(self.m_cfw.W_sparse,"csr",dtype=np.float32)
            # Precomputations
            self.matrix_wo_user = self.alpha * self.W_sparse_URM_T +\
                                  self.beta * self.W_sparse_ICM +\
                                  self.gamma * self.W_sparse_Slim1 +\
                                  self.delta * self.W_sparse_Slim2 +\
                                  self.epsilon * self.W_sparse_alpha +\
                                  self.zeta * self.W_sparse_beta + \
                                  self.eta * self.W_sparse_elastic #+ \
            #self.psi * self.W_sparse_cfw

            self.parameters = "alpha={}, beta={}, gamma={},delta={}, epsilon={}, zeta={}, eta={}, theta={}".format(
                self.alpha, self.beta, self.gamma, self.delta, self.epsilon,
                self.zeta, self.eta, self.theta)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)
示例#9
0
    def fit(self,
            alpha=0.0500226666668111,
            beta=0.9996482062853596,
            gamma=0.36595766622100967,
            theta=0.22879224932897924,
            omega=0.5940982982110466,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            location="submission"):
        """Load the pre-trained sub-recommenders and precompute the blended
        similarity matrices used at recommendation time.

        :param alpha: weight blending the item-CBF and SLIM similarities.
        :param beta: weight blending the first branch with the item-CF similarity.
        :param gamma: weight blending the P3alpha and RP3beta similarities.
        :param theta: additional mixing weight (consumed at scoring time).
        :param omega: additional mixing weight (consumed at scoring time).
        :param normalize: stored flag; presumably controls score
            normalization at recommendation time — confirm against caller.
        :param save_model: if True, persist this recommender after fitting.
        :param submission: if True, load sub-models trained for submission
            (the loader's ``training`` flag is the negation of this).
        :param best_parameters: if True, load tuned weights from disk and
            ignore the weight arguments.
        :param location: sub-folder of ``saved_models`` used when saving.
            New parameter; the default preserves the previously hard-coded
            ``saved_models/submission/`` path.
        """
        if best_parameters:
            # Restore previously tuned hyper-parameters from disk.
            m = OfflineDataLoader()
            folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.theta = theta
            self.omega = omega
        self.normalize = normalize
        # NOTE(review): inverted on purpose — the loader's "training" flag is
        # the negation of the submission flag.
        self.submission = not submission
        m = OfflineDataLoader()

        # Load each pre-trained sub-model from disk.
        self.m_user_knn_cf = UserKNNCFRecommender(self.URM_train)
        folder_path_ucf, file_name_ucf = m.get_model(
            UserKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_user_knn_cf.loadModel(folder_path=folder_path_ucf,
                                     file_name=file_name_ucf)

        self.m_item_knn_cf = ItemKNNCFRecommender(self.URM_train)
        folder_path_icf, file_name_icf = m.get_model(
            ItemKNNCFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_item_knn_cf.loadModel(folder_path=folder_path_icf,
                                     file_name=file_name_icf)

        self.m_item_knn_cbf = ItemKNNCBFRecommender(self.URM_train, self.ICM)
        folder_path_icbf, file_name_icbf = m.get_model(
            ItemKNNCBFRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_item_knn_cbf.loadModel(folder_path=folder_path_icbf,
                                      file_name=file_name_icbf)

        self.m_slim_mark1 = Slim_mark1(self.URM_train)
        folder_path_slim, file_name_slim = m.get_model(
            Slim_mark1.RECOMMENDER_NAME, training=self.submission)
        self.m_slim_mark1.loadModel(folder_path=folder_path_slim,
                                    file_name=file_name_slim)

        self.m_alpha = P3alphaRecommender(self.URM_train)
        folder_path_alpha, file_name_alpha = m.get_model(
            P3alphaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_alpha.loadModel(folder_path=folder_path_alpha,
                               file_name=file_name_alpha)

        self.m_beta = RP3betaRecommender(self.URM_train)
        folder_path_beta, file_name_beta = m.get_model(
            RP3betaRecommender.RECOMMENDER_NAME, training=self.submission)
        self.m_beta.loadModel(folder_path=folder_path_beta,
                              file_name=file_name_beta)

        # Normalize all similarity matrices to float32 CSR for the blends.
        self.W_sparse_URM = check_matrix(self.m_user_knn_cf.W_sparse,
                                         "csr",
                                         dtype=np.float32)
        self.W_sparse_ICM = check_matrix(self.m_item_knn_cbf.W_sparse,
                                         "csr",
                                         dtype=np.float32)
        self.W_sparse_URM_T = check_matrix(self.m_item_knn_cf.W_sparse,
                                           "csr",
                                           dtype=np.float32)
        # NOTE(review): Slim_mark1 exposes its matrix as ``W`` (dense path?),
        # unlike the other models' ``W_sparse`` — confirm intended.
        self.W_sparse_Slim = check_matrix(self.m_slim_mark1.W,
                                          "csr",
                                          dtype=np.float32)
        self.W_sparse_alpha = check_matrix(self.m_alpha.W_sparse,
                                           "csr",
                                           dtype=np.float32)
        self.W_sparse_beta = check_matrix(self.m_beta.W_sparse,
                                          "csr",
                                          dtype=np.float32)

        # Precomputations: convex combinations forming the tree of blends.
        self.matrix_first_branch = self.alpha * self.W_sparse_ICM + (
            1 - self.alpha) * self.W_sparse_Slim
        self.matrix_right = self.beta * self.matrix_first_branch + (
            1 - self.beta) * self.W_sparse_URM_T
        self.matrix_alpha_beta = self.gamma * self.W_sparse_alpha + (
            1 - self.gamma) * self.W_sparse_beta

        self.parameters = "alpha={}, beta={}, gamma={}, omega={}, theta={}".format(
            self.alpha, self.beta, self.gamma, self.omega, self.theta)
        if save_model:
            # ``location`` defaults to "submission", so the default save path
            # is identical to the previous hard-coded one.
            self.saveModel("saved_models/" + location + "/",
                           file_name="ItemTreeRecommender_offline")
    def fit(self,
            topK=400,
            shrink=200,
            similarity='cosine',
            feature_weighting="BM25",
            normalize=True,
            save_model=False,
            best_parameters=False,
            offline=False,
            submission=False,
            location="submission",
            **similarity_args):
        """Compute (or load) the item similarity matrix, optionally applying
        BM25 / TF-IDF feature weighting to the URM first.

        :param topK: number of neighbours kept per item.
        :param shrink: shrinkage term for the similarity.
        :param similarity: similarity measure name passed to Compute_Similarity.
        :param feature_weighting: one of "none", "BM25", "TF-IDF".
        :param normalize: whether the similarity is normalized.
        :param save_model: if True, persist the fitted model.
        :param best_parameters: load tuned parameters from disk, then rebuild
            the similarity with the stored configuration.
        :param offline: load the entire fitted model from disk and skip fitting.
        :param submission: selects submission vs. training artifacts when offline.
        :param location: sub-folder of ``saved_models`` used when saving.
        :param similarity_args: extra keyword arguments for Compute_Similarity.
        """
        if offline:
            # Skip fitting entirely: restore a previously saved model.
            m = OfflineDataLoader()
            folder_path_icf, file_name_icf = m.get_model(
                self.RECOMMENDER_NAME, training=(not submission))
            self.loadModel(folder_path=folder_path_icf,
                           file_name=file_name_icf)
        else:
            if best_parameters:
                m = OfflineDataLoader()
                folder_path_icf, file_name_icf = m.get_parameter(
                    self.RECOMMENDER_NAME)
                self.loadModel(folder_path=folder_path_icf,
                               file_name=file_name_icf)
                similarity_args = {
                    'feature_weighting': 'BM25',
                    'normalize': True,
                    'shrink': 200,
                    'similarity': 'cosine',
                    'topK': 400
                }
                if self.feature_weighting == "BM25":
                    # Fix: weight the float32 copy. Previously the astype()
                    # result was discarded and to_okapi ran on the original.
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_okapi(self.URM_train_copy)
                elif self.feature_weighting == "TF-IDF":
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_tfidf(self.URM_train_copy)
                else:
                    # Fix: with "none" the copy was never assigned, so the
                    # Compute_Similarity call below raised AttributeError.
                    self.URM_train_copy = self.URM_train
                similarity = Compute_Similarity(self.URM_train_copy,
                                                **similarity_args)
            else:
                self.topK = topK
                self.shrink = shrink
                self.feature_weighting = feature_weighting
                if self.feature_weighting == "BM25":
                    # Fix: weight the float32 copy (see note above).
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_okapi(self.URM_train_copy)
                elif self.feature_weighting == "TF-IDF":
                    self.URM_train_copy = self.URM_train.astype(np.float32)
                    self.URM_train_copy = to_tfidf(self.URM_train_copy)
                if self.feature_weighting == "none":
                    similarity = Compute_Similarity(self.URM_train,
                                                    shrink=shrink,
                                                    topK=topK,
                                                    normalize=normalize,
                                                    similarity=similarity,
                                                    **similarity_args)
                else:
                    similarity = Compute_Similarity(self.URM_train_copy,
                                                    shrink=shrink,
                                                    topK=topK,
                                                    normalize=normalize,
                                                    similarity=similarity,
                                                    **similarity_args)
            self.parameters = "sparse_weights= {0}, similarity= {1}, shrink= {2}, neighbourhood={3}, normalize={4}".format(
                self.sparse_weights, similarity, shrink, topK, normalize)
            if self.sparse_weights:
                self.W_sparse = similarity.compute_similarity()
            else:
                self.W = similarity.compute_similarity()
                self.W = self.W.toarray()
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME + "_" + location +
                           "_model")
    def fit(self,
            alpha=0.1,
            beta=0.1,
            gamma=0.1,
            theta=0.1,
            delta=0.1,
            epsilon=0.1,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            location="submission"):
        """Load the pre-trained hybrid sub-recommenders blended by this model.

        :param alpha..epsilon: mixing weights (consumed at scoring time).
        :param normalize: stored flag; presumably controls score
            normalization at recommendation time — confirm against caller.
        :param save_model: if True, persist this recommender after fitting.
        :param submission: if True, load sub-models trained for submission
            (the loader's ``training`` flag is the negation of this).
        :param best_parameters: load tuned weights from disk instead of the
            arguments.
        :param location: sub-folder of ``saved_models`` used when saving.
        """
        if best_parameters:
            # Restore previously tuned hyper-parameters from disk.
            m = OfflineDataLoader()
            folder_path, file_name = m.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=folder_path, file_name=file_name)
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.theta = theta
            self.delta = delta
            self.epsilon = epsilon

        self.normalize = normalize
        # NOTE(review): inverted on purpose — the loader's "training" flag is
        # the negation of the submission flag.
        self.submission = not submission
        m = OfflineDataLoader()
        self.m_party = PartyRecommender_offline(self.URM_train)
        folder_path_ucf, file_name_ucf = m.get_model(
            PartyRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_party.loadModel(folder_path=folder_path_ucf,
                               file_name=file_name_ucf)

        self.m_pyramid = PyramidRecommender_offline(self.URM_train)
        folder_path_icf, file_name_icf = m.get_model(
            PyramidRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_pyramid.loadModel(folder_path=folder_path_icf,
                                 file_name=file_name_icf)

        self.m_pyitem = PyramidItemTreeRecommender_offline(
            self.URM_train, self.ICM)
        folder_path_slim, file_name_slim = m.get_model(
            PyramidItemTreeRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_pyitem.loadModel(folder_path=folder_path_slim,
                                file_name=file_name_slim)

        self.m_8 = HybridEightRecommender_offline(self.URM_train, self.ICM)
        folder_path_alpha, file_name_alpha = m.get_model(
            HybridEightRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_8.loadModel(folder_path=folder_path_alpha,
                           file_name=file_name_alpha)

        self.m_sn = SingleNeuronRecommender_offline(self.URM_train, self.ICM)
        folder_path_alpha, file_name_alpha = m.get_model(
            SingleNeuronRecommender_offline.RECOMMENDER_NAME,
            training=self.submission)
        self.m_sn.loadModel(folder_path=folder_path_alpha,
                            file_name=file_name_alpha)

        self.m_cfw = CFWBoostingRecommender(self.URM_train,
                                            self.ICM,
                                            Slim_mark2,
                                            training=self.submission)
        fold, file = m.get_model(CFWBoostingRecommender.RECOMMENDER_NAME,
                                 training=self.submission)
        self.m_cfw.loadModel(folder_path=fold, file_name=file)

        # Fix: epsilon was assigned above but missing from the reported
        # parameters string.
        self.parameters = "alpha={}, beta={}, gamma={}, theta={}, delta={}, epsilon={}".format(
            self.alpha, self.beta, self.gamma, self.theta, self.delta,
            self.epsilon)
        if save_model:
            self.saveModel("saved_models/" + location + "/",
                           file_name=self.RECOMMENDER_NAME)
    def fit(self,
            alpha=0.0029711141561171717,
            beta=0.9694720669481413,
            gamma=0.9635187725527589,
            theta=0.09930388487311004,
            omega=0.766047309541692,
            coeff = 5.4055892529064735,
            normalize=False,
            save_model=False,
            submission=False,
            best_parameters=False,
            location="submission"):
        """Restore the six pre-trained sub-recommenders from disk and
        precompute the two blended similarity matrices used at scoring time.

        When ``best_parameters`` is True the mixing weights are loaded from
        the tuned-parameter store instead of the arguments.
        """
        if best_parameters:
            loader = OfflineDataLoader()
            param_folder, param_file = loader.get_parameter(self.RECOMMENDER_NAME)
            self.loadModel(folder_path=param_folder, file_name=param_file)
        else:
            self.alpha = alpha
            self.beta = beta
            self.gamma = gamma
            self.theta = theta
            self.omega = omega
            self.coeff = coeff

        self.normalize = normalize
        # The loader's "training" flag is the negation of the submission flag.
        self.submission = not submission
        loader = OfflineDataLoader()

        def _restore(recommender_cls):
            # Build a sub-recommender on the training URM and reload its
            # previously fitted state from disk.
            model = recommender_cls(self.URM_train)
            folder, name = loader.get_model(recommender_cls.RECOMMENDER_NAME,
                                            training=self.submission)
            model.loadModel(folder_path=folder, file_name=name)
            return model

        self.m_user_knn_cf = _restore(UserKNNCFRecommender)
        self.m_item_knn_cf = _restore(ItemKNNCFRecommender)
        self.m_slim_mark2 = _restore(Slim_mark2)
        self.m_alpha = _restore(P3alphaRecommender)
        self.m_beta = _restore(RP3betaRecommender)
        self.m_slim_elastic = _restore(SLIMElasticNetRecommender)

        # All similarity matrices as float32 CSR so the blends stay sparse.
        as_csr32 = lambda w: check_matrix(w, "csr", dtype=np.float32)
        self.W_sparse_URM = as_csr32(self.m_user_knn_cf.W_sparse)
        self.W_sparse_URM_T = as_csr32(self.m_item_knn_cf.W_sparse)
        self.W_sparse_Slim = as_csr32(self.m_slim_mark2.W_sparse)
        self.W_sparse_alpha = as_csr32(self.m_alpha.W_sparse)
        self.W_sparse_beta = as_csr32(self.m_beta.W_sparse)
        self.W_sparse_elastic = as_csr32(self.m_slim_elastic.W_sparse)

        # Precomputations: convex combinations reused at recommendation time.
        self.matrix_alpha_beta = self.alpha * self.W_sparse_alpha + (1 - self.alpha) * self.W_sparse_beta
        self.matrix_level1 = self.beta * self.W_sparse_Slim + (1 - self.beta) * self.W_sparse_URM_T

        self.parameters = "alpha={}, beta={}, gamma={}, theta={}, omega={}, coeff={}".format(self.alpha, self.beta, self.gamma,
                                                                                   self.theta, self.omega, self.coeff)
        if save_model:
            self.saveModel("saved_models/" + location + "/", file_name=self.RECOMMENDER_NAME)
    def fit(self,
            epochs=100,
            logFile=None,
            batch_size=1000,
            lambda_i=1e-4,
            lambda_j=1e-4,
            learning_rate=0.025,
            topK=200,
            sgd_mode='adagrad',
            gamma=0.995,
            beta_1=0.9,
            beta_2=0.999,
            stop_on_validation=False,
            lower_validatons_allowed=5,
            validation_metric="MAP",
            evaluator_object=None,
            validation_every_n=1,
            save_model=False,
            best_parameters=False,
            offline=True,
            submission=False):
        """Train the SLIM model with Cython BPR-SGD, or load it from disk.

        :param epochs: maximum number of training epochs.
        :param lambda_i, lambda_j: BPR regularization terms.
        :param topK: neighbours kept per item; False disables the cap.
        :param sgd_mode: optimizer ('adagrad', etc.); gamma/beta_1/beta_2 are
            the optimizer hyper-parameters.
        :param stop_on_validation: enable early stopping on validation_metric.
        :param offline: if True, load a previously trained model and skip
            training entirely.
        :param submission: selects submission vs. training artifacts when
            loading offline.
        :raises ValueError: if topK is neither False nor a positive integer.
        """
        # Runtime value identical to the original concatenated literal.
        self.parameters = "epochs={0}, batch_size={1}, lambda_i={2}, lambda_j={3}, learning_rate={4}, topK={5}, sgd_mode={6}, gamma={7}, beta_1={8}, beta_2={9},".format(
            epochs, batch_size, lambda_i, lambda_j, learning_rate, topK,
            sgd_mode, gamma, beta_1, beta_2)
        if offline:
            m = OfflineDataLoader()
            folder, file = m.get_model(self.RECOMMENDER_NAME,
                                       training=(not submission))
            self.loadModel(folder_path=folder, file_name=file)
        else:
            # Import compiled module lazily so the pure-offline path never
            # requires the Cython build.
            from models.Slim_mark2.Cython.SLIM_BPR_Cython_Epoch import SLIM_BPR_Cython_Epoch

            # Fix: validate topK BEFORE the expensive Cython-epoch
            # construction that already consumes it.
            if (topK != False and topK < 1):
                raise ValueError(
                    "TopK not valid. Acceptable values are either False or a positive integer value. Provided value was '{}'"
                    .format(topK))
            self.topK = topK

            # Select only positive interactions.
            # NOTE(review): URM_train_positive is built but never used below —
            # training runs on self.URM_mask (presumably set in __init__);
            # confirm whether the mask was meant to come from here.
            URM_train_positive = self.URM_train.copy()
            URM_train_positive.data = URM_train_positive.data >= self.positive_threshold
            URM_train_positive.eliminate_zeros()

            self.sgd_mode = sgd_mode
            self.epochs = epochs

            self.cythonEpoch = SLIM_BPR_Cython_Epoch(
                self.URM_mask,
                train_with_sparse_weights=self.train_with_sparse_weights,
                final_model_sparse_weights=self.sparse_weights,
                topK=topK,
                learning_rate=learning_rate,
                li_reg=lambda_i,
                lj_reg=lambda_j,
                batch_size=1,
                symmetric=self.symmetric,
                sgd_mode=sgd_mode,
                gamma=gamma,
                beta_1=beta_1,
                beta_2=beta_2)

            if validation_every_n is not None:
                self.validation_every_n = validation_every_n
            else:
                # Never validate.
                self.validation_every_n = np.inf

            if evaluator_object is None and stop_on_validation:
                evaluator_object = SequentialEvaluator(self.URM_validation,
                                                       [10])

            self.batch_size = batch_size
            self.lambda_i = lambda_i
            self.lambda_j = lambda_j
            self.learning_rate = learning_rate

            self._train_with_early_stopping(
                epochs,
                validation_every_n,
                stop_on_validation,
                validation_metric,
                lower_validatons_allowed,
                evaluator_object,
                algorithm_name=self.RECOMMENDER_NAME)

            self.get_S_incremental_and_set_W()

            sys.stdout.flush()
        if save_model:
            self.saveModel("saved_models/submission/",
                           file_name=self.RECOMMENDER_NAME)