Python Extractor.Extractor示例，OwnUtils.Extractor.Extractor.Extractor Python示例

示例#1

0

显示文件

文件： GenericRunner.py 项目： MathyasGiudici/polimi-recsys-challenge

    def run(self, is_test, is_SSLIM):
        """
        From here we start each algorithm.
        :param is_test: specifies if we want to write a report or a submission
        """
        self.is_test = is_test
        self.is_SSLIM = is_SSLIM

        if self.is_test:
            extractor = Extractor()
            urm = extractor.get_urm_all()

            self.icm = extractor.get_icm_all()

            # Splitting into post-validation & testing in case of parameter tuning
            matrices = loo.split_train_leave_k_out_user_wise(
                urm, 1, False, True)

            self.urm_post_validation = matrices[0]
            self.urm_test = matrices[1]

            # Splitting the post-validation matrix in train & validation
            # (Problem of merging train and validation again at the end => loo twice)
            matrices_for_validation = loo.split_train_leave_k_out_user_wise(
                self.urm_post_validation, 1, False, True)
            self.urm_train = matrices_for_validation[0]
            self.urm_validation = matrices_for_validation[1]

            self.urm_train = extractor.preprocess_csr_matrix(self.urm_train)

            self.write_report()

            if self.is_SSLIM:
                # for topK in [50, 100, 200]:
                #     for epochs in [10, 20, 50, 100, 200, 300]:
                self.sslim_pars = WeightConstants.SLIM_BPR_ICM
                slim_bpr = SLIM_BPR_Cython(self.icm.copy())
                slim_bpr.fit(**self.sslim_pars)

                self.icm = slim_bpr.recs.copy().tocsr()
                self.evaluate()

            else:
                self.evaluate()

        else:
            extractor = Extractor()
            users = extractor.get_target_users_of_recs()
            self.urm_train = extractor.get_urm_all()
            self.icm = extractor.get_icm_all()

            self.write_submission(users)

示例#2

0

显示文件

文件： UserFeaturesRunner.py 项目： MathyasGiudici/polimi-recsys-challenge

    def run(self, is_test):
        self.is_test = is_test

        if self.is_test:
            extractor = Extractor()
            builder = Builder()
            urm = extractor.get_urm_all()
            self.icm = extractor.get_icm_all()

            # Splitting into post-validation & testing in case of parameter tuning
            matrices = loo.split_train_leave_k_out_user_wise(
                urm, 1, False, True)

            self.urm_post_validation = matrices[0]
            self.urm_test = matrices[1]

            # Splitting the post-validation matrix in train & validation
            # (Problem of merging train and validation again at the end => loo twice)
            matrices_for_validation = loo.split_train_leave_k_out_user_wise(
                self.urm_post_validation, 1, False, True)
            self.urm_train = matrices_for_validation[0]
            self.urm_validation = matrices_for_validation[1]

            # Building the urm_per_feature lists
            if self.users_per_region:
                self.urm_per_region_list = builder.build_per_region_urm_train(
                    self.urm_train)
            if self.users_per_age:
                self.urm_per_age_list = builder.build_per_age_urm_train(
                    self.urm_train)

            self.write_report()
            self.evaluate()

        else:
            extractor = Extractor()
            builder = Builder()

            users = extractor.get_target_users_of_recs()
            self.urm_train = extractor.get_urm_all()
            self.icm = extractor.get_icm_all()

            # Building the urm_per_feature lists
            if self.users_per_region:
                self.urm_per_region_list = builder.build_per_region_urm_train(
                    self.urm_train)
            if self.users_per_age:
                self.urm_per_age_list = builder.build_per_age_urm_train(
                    self.urm_train)

            self.write_submission(users)

示例#3

0

显示文件

文件： Optimizer.py 项目： MathyasGiudici/polimi-recsys-challenge

    def __init__(self):
        self.HYP = {}
        self.report_counter = 60
        self.writer = Writer()

        # Some parameters
        self.hyperparams = dict()
        self.hyperparams_names = list()
        self.hyperparams_values = list()
        self.hyperparams_single_value = dict()

        # Extractor for matricies
        extractor = Extractor()
        urm = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()

        # Splitting into post-validation & testing in case of parameter tuning
        matrices = loo.split_train_leave_k_out_user_wise(urm, 1, False, True)

        self.urm_post_validation = matrices[0]
        self.urm_test = matrices[1]

        # Splitting the post-validation matrix in train & validation
        # (Problem of merging train and validation again at the end => loo twice)
        matrices_for_validation = loo.split_train_leave_k_out_user_wise(
            self.urm_post_validation, 1, False, True)
        self.urm_train = matrices_for_validation[0]
        self.urm_validation = matrices_for_validation[1]

示例#4

0

显示文件

文件： DataGraphs.py 项目： MathyasGiudici/polimi-recsys-challenge

def data_visualization():
    # Retriving variables
    userList = list(Extractor().get_users(True))
    itemList = list(Extractor().get_tracks(True, True))
    userList_unique = list(set(userList))
    itemList_unique = list(set(itemList))

    numUsers = len(userList_unique)
    numItems = len(itemList_unique)

    numberInteractions = Extractor().get_numb_interactions()

    print("Number of items\t {}, Number of users\t {}".format(numItems, numUsers))
    print("Max ID items\t {}, Max Id users\t {}\n".format(max(itemList_unique), max(userList_unique)))
    print("Average interactions per user {:.2f}".format(numberInteractions / numUsers))
    print("Average interactions per item {:.2f}\n".format(numberInteractions / numItems))

    print("Sparsity {:.2f} %".format((1 - float(numberInteractions) / (numItems * numUsers)) * 100))

    URM_all = Extractor().get_train(True)
    URM_all.tocsr()

    itemPopularity = (URM_all > 0).sum(axis=0)
    itemPopularity = np.array(itemPopularity).squeeze()
    pyplot.plot(itemPopularity, 'ro')
    pyplot.ylabel('Num Interactions ')
    pyplot.xlabel('Item Index')
    pyplot.show()

    itemPopularity = np.sort(itemPopularity)
    pyplot.plot(itemPopularity, 'ro')
    pyplot.ylabel('Num Interactions ')
    pyplot.xlabel('Item Index')
    pyplot.show()

    userActivity = (URM_all > 0).sum(axis=1)
    userActivity = np.array(userActivity).squeeze()
    userActivity = np.sort(userActivity)

    pyplot.plot(userActivity, 'ro')
    pyplot.ylabel('Num Interactions ')
    pyplot.xlabel('User Index')
    pyplot.show()

示例#5

0

显示文件

    def __init__(self,
                 cutoff,
                 cbfknn=False,
                 icfknn=False,
                 ucfknn=False,
                 slim_bpr=False,
                 pure_svd=False,
                 als=False,
                 cfw=False,
                 p3a=False,
                 rp3b=False,
                 slim_en=False):
        """
        Initialization of the generic runner in which we decide whether or not use an algorithm
        """
        self.cutoff = cutoff
        self.cbfknn = cbfknn
        self.icfknn = icfknn
        self.ucfknn = ucfknn
        self.slim_bpr = slim_bpr
        self.pure_svd = pure_svd
        self.als = als
        self.cfw = cfw
        self.p3a = p3a
        self.rp3b = rp3b
        self.slim_en = slim_en

        self.writer = Writer
        self.extractor = Extractor()
        self.df_builder = XGBoostDataframe(self.cutoff)
        self.result_dict = None

        self.urm_train = None
        self.urm_validation = None
        self.icm = self.extractor.get_icm_all()

        self.p_cbfknn = None
        self.p_icfknn = None
        self.p_ucfknn = None
        self.p_slimbpr = None
        self.p_puresvd = None
        self.p_als = None
        self.p_cfw = None
        self.p_p3a = None
        self.p_rp3b = None
        self.p_slimen = None

        self.target_users = []
        self.results = []

        self.df_user_id_col = []
        self.df_item_id_col = []

        self.df_train = pd.DataFrame
        self.df_test = pd.DataFrame

示例#6

0

显示文件

    def __init__(self,
                 urm_train,
                 icm,
                 urm_per_region_list,
                 urm_per_age_list,
                 weights,
                 add_pure_svd=False,
                 add_slim_bpr=False):
        self.urm_train = urm_train
        self.urm_per_region_list = urm_per_region_list
        self.urm_per_age_list = urm_per_age_list
        self.icm = icm

        self.add_pure_svd = add_pure_svd
        self.add_slim_bpr = add_slim_bpr

        self.weights = weights

        self.icfknn_list = []
        self.ucfknn_list = []
        self.icbfknn_list = []

        self.icm_bm25 = self.icm.copy().astype(np.float32)
        self.icm_bm25 = okapi_BM_25(self.icm_bm25)
        self.icm_bm25 = self.icm_bm25.tocsr()

        self.ratings = None

        self.extractor = Extractor()

        # Creation of the list of algortms that have to be used
        if self.urm_per_region_list is not None:
            for urm in self.urm_per_region_list:
                sps.csr_matrix(urm)
                self.icfknn_list.append(ItemCFKNNRecommender(urm.copy()))
                self.ucfknn_list.append(UserCFKNNRecommender(urm.copy()))
                self.icbfknn_list.append(
                    ItemCBFKNNRecommender(urm.copy(), self.icm_bm25.copy()))

        if self.urm_per_age_list is not None:
            for urm in self.urm_per_age_list:
                sps.csr_matrix(urm)
                self.icfknn_list.append(ItemCFKNNRecommender(urm.copy()))
                self.ucfknn_list.append(UserCFKNNRecommender(urm.copy()))
                self.icbfknn_list.append(
                    ItemCBFKNNRecommender(urm.copy(), self.icm_bm25.copy()))

        # self.icfknn_list.append(ItemCFKNNRecommender(self.urm_train.copy()))
        # self.ucfknn_list.append(UserCFKNNRecommender(self.urm_train.copy()))
        # self.icbfknn_list.append(ItemCBFKNNRecommender(self.urm_train.copy(), self.icm_bm25.copy()))

        if self.add_pure_svd:
            self.pure_SVD = PureSVDRecommender(self.urm_train.copy())
        if self.add_slim_bpr:
            self.slim_bpr = SLIM_BPR_Cython(self.urm_train.copy())

示例#7

0

显示文件

    def run(self, is_test):
        """
        From here we start each algorithm.
        :param is_test: specifies if we want to write a report or a submission
        """
        self.is_test = is_test

        if self.is_test:
            extractor = Extractor()
            urm = extractor.get_urm_all()
            self.icm = extractor.get_icm_all()
            # self.icm_dirty = extractor.get_icm_price_dirty()

            # Splitting into post-validation & testing in case of parameter tuning
            matrices = loo.split_train_leave_k_out_user_wise(
                urm, 1, False, True)

            self.urm_post_validation = matrices[0]
            self.urm_test = matrices[1]

            # ONLY TRAIN AND TEST
            self.urm_train = self.urm_post_validation

            # Splitting the post-validation matrix in train & validation
            # (Problem of merging train and validation again at the end => loo twice)
            # matrices_for_validation = loo.split_train_leave_k_out_user_wise(self.urm_post_validation, 1, False, True)
            # self.urm_train = matrices_for_validation[0]
            # self.urm_validation = matrices_for_validation[1]

            self.evaluate()

        else:
            extractor = Extractor()
            users = extractor.get_target_users_of_recs()
            self.urm_train = extractor.get_urm_all()
            #self.icm = extractor.get_icm_all()

            self.write_submission(users)

示例#8

0

显示文件

文件： XGBoostFeatures.py 项目： MathyasGiudici/polimi-recsys-challenge

    def __init__(self, dataframe, group_length):
        self.builder = Builder()
        extractor = Extractor()

        self.dataframe = dataframe
        self.group_length = group_length
        self.urm = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()
        self.users = extractor.get_target_users_of_recs()

        self.df_user_id_col = list(self.dataframe.loc[:, 'user_id'])
        self.df_item_id_col = list(self.dataframe.loc[:, 'item_id'])

        # Conversion from list of strings in list of int
        self.df_user_id_col = [int(i) for i in self.df_user_id_col]
        self.df_item_id_col = [int(i) for i in self.df_item_id_col]

示例#9

0

显示文件

文件： CrossValidationRunner.py 项目： MathyasGiudici/polimi-recsys-challenge

    def __init__(self,
                 cbfknn=True,
                 icfknn=True,
                 ucfknn=True,
                 slim_bpr=True,
                 pure_svd=True,
                 als=True,
                 cfw=True,
                 p3a=True,
                 rp3b=True,
                 slim_en=True):
        """
        Initialization of the generic runner in which we decide whether or not use an algorithm
        """
        self.cbfknn = cbfknn
        self.icfknn = icfknn
        self.ucfknn = ucfknn
        self.slim_bpr = slim_bpr
        self.pure_svd = pure_svd
        self.als = als
        self.cfw = cfw
        self.p3a = p3a
        self.rp3b = rp3b
        self.slim_en = slim_en

        self.is_test = None
        self.writer = Writer
        self.extractor = Extractor()
        self.result_dict = None

        self.urm_train = None
        self.urm_validation = None
        self.icm = None

        self.p_cbfknn = None
        self.p_icfknn = None
        self.p_ucfknn = None
        self.p_slimbpr = None
        self.p_puresvd = None
        self.p_als = None
        self.p_cfw = None
        self.p_p3a = None
        self.p_rp3b = None
        self.p_slimen = None

        self.target_users = []
        self.results = []

        if self.cbfknn:
            self.p_cbfknn = WeightConstants.CBFKNN
        if self.icfknn:
            self.p_icfknn = WeightConstants.ICFKNN
        if self.ucfknn:
            self.p_ucfknn = WeightConstants.UCFKNN
        if self.slim_bpr:
            self.p_slimbpr = WeightConstants.SLIM_BPR
        if self.pure_svd:
            self.p_puresvd = WeightConstants.PURE_SVD
        if self.als:
            self.p_als = WeightConstants.ALS
        if self.cfw:
            self.p_cfw = WeightConstants.CFW
        if self.p3a:
            self.p_p3a = WeightConstants.P3A
        if self.rp3b:
            self.p_rp3b = WeightConstants.RP3B
        if self.slim_en:
            self.p_slimen = WeightConstants.SLIM_ELASTIC_NET

        self.MAPs = []

示例#10

0

显示文件

    def __init__(self, group_length):
        self.group_length = group_length

        extractor = Extractor()
        self.users = extractor.get_target_users_of_recs()

示例#11

0

显示文件

文件： UserCFKNNRecommender.py 项目： MathyasGiudici/polimi-recsys-challenge

    def filter_seen(self, user_id, scores):
        start_pos = self.URM.indptr[user_id]
        end_pos = self.URM.indptr[user_id + 1]

        user_profile = self.URM.indices[start_pos:end_pos]

        scores[user_profile] = -np.inf

        return scores


if __name__ == '__main__':
    extractor = Extractor
    userList = extractor.get_interaction_users(extractor, False)
    itemList = extractor.get_interaction_items(extractor, False)
    ratingList = np.ones(Extractor().get_numb_interactions())

    URM_all = extractor.get_interaction_matrix(extractor, False)
    warm_items_mask = np.ediff1d(URM_all.tocsc().indptr) > 0
    warm_items = np.arange(URM_all.shape[1])[warm_items_mask]

    URM_all = URM_all[:, warm_items]

    warm_users_mask = np.ediff1d(URM_all.tocsr().indptr) > 0
    warm_users = np.arange(URM_all.shape[0])[warm_users_mask]

    URM_all = URM_all[warm_users, :]

    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)

    recommender = UserCFKNNRecommender(URM_train)