def run(self, is_test, is_SSLIM):
    """
    Entry point from which the algorithm is started.

    :param is_test: if truthy, split the URM, write a report and evaluate;
        otherwise load the full URM and write a submission
    :param is_SSLIM: only read in test mode; if truthy, the ICM is replaced by
        the ``recs`` matrix of a SLIM BPR model fitted on the ICM before the
        evaluation is run
    """
    self.is_test = is_test
    self.is_SSLIM = is_SSLIM

    if not self.is_test:
        # Submission mode: no split, the whole URM is used for training.
        extractor = Extractor()
        users = extractor.get_target_users_of_recs()
        self.urm_train = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()
        self.write_submission(users)
        return

    extractor = Extractor()
    full_urm = extractor.get_urm_all()
    self.icm = extractor.get_icm_all()

    # Outer split: post-validation vs. test (used for parameter tuning).
    outer_split = loo.split_train_leave_k_out_user_wise(
        full_urm, 1, False, True)
    self.urm_post_validation = outer_split[0]
    self.urm_test = outer_split[1]

    # Inner split: the post-validation matrix is split again into train and
    # validation, so train + validation can be merged back at the end.
    inner_split = loo.split_train_leave_k_out_user_wise(
        self.urm_post_validation, 1, False, True)
    self.urm_train = inner_split[0]
    self.urm_validation = inner_split[1]
    self.urm_train = extractor.preprocess_csr_matrix(self.urm_train)

    self.write_report()

    if self.is_SSLIM:
        # Fit SLIM BPR on the ICM and use its `recs` matrix as the new ICM.
        # (A grid search over topK / epochs was previously sketched here.)
        self.sslim_pars = WeightConstants.SLIM_BPR_ICM
        icm_model = SLIM_BPR_Cython(self.icm.copy())
        icm_model.fit(**self.sslim_pars)
        self.icm = icm_model.recs.copy().tocsr()

    # Evaluation runs in test mode regardless of the SSLIM flag.
    self.evaluate()
def run(self, is_test):
    """
    Entry point from which the algorithm is started.

    :param is_test: if truthy, split the URM into train / validation / test,
        write a report and evaluate; otherwise train on the full URM and
        write a submission
    """

    def _build_feature_urms(builder):
        # Build the per-feature URM lists from the current training matrix,
        # but only for the features enabled on this instance.
        if self.users_per_region:
            self.urm_per_region_list = builder.build_per_region_urm_train(
                self.urm_train)
        if self.users_per_age:
            self.urm_per_age_list = builder.build_per_age_urm_train(
                self.urm_train)

    self.is_test = is_test

    if not self.is_test:
        # Submission mode: the whole URM is the training matrix.
        extractor = Extractor()
        builder = Builder()
        users = extractor.get_target_users_of_recs()
        self.urm_train = extractor.get_urm_all()
        self.icm = extractor.get_icm_all()
        _build_feature_urms(builder)
        self.write_submission(users)
        return

    extractor = Extractor()
    builder = Builder()
    full_urm = extractor.get_urm_all()
    self.icm = extractor.get_icm_all()

    # Outer split: post-validation vs. test (used for parameter tuning).
    outer_split = loo.split_train_leave_k_out_user_wise(
        full_urm, 1, False, True)
    self.urm_post_validation = outer_split[0]
    self.urm_test = outer_split[1]

    # Inner split: post-validation is split again into train and validation,
    # so train + validation can be merged back at the end.
    inner_split = loo.split_train_leave_k_out_user_wise(
        self.urm_post_validation, 1, False, True)
    self.urm_train = inner_split[0]
    self.urm_validation = inner_split[1]

    _build_feature_urms(builder)

    self.write_report()
    self.evaluate()
def __init__(self):
    """
    Set up the tuning state and load / split the data matrices.

    The URM is split twice with ``loo.split_train_leave_k_out_user_wise``:
    first into post-validation and test, then the post-validation matrix
    into train and validation.
    """
    self.HYP = {}
    self.report_counter = 60
    self.writer = Writer()

    # Containers for the hyper-parameters being explored
    self.hyperparams = {}
    self.hyperparams_names = []
    self.hyperparams_values = []
    self.hyperparams_single_value = {}

    # Load the matrices through the extractor
    data_source = Extractor()
    full_urm = data_source.get_urm_all()
    self.icm = data_source.get_icm_all()

    # Outer split: post-validation vs. test (used for parameter tuning)
    outer_split = loo.split_train_leave_k_out_user_wise(
        full_urm, 1, False, True)
    self.urm_post_validation = outer_split[0]
    self.urm_test = outer_split[1]

    # Inner split: train vs. validation, so that train + validation can be
    # merged back together at the end (hence the second split)
    inner_split = loo.split_train_leave_k_out_user_wise(
        self.urm_post_validation, 1, False, True)
    self.urm_train = inner_split[0]
    self.urm_validation = inner_split[1]
def data_visualization():
    """
    Print basic dataset statistics and plot interaction distributions.

    Printed: number of distinct items / users, their maximum ids, the average
    number of interactions per user and per item, and the sparsity percentage.

    Plotted (one blocking ``pyplot.show()`` each): item popularity in index
    order, item popularity sorted ascending, and user activity sorted
    ascending. Popularity / activity count the entries of the train matrix
    that are ``> 0`` per column / per row.
    """

    def _show_counts(counts, x_label):
        # All three figures share the same marker style and y label.
        pyplot.plot(counts, 'ro')
        pyplot.ylabel('Num Interactions ')
        pyplot.xlabel(x_label)
        pyplot.show()

    # One extractor is reused for every query instead of building a new
    # instance per call.
    extractor = Extractor()

    # Retrieving variables
    unique_users = set(extractor.get_users(True))
    unique_items = set(extractor.get_tracks(True, True))
    num_users = len(unique_users)
    num_items = len(unique_items)
    num_interactions = extractor.get_numb_interactions()

    print("Number of items\t {}, Number of users\t {}".format(
        num_items, num_users))
    print("Max ID items\t {}, Max Id users\t {}\n".format(
        max(unique_items), max(unique_users)))
    print("Average interactions per user {:.2f}".format(
        num_interactions / num_users))
    print("Average interactions per item {:.2f}\n".format(
        num_interactions / num_items))
    print("Sparsity {:.2f} %".format(
        (1 - float(num_interactions) / (num_items * num_users)) * 100))

    # tocsr() returns a new matrix, so the result has to be re-bound; the
    # original call discarded it and the conversion never took effect.
    urm_all = extractor.get_train(True).tocsr()

    # Boolean interaction mask, computed once and reused for both axes.
    interacted = urm_all > 0

    # ravel() (rather than squeeze()) keeps the result 1-D even when there is
    # a single item or user, so np.sort below always has an axis to sort.
    item_popularity = np.asarray(interacted.sum(axis=0)).ravel()
    _show_counts(item_popularity, 'Item Index')
    _show_counts(np.sort(item_popularity), 'Item Index')

    user_activity = np.sort(np.asarray(interacted.sum(axis=1)).ravel())
    _show_counts(user_activity, 'User Index')
def __init__(self, cutoff, cbfknn=False, icfknn=False, ucfknn=False,
             slim_bpr=False, pure_svd=False, als=False, cfw=False,
             p3a=False, rp3b=False, slim_en=False):
    """
    Initialization of the generic runner, in which we decide which
    algorithms are used.

    :param cutoff: stored on the instance and passed to ``XGBoostDataframe``
    :param cbfknn, icfknn, ucfknn, slim_bpr, pure_svd, als, cfw, p3a, rp3b,
        slim_en: one flag per algorithm, stored as-is on the instance
    """
    self.cutoff = cutoff

    # Per-algorithm switches
    self.cbfknn, self.icfknn, self.ucfknn = cbfknn, icfknn, ucfknn
    self.slim_bpr, self.pure_svd, self.als = slim_bpr, pure_svd, als
    self.cfw, self.p3a, self.rp3b = cfw, p3a, rp3b
    self.slim_en = slim_en

    # Collaborators
    # NOTE(review): the Writer class itself is stored, not an instance.
    self.writer = Writer
    self.extractor = Extractor()
    self.df_builder = XGBoostDataframe(self.cutoff)

    # Data and results; only the ICM is loaded here.
    self.result_dict = None
    self.urm_train = None
    self.urm_validation = None
    self.icm = self.extractor.get_icm_all()

    # Per-algorithm parameter slots, all empty at construction time
    for slot in ("p_cbfknn", "p_icfknn", "p_ucfknn", "p_slimbpr",
                 "p_puresvd", "p_als", "p_cfw", "p_p3a", "p_rp3b",
                 "p_slimen"):
        setattr(self, slot, None)

    self.target_users = []
    self.results = []
    self.df_user_id_col = []
    self.df_item_id_col = []

    # NOTE(review): these hold the pd.DataFrame class, not empty frames;
    # presumably placeholders that are overwritten later - confirm.
    self.df_train = pd.DataFrame
    self.df_test = pd.DataFrame
def __init__(self, urm_train, icm, urm_per_region_list, urm_per_age_list,
             weights, add_pure_svd=False, add_slim_bpr=False):
    """
    Build the recommenders that make up the hybrid.

    For every matrix in ``urm_per_region_list`` and then in
    ``urm_per_age_list`` (each list is skipped when ``None``) one item-CF
    KNN, one user-CF KNN and one item-CBF KNN recommender are created and
    appended to ``icfknn_list``, ``ucfknn_list`` and ``icbfknn_list``.

    :param urm_train: full training URM; only used for PureSVD / SLIM BPR
    :param icm: item content matrix; a float32, BM25-weighted CSR copy is
        kept in ``self.icm_bm25``
    :param urm_per_region_list: iterable of per-region URMs, or None
    :param urm_per_age_list: iterable of per-age URMs, or None
    :param weights: stored as-is in ``self.weights``
    :param add_pure_svd: if truthy, also create ``self.pure_SVD``
    :param add_slim_bpr: if truthy, also create ``self.slim_bpr``
    """
    self.urm_train = urm_train
    self.urm_per_region_list = urm_per_region_list
    self.urm_per_age_list = urm_per_age_list
    self.icm = icm
    self.add_pure_svd = add_pure_svd
    self.add_slim_bpr = add_slim_bpr
    self.weights = weights

    self.icfknn_list = []
    self.ucfknn_list = []
    self.icbfknn_list = []

    # BM25 weighting is applied to a float32 copy, leaving `icm` untouched.
    self.icm_bm25 = okapi_BM_25(self.icm.copy().astype(np.float32)).tocsr()

    self.ratings = None
    self.extractor = Extractor()

    # Creation of the list of algorithms that have to be used: region
    # matrices first, then age matrices, all appended to the same lists.
    for urm_list in (self.urm_per_region_list, self.urm_per_age_list):
        if urm_list is None:
            continue
        for urm in urm_list:
            # The CSR conversion result must be re-bound: the original code
            # called sps.csr_matrix(urm) and discarded the returned matrix.
            urm = sps.csr_matrix(urm)
            self.icfknn_list.append(ItemCFKNNRecommender(urm.copy()))
            self.ucfknn_list.append(UserCFKNNRecommender(urm.copy()))
            self.icbfknn_list.append(
                ItemCBFKNNRecommender(urm.copy(), self.icm_bm25.copy()))

    # KNN recommenders on the full urm_train were commented out in the
    # original code and are not created here.

    if self.add_pure_svd:
        self.pure_SVD = PureSVDRecommender(self.urm_train.copy())
    if self.add_slim_bpr:
        self.slim_bpr = SLIM_BPR_Cython(self.urm_train.copy())
def run(self, is_test):
    """
    Entry point from which the algorithm is started.

    :param is_test: if truthy, split the URM into train and test and
        evaluate; otherwise train on the full URM and write a submission

    In test mode only a train / test split is made: ``urm_train`` is the
    post-validation matrix and ``urm_validation`` is not set. In submission
    mode ``icm`` is not loaded.
    """
    self.is_test = is_test

    if not self.is_test:
        # Submission mode: the whole URM is the training matrix.
        extractor = Extractor()
        users = extractor.get_target_users_of_recs()
        self.urm_train = extractor.get_urm_all()
        self.write_submission(users)
        return

    extractor = Extractor()
    full_urm = extractor.get_urm_all()
    self.icm = extractor.get_icm_all()

    # Split into post-validation and test (used for parameter tuning).
    split = loo.split_train_leave_k_out_user_wise(full_urm, 1, False, True)
    self.urm_post_validation = split[0]
    self.urm_test = split[1]

    # ONLY TRAIN AND TEST: the further train / validation split of the
    # post-validation matrix is disabled, so it is used directly for training.
    self.urm_train = self.urm_post_validation

    self.evaluate()
def __init__(self, dataframe, group_length):
    """
    Store the dataframe and load the matrices and target users.

    :param dataframe: object indexable with ``.loc[:, 'user_id']`` and
        ``.loc[:, 'item_id']``; both columns are copied into int lists
    :param group_length: stored as-is in ``self.group_length``
    """
    self.builder = Builder()
    data_source = Extractor()

    self.dataframe = dataframe
    self.group_length = group_length

    self.urm = data_source.get_urm_all()
    self.icm = data_source.get_icm_all()
    self.users = data_source.get_target_users_of_recs()

    # Every id is cast with int() (the original comment says the columns
    # arrive as strings).
    self.df_user_id_col = [
        int(raw_id) for raw_id in self.dataframe.loc[:, 'user_id']]
    self.df_item_id_col = [
        int(raw_id) for raw_id in self.dataframe.loc[:, 'item_id']]
def __init__(self, cbfknn=True, icfknn=True, ucfknn=True, slim_bpr=True,
             pure_svd=True, als=True, cfw=True, p3a=True, rp3b=True,
             slim_en=True):
    """
    Initialization of the generic runner, in which we decide which
    algorithms are used.

    Each flag is stored on the instance. For every enabled algorithm the
    matching ``p_*`` attribute is taken from ``WeightConstants``; for a
    disabled one it is ``None``.
    """
    # Per-algorithm switches
    self.cbfknn, self.icfknn, self.ucfknn = cbfknn, icfknn, ucfknn
    self.slim_bpr, self.pure_svd, self.als = slim_bpr, pure_svd, als
    self.cfw, self.p3a, self.rp3b = cfw, p3a, rp3b
    self.slim_en = slim_en

    self.is_test = None
    # NOTE(review): the Writer class itself is stored, not an instance.
    self.writer = Writer
    self.extractor = Extractor()
    self.result_dict = None

    # Matrices are not loaded at construction time.
    self.urm_train = None
    self.urm_validation = None
    self.icm = None

    self.target_users = []
    self.results = []

    # Parameters of each algorithm: looked up only when it is enabled.
    self.p_cbfknn = WeightConstants.CBFKNN if self.cbfknn else None
    self.p_icfknn = WeightConstants.ICFKNN if self.icfknn else None
    self.p_ucfknn = WeightConstants.UCFKNN if self.ucfknn else None
    self.p_slimbpr = WeightConstants.SLIM_BPR if self.slim_bpr else None
    self.p_puresvd = WeightConstants.PURE_SVD if self.pure_svd else None
    self.p_als = WeightConstants.ALS if self.als else None
    self.p_cfw = WeightConstants.CFW if self.cfw else None
    self.p_p3a = WeightConstants.P3A if self.p3a else None
    self.p_rp3b = WeightConstants.RP3B if self.rp3b else None
    self.p_slimen = (
        WeightConstants.SLIM_ELASTIC_NET if self.slim_en else None)

    self.MAPs = []
def __init__(self, group_length):
    """
    Store the group length and load the users to recommend to.

    :param group_length: stored as-is in ``self.group_length``
    """
    self.group_length = group_length
    # The extractor is only needed here, so it is not kept on the instance.
    self.users = Extractor().get_target_users_of_recs()
def filter_seen(self, user_id, scores):
    """
    Set the scores of the items stored in the user's URM row to ``-inf``.

    :param user_id: row index into ``self.URM`` (read through its
        ``indptr`` / ``indices`` arrays)
    :param scores: array indexable by item id; modified in place
    :return: the same ``scores`` object that was passed in
    """
    row_bounds = self.URM.indptr
    seen_items = self.URM.indices[row_bounds[user_id]:row_bounds[user_id + 1]]
    scores[seen_items] = -np.inf
    return scores


if __name__ == '__main__':
    # NOTE(review): the Extractor class itself is used here and passed
    # explicitly as `self` to its methods; only get_numb_interactions is
    # called on a real instance.
    extractor = Extractor
    userList = extractor.get_interaction_users(extractor, False)
    itemList = extractor.get_interaction_items(extractor, False)
    ratingList = np.ones(Extractor().get_numb_interactions())
    URM_all = extractor.get_interaction_matrix(extractor, False)

    # Keep only the columns (items) that have at least one stored entry.
    items_per_column = np.ediff1d(URM_all.tocsc().indptr)
    warm_items_mask = items_per_column > 0
    warm_items = np.arange(URM_all.shape[1])[warm_items_mask]
    URM_all = URM_all[:, warm_items]

    # Then keep only the rows (users) that still have at least one entry.
    items_per_row = np.ediff1d(URM_all.tocsr().indptr)
    warm_users_mask = items_per_row > 0
    warm_users = np.arange(URM_all.shape[0])[warm_users_mask]
    URM_all = URM_all[warm_users, :]

    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)
    recommender = UserCFKNNRecommender(URM_train)