def __init__(self, pre_splitted_path):

    test_percentage = 0.2
    validation_percentage = 0.2

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"

    # If directory does not exist, create it
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_MovielensHetrec2011: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_MovielensHetrec2011: Pre-splitted data not found, building new one")

        data_reader = MovielensHetrec2011Reader_DataManager()
        loaded_dataset = data_reader.load_data()

        URM_all = loaded_dataset.get_URM_all()

        # Keep only ratings equal to 5
        URM_all.data = URM_all.data == 5
        URM_all.eliminate_zeros()

        # Create train - test - validation splits
        URM_train_original, URM_test = split_train_validation_percentage_user_wise(
            URM_all, train_percentage=1 - test_percentage, verbose=False)

        URM_train, URM_validation = split_train_validation_percentage_user_wise(
            URM_train_original, train_percentage=1 - validation_percentage, verbose=False)

        self.URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_validation": URM_validation,
        }

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("Dataset_MovielensHetrec2011: Dataset loaded")
    ut.print_stat_datareader(self)
def __init__(self, pre_splitted_path):

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"

    # If directory does not exist, create it
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_FilmTrust: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_FilmTrust: Pre-splitted data not found, building new one")

        data_reader = FilmTrustReader_DataManager()
        loaded_dataset = data_reader.load_data()

        URM_all = loaded_dataset.get_URM_all()
        URM_all.eliminate_zeros()

        URM_all.data = np.ones_like(URM_all.data)

        URM_train, URM_test = split_train_validation_percentage_random_holdout(URM_all, train_percentage=0.8)
        URM_train, URM_validation = split_train_validation_percentage_random_holdout(URM_train, train_percentage=0.9)

        self.URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_validation": URM_validation,
        }

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("FilmTrust: Dataset loaded")
    ut.print_stat_datareader(self)
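
# Usage sketch (illustrative, not part of the original readers): how one of these
# dataset classes is typically consumed downstream. The class name Dataset_FilmTrust
# and the result folder below are assumptions; the URM_DICT keys match the ones set
# in __init__ above.
def _example_load_filmtrust_split(result_folder="result_experiments/FilmTrust/"):
    dataset = Dataset_FilmTrust(pre_splitted_path=result_folder)

    URM_train = dataset.URM_DICT["URM_train"]
    URM_validation = dataset.URM_DICT["URM_validation"]
    URM_test = dataset.URM_DICT["URM_test"]

    return URM_train, URM_validation, URM_test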
def __init__(self):

    test_percentage = 0.2
    validation_percentage = 0.2

    pre_splitted_path = "Data_manager_split_datasets/AmazonInstantVideo/RecSys/SpectralCF_our_interface/"
    pre_splitted_filename = "splitted_data"

    ratings_file_name = "ratings_Amazon_Instant_Video.csv"

    # If directory does not exist, create it
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_AmazonInstantVideo: Attempting to load pre-splitted data")

        for attrib_name, attrib_object in load_data_dict(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_AmazonInstantVideo: Pre-splitted data not found, building new one")

        folder_path = self.DATASET_SPLIT_ROOT_FOLDER + self.DATASET_SUBFOLDER
        downloadFromURL(self.DATASET_URL, folder_path, ratings_file_name)

        # Read the Amazon Instant Video ratings file
        df = pd.read_csv(folder_path + ratings_file_name, sep=',', header=None,
                         names=['user', 'item', 'rating', 'timestamp'])[['user', 'item', 'rating']]

        URM_train_builder = IncrementalSparseMatrix(auto_create_col_mapper=True, auto_create_row_mapper=True)
        URM_train_builder.add_data_lists(df['user'].values, df['item'].values, df['rating'].values)

        URM_all = URM_train_builder.get_SparseMatrix()

        # Keep only ratings equal to 5
        URM_all.data = URM_all.data == 5
        URM_all.eliminate_zeros()

        # Keep only users with at least 5 ratings
        URM_all = ut.filter_urm(URM_all, user_min_number_ratings=5, item_min_number_ratings=1)

        # Create train - test - validation splits
        URM_train_original, self.URM_test = split_train_validation_percentage_user_wise(
            URM_all, train_percentage=1 - test_percentage, verbose=False)

        self.URM_train, self.URM_validation = split_train_validation_percentage_user_wise(
            URM_train_original, train_percentage=1 - validation_percentage, verbose=False)

        data_dict = {
            "URM_train": self.URM_train,
            "URM_test": self.URM_test,
            "URM_validation": self.URM_validation,
        }

        save_data_dict(data_dict, pre_splitted_path, pre_splitted_filename)

    print("Dataset_AmazonInstantVideo: Dataset loaded")
    ut.print_stat_datareader(self)
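
# Illustrative sketch (assumptions noted, not the repo's own ut.filter_urm helper):
# the two preprocessing steps above, keeping only 5-star ratings as implicit feedback
# and then dropping users with fewer than 5 remaining interactions, written with
# plain numpy/scipy on a csr matrix.
import numpy as np
import scipy.sparse as sps

def _keep_five_star_and_active_users(URM_all, min_user_interactions=5):
    URM = sps.csr_matrix(URM_all, copy=True)

    # Implicitize: a 5-star rating becomes 1, everything else is removed
    URM.data = (URM.data == 5).astype(np.float64)
    URM.eliminate_zeros()

    # Keep only users (rows) with at least min_user_interactions interactions left
    interactions_per_user = np.ediff1d(URM.indptr)
    users_to_keep = interactions_per_user >= min_user_interactions

    return URM[users_to_keep, :]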
def __init__(self, pre_splitted_path, original=True):

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"

    # If directory does not exist, create it
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_{}: Attempting to load pre-splitted data".format(self.DATASET_NAME))

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_{}: Pre-splitted data not found, building new one".format(self.DATASET_NAME))

        compressed_file_folder = "Conferences/IJCAI/ConvNCF_github/Data/"
        decompressed_file_folder = "Data_manager_split_datasets/Yelp/"

        # Decompression of the *.gz files is expected to be done manually, see the error message below.
        # compressed_file = tarfile.open(compressed_file_folder + "yelp.test.negative.gz", "r:gz")
        # compressed_file.extract("yelp.test.negative", path=decompressed_file_folder + "decompressed/")
        # compressed_file.close()
        #
        # compressed_file = tarfile.open(compressed_file_folder + "yelp.test.rating.gz", "r:gz")
        # compressed_file.extract("yelp.test.rating", path=decompressed_file_folder + "decompressed/")
        # compressed_file.close()
        #
        # compressed_file = tarfile.open(compressed_file_folder + "yelp.train.rating.gz", "r:gz")
        # compressed_file.extract("yelp.train.rating", path=decompressed_file_folder + "decompressed/")
        # compressed_file.close()

        # if original:
        Dataset_github.load_rating_file_as_list = Dataset_github.load_training_file_as_matrix

        try:
            dataset = Dataset_github(compressed_file_folder + "yelp")

        except FileNotFoundError as exc:
            print("Dataset_{}: Uncompressed files not found, please manually decompress the *.gz files in this folder: '{}'"
                  .format(self.DATASET_NAME, compressed_file_folder))
            raise exc

        URM_train_original, URM_test = dataset.trainMatrix, dataset.testRatings

        n_users = max(URM_train_original.shape[0], URM_test.shape[0])
        n_items = max(URM_train_original.shape[1], URM_test.shape[1])

        URM_train_original = sps.csr_matrix(URM_train_original, shape=(n_users, n_items))
        URM_test = sps.csr_matrix(URM_test, shape=(n_users, n_items))

        URM_train_original.data = np.ones_like(URM_train_original.data)
        URM_test.data = np.ones_like(URM_test.data)

        URM_test_negatives_builder = IncrementalSparseMatrix(n_rows=n_users, n_cols=n_items)

        n_negative_samples = 999
        for user_index in range(len(dataset.testNegatives)):
            user_test_items = dataset.testNegatives[user_index]

            if len(user_test_items) != n_negative_samples:
                print("user id: {} has {} negative items instead of {}".format(
                    user_index, len(user_test_items), n_negative_samples))

            URM_test_negatives_builder.add_single_row(user_index, user_test_items, data=1.0)

        URM_test_negative = URM_test_negatives_builder.get_SparseMatrix()
        URM_test_negative.data = np.ones_like(URM_test_negative.data)

        URM_train, URM_validation = split_train_validation_leave_one_out_user_wise(
            URM_train_original.copy(), verbose=False)

        # else:
        #     data_reader = YelpReader_DataManager()
        #     loaded_dataset = data_reader.load_data()
        #
        #     URM_all = loaded_dataset.get_URM_all()
        #
        #     URM_timestamp = URM_all.copy()
        #
        #     URM_all.data = np.ones_like(URM_all.data)
        #
        #     URM_train, URM_validation, URM_test, URM_negative = split_data_on_timestamp(URM_all, URM_timestamp, negative_items_per_positive=999)
        #     URM_train = URM_train + URM_validation
        #     URM_train, URM_validation = split_train_validation_leave_one_out_user_wise(URM_train, verbose=False)

        shutil.rmtree(decompressed_file_folder + "decompressed/", ignore_errors=True)

        self.URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_validation": URM_validation,
            "URM_test_negative": URM_test_negative,
        }

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("{}: Dataset loaded".format(self.DATASET_NAME))
    ut.print_stat_datareader(self)
def __init__(self, pre_splitted_path, type="original", cold_start=False, cold_items=None): assert type in ["original", "ours"] pre_splitted_path += "data_split/" pre_splitted_filename = "splitted_data_" # their mode in cold start mode = 1 # path for pre existed movielens1M split movielens_splitted_path = "Conferences/RecSys/SpectralCF_github/data/ml-1m/" # If directory does not exist, create if not os.path.exists(pre_splitted_path): os.makedirs(pre_splitted_path) try: print("Dataset_Movielens1M: Attempting to load pre-splitted data") for attrib_name, attrib_object in load_data_dict_zip( pre_splitted_path, pre_splitted_filename).items(): self.__setattr__(attrib_name, attrib_object) except FileNotFoundError: print( "Dataset_Movielens1M: Pre-splitted data not found, building new one" ) if type == "original": assert (cold_start is False) # use the SpectralCF class to read data data_generator = Data( train_file=movielens_splitted_path + 'train_users.dat', test_file=movielens_splitted_path + 'test_users.dat', batch_size=BATCH_SIZE) # convert train into csr full_train_matrix = sps.csr_matrix(data_generator.R) URM_train_original = full_train_matrix # convert test into csr test_set = data_generator.test_set uids, items = [], [] for uid in test_set.keys(): uids += np.full(len(test_set[uid]), uid).tolist() items += test_set[uid] test_matrix = sps.csr_matrix( (np.ones(len(items)), (uids, items)), shape=(full_train_matrix.shape)) if not cold_start: URM_test = test_matrix # create validation URM_train, URM_validation = split_train_validation_percentage_user_wise( URM_train_original, train_percentage=0.9, verbose=False) else: print('nothing') elif type == "ours": data_reader = Movielens1MReader_DataManager() loaded_dataset = data_reader.load_data() URM_all = loaded_dataset.get_URM_all() URM_all.data = URM_all.data == 5 URM_all.eliminate_zeros() if not cold_start: URM_train, URM_test = split_train_validation_percentage_user_wise( URM_all, train_percentage=0.8, verbose=False) URM_train, URM_validation = split_train_validation_percentage_user_wise( URM_train, train_percentage=0.9, verbose=False) else: if mode == 1: # their mode, cold start for full dataset URM_train, URM_test = split_train_validation_cold_start_user_wise( URM_all, full_train_percentage=0.0, cold_items=cold_items, verbose=False) URM_test, URM_validation = split_train_validation_percentage_user_wise( URM_test, train_percentage=0.9, verbose=False) if mode == 2: # cold start only for some users URM_train, URM_test = split_train_validation_cold_start_user_wise( URM_all, full_train_percentage=0.8, cold_items=cold_items, verbose=False) URM_train, URM_validation = split_train_validation_cold_start_user_wise( URM_train, full_train_percentage=0.9, cold_items=cold_items, verbose=False) self.URM_DICT = { "URM_train": URM_train, "URM_test": URM_test, "URM_validation": URM_validation, } save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename) print("Dataset_Movielens1M: Dataset loaded") ut.print_stat_datareader(self)
def __init__(self, pre_splitted_path, type='original'):

    assert type in ["original", "ours"]

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"

    # If directory does not exist, create it
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_{}: Attempting to load pre-splitted data".format(self.DATASET_NAME))

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_{}: Pre-splitted data not found, building new one".format(self.DATASET_NAME))

        from Conferences.IJCAI.CoupledCF_original import LoadMovieDataCnn as DatareaderOriginal

        path = "Conferences/IJCAI/CoupledCF_original/ml-1m/"

        n_users, gender, age, occupation = DatareaderOriginal.load_user_attributes(path=path, split=True)
        n_items, items_genres_mat = DatareaderOriginal.load_itemGenres_as_matrix(path=path)
        ratings = DatareaderOriginal.load_rating_train_as_matrix(path=path)
        testRatings = DatareaderOriginal.load_rating_file_as_list(path=path)
        testNegatives = DatareaderOriginal.load_negative_file(path=path)

        URM_all = ratings.tocsr()

        UCM_gender = gender.tocsr()
        UCM_age = age.tocsr()
        UCM_occupation = occupation.tocsr()
        UCM_all = sps.hstack((UCM_gender, UCM_age, UCM_occupation)).tocsr()

        ICM_all = sps.csr_matrix(items_genres_mat)

        testRatings = np.array(testRatings).T

        URM_test_builder = IncrementalSparseMatrix(n_rows=n_users + 1, n_cols=n_items + 1)
        URM_test_builder.add_data_lists(testRatings[0], testRatings[1], np.ones(len(testRatings[0])))

        URM_test = URM_test_builder.get_SparseMatrix()

        URM_test_negatives_builder = IncrementalSparseMatrix(n_rows=n_users + 1, n_cols=n_items + 1)

        # Note: the test negatives list is indexed from 0, but entry 0 refers to user index 1
        # (user indices start from 1)
        n_negative_samples = 99
        for index in range(len(testNegatives)):
            user_test_items = testNegatives[index]

            if len(user_test_items) != n_negative_samples:
                print("user id: {} has {} negative items instead of {}".format(
                    index + 1, len(user_test_items), n_negative_samples))

            URM_test_negatives_builder.add_single_row(index + 1, user_test_items, data=1.0)

        URM_test_negative = URM_test_negatives_builder.get_SparseMatrix()
        URM_test_negative.data = np.ones_like(URM_test_negative.data)

        if type == 'original':
            URM_test = URM_test
            URM_train, URM_validation = split_train_validation_leave_one_out_user_wise(
                URM_all.copy(), verbose=False)

        else:
            # Redo the split starting from the full URM
            URM_full = URM_all + URM_test

            URM_temp, URM_test = split_train_validation_leave_one_out_user_wise(
                URM_full.copy(), verbose=False)

            URM_train, URM_validation = split_train_validation_leave_one_out_user_wise(
                URM_temp.copy(), verbose=False)

        self.ICM_DICT = {
            "UCM_gender": UCM_gender,
            "UCM_occupation": UCM_occupation,
            "UCM_age": UCM_age,
            "UCM_all": UCM_all,
            "ICM_all": ICM_all,
        }

        self.URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_validation": URM_validation,
            "URM_test_negative": URM_test_negative,
        }

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("{}: Dataset loaded".format(self.DATASET_NAME))
    ut.print_stat_datareader(self)
def __init__(self, pre_splitted_path, type='original'):

    assert type in ["original", "ours"]

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"

    # If directory does not exist, create it
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_{}: Attempting to load pre-splitted data".format(self.DATASET_NAME))

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_{}: Pre-splitted data not found, building new one".format(self.DATASET_NAME))

        from Conferences.IJCAI.CoupledCF_original import LoadTafengDataCnn as DatareaderOriginal

        path = "Conferences/IJCAI/CoupledCF_original/tafeng/"

        n_users, user_attributes_mat = DatareaderOriginal.load_user_attributes(path=path)
        n_items, items_genres_mat = DatareaderOriginal.load_itemGenres_as_matrix(path=path)
        ratings = DatareaderOriginal.load_rating_train_as_matrix(path=path)
        testRatings = DatareaderOriginal.load_rating_file_as_list(path=path)
        testNegatives = DatareaderOriginal.load_negative_file(path=path)

        URM_all = ratings.tocsr()

        UCM_all = sps.csc_matrix(user_attributes_mat)
        UCM_age = UCM_all[:, 0:11].tocsr()
        UCM_region = UCM_all[:, 11:19].tocsr()
        UCM_all = UCM_all.tocsr()

        # Columns: 0 -> category, 2 -> asset (0-1), 1 -> price (0-1)
        ICM_original = sps.csc_matrix(items_genres_mat)

        # The category column can be expanded into a one-hot matrix rather than kept as a single column
        ICM_sub_class = ICM_original[:, 0:1].tocsr()

        n_rows = ICM_sub_class.shape[0]
        rows, cols, data = [], [], []
        for idx in range(n_rows):
            # Only column 0 is present
            data_vect = ICM_sub_class.data[ICM_sub_class.indptr[idx]:ICM_sub_class.indptr[idx + 1]]
            if len(data_vect) == 0:
                # Handle category value 0, which is not stored explicitly in a csr matrix
                cols.append(int(0))
            else:
                cols.append(int(data_vect[0]))
            rows.append(idx)
            data.append(1.0)

        ICM_sub_class = sps.csr_matrix((data, (rows, cols)))

        ICM_asset = ICM_original[:, 1:2].tocsr()
        ICM_price = ICM_original[:, 2:3].tocsr()

        ICM_original = ICM_original.tocsc()

        ICM_all = sps.hstack((ICM_sub_class, ICM_asset, ICM_price))

        testRatings = np.array(testRatings).T

        URM_test_builder = IncrementalSparseMatrix(n_rows=n_users + 1, n_cols=n_items + 1)
        URM_test_builder.add_data_lists(testRatings[0], testRatings[1], np.ones(len(testRatings[0])))

        URM_test = URM_test_builder.get_SparseMatrix()

        URM_test_negatives_builder = IncrementalSparseMatrix(n_rows=n_users + 1, n_cols=n_items + 1)

        # Note: the test negatives list is indexed from 0, but entry 0 refers to user index 1
        # (user indices start from 1)
        n_negative_samples = 99
        for index in range(len(testNegatives)):
            user_test_items = testNegatives[index]

            if len(user_test_items) != n_negative_samples:
                print("user id: {} has {} negative items instead of {}".format(
                    index + 1, len(user_test_items), n_negative_samples))

            URM_test_negatives_builder.add_single_row(index + 1, user_test_items, data=1.0)

        URM_test_negative = URM_test_negatives_builder.get_SparseMatrix()
        URM_test_negative.data = np.ones_like(URM_test_negative.data)

        if type == 'original':
            URM_test = URM_test
            URM_train, URM_validation = split_train_validation_leave_one_out_user_wise(
                URM_all.copy(), verbose=False)

        else:
            # Redo the split starting from the full URM
            URM_full = URM_all + URM_test

            URM_temp, URM_test = split_train_validation_leave_one_out_user_wise(
                URM_full.copy(), verbose=False)

            URM_train, URM_validation = split_train_validation_leave_one_out_user_wise(
                URM_temp.copy(), verbose=False)

        self.ICM_DICT = {
            "UCM_age": UCM_age,
            "UCM_region": UCM_region,
            "UCM_all": UCM_all,
            "ICM_all": ICM_all,
            "ICM_original": ICM_original,
            "ICM_sub_class": ICM_sub_class,
            "ICM_asset": ICM_asset,
            "ICM_price": ICM_price,
        }

        self.URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_validation": URM_validation,
            "URM_test_negative": URM_test_negative,
        }

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("{}: Dataset loaded".format(self.DATASET_NAME))
    ut.print_stat_datareader(self)
def __init__(self, pre_splitted_path):

    pre_splitted_path += "data_split/"
    pre_splitted_filename = "splitted_data_"

    # If directory does not exist, create it
    if not os.path.exists(pre_splitted_path):
        os.makedirs(pre_splitted_path)

    try:
        print("Dataset_{}: Attempting to load pre-splitted data".format(self.DATASET_NAME))

        for attrib_name, attrib_object in load_data_dict_zip(pre_splitted_path, pre_splitted_filename).items():
            self.__setattr__(attrib_name, attrib_object)

    except FileNotFoundError:
        print("Dataset_{}: Pre-splitted data not found, building new one".format(self.DATASET_NAME))

        compressed_file_folder = "Conferences/IJCAI/ConvNCF_github/Data/"
        decompressed_file_folder = "Data_manager_split_datasets/Gowalla/"

        # Decompression of the *.gz files is expected to be done manually, see the error message below.
        # compressed_file = tarfile.open(compressed_file_folder + "gowalla.test.negative.gz", "r:gz")
        # compressed_file.extract("yelp.test.negative", path=decompressed_file_folder + "decompressed/")
        # compressed_file.close()
        #
        # compressed_file = tarfile.open(compressed_file_folder + "gowalla.test.rating.gz", "r:gz")
        # compressed_file.extract("yelp.test.rating", path=decompressed_file_folder + "decompressed/")
        # compressed_file.close()
        #
        # compressed_file = tarfile.open(compressed_file_folder + "gowalla.train.rating.gz", "r:gz")
        # compressed_file.extract("yelp.train.rating", path=decompressed_file_folder + "decompressed/")
        # compressed_file.close()

        # if original:
        Dataset_github.load_rating_file_as_list = Dataset_github.load_training_file_as_matrix

        try:
            dataset = Dataset_github(compressed_file_folder + "gowalla")

        except FileNotFoundError as exc:
            print("Dataset_{}: Gowalla files not found, please download them and put them in this folder '{}', url: {}"
                  .format(self.DATASET_NAME, compressed_file_folder, self.DATASET_URL))
            print("Dataset_{}: Uncompressed files not found, please manually decompress the *.gz files in this folder: '{}'"
                  .format(self.DATASET_NAME, compressed_file_folder))
            raise exc

        URM_train_original, URM_test = dataset.trainMatrix, dataset.testRatings

        n_users = max(URM_train_original.shape[0], URM_test.shape[0])
        n_items = max(URM_train_original.shape[1], URM_test.shape[1])

        URM_train_original = sps.csr_matrix(URM_train_original, shape=(n_users, n_items))
        URM_test = sps.csr_matrix(URM_test, shape=(n_users, n_items))

        URM_train_original.data = np.ones_like(URM_train_original.data)
        URM_test.data = np.ones_like(URM_test.data)

        URM_test_negatives_builder = IncrementalSparseMatrix(n_rows=n_users, n_cols=n_items)

        n_negative_samples = 999
        for user_index in range(len(dataset.testNegatives)):
            user_test_items = dataset.testNegatives[user_index]

            if len(user_test_items) != n_negative_samples:
                print("user id: {} has {} negative items instead of {}".format(
                    user_index, len(user_test_items), n_negative_samples))

            URM_test_negatives_builder.add_single_row(user_index, user_test_items, data=1.0)

        URM_test_negative = URM_test_negatives_builder.get_SparseMatrix().tocsr()
        URM_test_negative.data = np.ones_like(URM_test_negative.data)

        URM_train, URM_validation = split_train_validation_leave_one_out_user_wise(
            URM_train_original.copy(), verbose=False)

        # NOT USED
        # elif not time_split:
        #     # Create the split from the full dataset with random leave-one-out, using the dataset
        #     # LINKED in the article, since the timestamp is not available there.
        #     data_reader = GowallaGithubReader_DataManager()
        #     loaded_dataset = data_reader.load_data()
        #
        #     URM_all = loaded_dataset.get_URM_all()
        #     URM_all.eliminate_zeros()
        #     URM_all.data = np.ones_like(URM_all.data)
        #
        #     # Apply this function twice because the order can slightly change the number of final interactions;
        #     # with this order we obtain the same number of interactions as in the paper.
        #     URM_all = filter_urm(URM_all, user_min_number_ratings=0, item_min_number_ratings=10)
        #     URM_all = filter_urm(URM_all, user_min_number_ratings=2, item_min_number_ratings=0)
        #
        #     URM_train, URM_validation, URM_test, URM_negative = split_train_validation_test_negative_leave_one_out_user_wise(
        #         URM_all, negative_items_per_positive=999,
        #         at_least_n_train_items_test=0, at_least_n_train_items_validation=0, verbose=True)
        #     URM_timestamp = sps.csc_matrix(([], ([], [])), shape=URM_train.shape)
        #
        # else:
        #     # Create the split from the ORIGINAL full dataset with time-wise leave-one-out
        #     data_reader = GowallaReader_DataManager()
        #     loaded_dataset = data_reader.load_data()
        #
        #     URM_all = loaded_dataset.get_URM_all()
        #
        #     # Apply this function twice because the order can slightly change the number of final interactions;
        #     # with this order we obtain the same number of interactions as in the paper.
        #     URM_all = filter_urm(URM_all, user_min_number_ratings=0, item_min_number_ratings=10)
        #     URM_all = filter_urm(URM_all, user_min_number_ratings=2, item_min_number_ratings=0)
        #
        #     URM_timestamp = URM_all.copy()
        #     URM_all.data = np.ones_like(URM_all.data)
        #
        #     URM_train, URM_validation, URM_test, URM_negative = split_data_on_timestamp(URM_all, URM_timestamp, negative_items_per_positive=999)
        #     URM_train = URM_train + URM_validation
        #     URM_train, URM_validation = split_train_validation_leave_one_out_user_wise(URM_train, verbose=False)

        self.URM_DICT = {
            "URM_train": URM_train,
            "URM_test": URM_test,
            "URM_validation": URM_validation,
            "URM_test_negative": URM_test_negative,
        }

        save_data_dict_zip(self.URM_DICT, self.ICM_DICT, pre_splitted_path, pre_splitted_filename)

    print("{}: Dataset loaded".format(self.DATASET_NAME))
    ut.print_stat_datareader(self)