def prepare_data(self, original_matrix, train_matrix, test_matrix):
    """Cache matrix dimensions, positive training pairs, and eval dicts."""
    self.num_user, self.num_item = original_matrix.shape
    # Positive (user, item) training pairs; the rating values are discarded.
    self.train_uid, self.train_iid, _ = mtl.matrix_to_list(train_matrix)
    # Negative / ranking / test dictionaries for precision-style evaluation.
    self.neg_dict, self.ranking_dict, self.test_dict = mtl.negdict_mat(
        original_matrix, test_matrix, mod='precision', random_state=20)
    self.num_training = len(self.train_uid)
    self.num_batch = int(self.num_training / self.batch_size)
    print("Data Preparation Completed.")
def prepare_data(self, original_matrix, train_matrix, test_matrix):
    """Build train/test rating triplets and the global mean rating."""
    self.num_user, self.num_item = original_matrix.shape
    # Explicit (uid, iid, rating) triplets for training and testing.
    self.train_uid, self.train_iid, self.train_ratings = \
        mtl.matrix_to_list(train_matrix)
    self.test_uid, self.test_iid, self.test_ratings = \
        mtl.matrix_to_list(test_matrix)
    # Global mean of the training ratings (bias/"mu" term).
    self.mu = np.mean(self.train_ratings)
    self.num_training = len(self.train_ratings)
    self.num_batch = int(self.num_training / self.batch_size)
    print("Data Preparation Completed.")
def prepare_data(self, original_matrix, train_matrix, test_matrix):
    """Densify the training matrix and build ranking dicts plus a
    negative-output mask for training."""
    self.num_user, self.num_item = original_matrix.shape
    # Dense user-by-item training matrix (one row per user).
    self.train_array = train_matrix.toarray()
    # Ranking/test dictionaries; list size is controlled by a ratio
    # rather than a fixed negative count.
    _, self.ranking_dict, self.test_dict = mtl.negdict_mat(
        original_matrix, test_matrix, num_neg=None,
        neg_ratio=self.ranking_list_ratio)
    # Mask selecting which negative outputs participate in the loss.
    self.negative_output_mask = mtl.neg_mask_array(
        original_matrix, train_matrix, neg_ratio=self.neg_ratio)
    self.num_training = self.train_array.shape[0]
    self.num_batch = int(self.num_training / self.batch_size)
    print("Data Preparation Completed.")
def prepare_data(self, original_matrix, train_matrix, test_matrix):
    """Densify the training matrix and build evaluation dictionaries.

    The evaluation protocol depends on ``self.is_prec``: precision-style
    ranking lists, or 199-negative leave-one-out style lists.
    """
    self.num_user, self.num_item = original_matrix.shape
    self.train_array = train_matrix.toarray()
    # Select the evaluation protocol once, then issue a single call.
    if self.is_prec:
        eval_kwargs = dict(mod='precision', random_state=20)
    else:
        eval_kwargs = dict(num_neg=199, mod='others', random_state=0)
    _, self.ranking_dict, self.test_dict = mtl.negdict_mat(
        original_matrix, test_matrix, **eval_kwargs)
    self.num_training = self.train_array.shape[0]
    self.num_batch = int(self.num_training / self.batch_size)
    print("Data Preparation Completed.")
def prepare_data(self, original_matrix, train_matrix, test_matrix):
    """Convert the training matrix to lists and augment them with
    sampled negative examples (label value 0)."""
    self.num_user, self.num_item = original_matrix.shape
    self.train_uid, self.train_iid, self.train_labels = \
        mtl.matrix_to_list(train_matrix)
    self.neg_dict, self.ranking_dict, self.test_dict = mtl.negdict_mat(
        original_matrix, test_matrix, num_neg=self.num_ranking_neg)
    # Negative sampling on the training lists (timed, can be slow).
    print("Enter NegSa")
    t0 = time.time()
    self.train_uid, self.train_iid, self.train_labels = \
        mtl.negative_sample_list(user_list=self.train_uid,
                                 item_list=self.train_iid,
                                 rating_list=self.train_labels,
                                 num_neg=self.num_neg,
                                 neg_val=0,
                                 neg_dict=self.neg_dict)
    print("Leaving NegSa")
    print("Negative Sampling Time: {0}".format(time.time() - t0))
    self.num_training = len(self.train_labels)
    self.num_batch = int(self.num_training / self.batch_size)
    print("Data Preparation Completed.")
def prepare_data(self, original_matrix, train_matrix, test_matrix):
    """Densify the training matrix and build precision-style eval dicts."""
    self.num_user, self.num_item = original_matrix.shape
    # Dense user-by-item training matrix (one row per user).
    self.train_array = train_matrix.toarray()
    _, self.ranking_dict, self.test_dict = mtl.negdict_mat(
        original_matrix, test_matrix, mod='precision', random_state=20)
    self.num_training = self.train_array.shape[0]
    self.num_batch = int(self.num_training / self.batch_size)
    print("Data Preparation Completed.")
def prepare_data(self, original_matrix, train_matrix, test_matrix):
    """Prepare training data for a weighted-MF model solved with ALS."""
    self.num_user, self.num_item = original_matrix.shape
    # Negative dictionaries used only for ranking evaluation.
    _, self.ranking_dict, self.test_dict = mtl.negdict_mat(
        original_matrix, test_matrix, num_neg=self.num_ranking_neg)
    # Full (uid, iid, rating) lists covering explicit and implicit entries.
    self.train_uid, self.train_iid, self.train_ratings = \
        mtl.get_full_matrix(train_matrix, test_matrix)
    # Loss-function parameters: confidence C_ui = 1 + alpha * r_ui and the
    # binary preference indicator P_ui.
    self.weights_ui = [1 + self.alpha * rating
                       for rating in self.train_ratings]
    self.indicators_ui = mtl.list_to_binary(self.train_ratings, 0)
    # Row- and column-major sparse views for the ALS sweeps.
    self.train_matrix_csr = train_matrix.tocsr()
    self.train_matrix_csc = train_matrix.tocsc()
    self.num_training = len(self.train_ratings)
    self.num_batch = int(self.num_training / self.batch_size)
    print("Data Preparation Completed.")
def prepare_data(self, original_matrix1, train_matrix1, test_matrix1,
                 original_matrix2, train_matrix2, test_matrix2):
    """Prepare aligned training data for a two-domain model.

    Builds negative/test dictionaries per domain, converts both training
    matrices to (uid, iid, label) lists, upsamples the shorter list so the
    two domains stay in lockstep, then negative-samples both lists in
    parallel worker processes.
    """
    # Meta Info
    self.num_user1, self.num_item1 = train_matrix1.shape
    self.num_user2, self.num_item2 = train_matrix2.shape
    self.neg_dict1, self.test_dict1 = mtl.negdict_mat(
        original_matrix1, test_matrix1, num_neg=self.num_ranking_list - 1)
    self.neg_dict2, self.test_dict2 = mtl.negdict_mat(
        original_matrix2, test_matrix2, num_neg=self.num_ranking_list - 1)
    self.train_uid1, self.train_iid1, self.train_labels1 = \
        mtl.matrix_to_list(train_matrix1)
    self.train_uid2, self.train_iid2, self.train_labels2 = \
        mtl.matrix_to_list(train_matrix2)

    # Extend the shorter training data so both domains have equal length.
    # The two branches are mutually exclusive, hence if/elif.
    length1, length2 = len(self.train_labels1), len(self.train_labels2)
    if length1 < length2:
        self.train_uid1, self.train_iid1, self.train_labels1 = \
            mtl.data_upsample_list(self.train_uid1, self.train_iid1,
                                   self.train_labels1,
                                   num_ext=length2 - length1)
    elif length2 < length1:
        self.train_uid2, self.train_iid2, self.train_labels2 = \
            mtl.data_upsample_list(self.train_uid2, self.train_iid2,
                                   self.train_labels2,
                                   num_ext=length1 - length2)
    assert len(self.train_labels1) == len(self.train_labels2)

    # Negative Sampling on Lists, one worker process per domain.
    print("Enter NegSa")
    start_time = time.time()
    # BUGFIX: the pool was previously created inline and never
    # closed/joined, leaking its worker processes. The context manager
    # guarantees the pool is terminated once map() has returned.
    with mp.Pool(processes=2) as pool:
        results = pool.map(
            gtl.mphelper,
            [(mtl.negative_sample_list, self.neg_dict1, self.train_uid1,
              self.train_iid1, self.train_labels1, self.num_neg, 0),
             (mtl.negative_sample_list, self.neg_dict2, self.train_uid2,
              self.train_iid2, self.train_labels2, self.num_neg, 0)])
    self.train_uid1, self.train_iid1, self.train_labels1 = results[0]
    self.train_uid2, self.train_iid2, self.train_labels2 = results[1]
    print("Leaving NegSa")
    print("Negative Sampling Time: {0}".format(time.time() - start_time))

    assert len(self.train_labels1) == len(self.train_labels2)
    self.num_training = len(self.train_labels1)
    self.num_batch = int(self.num_training / self.batch_size)
    print("Data Preparation Completed.")
if __name__ == "__main__": args = parseArgs() num_epochs, batch_size, \ reg_embs, num_neg, lr, ndcgk, num_factors, num_ranking_list, num_test = \ args.epochs, args.batch_size,\ args.ebregs, args.num_neg, args.lr, args.ndcgk, args.nfactors, args.num_rk, args.ntest reg_embs = list(np.float32(eval(reg_embs))) # original_matrix, train_matrix, test_matrix, num_users, num_items \ # = mtl.load_as_matrix(datafile='Data/books_and_elecs_merged.csv') original_matrix \ = mtl.load_original_matrix(datafile='Data/ml-100k/u.data',header=['uid','iid','ratings','time'],sep='\t') original_matrix = mtl.matrix_to_binary(original_matrix, 0) train_matrix, test_matrix = mtl.matrix_split(original_matrix, opt='ranking', n_item_per_user=num_test) num_users, num_items = original_matrix.shape print("Number of users is {0}".format(num_users)) print("Number of items is {0}".format(num_items)) print("Number of ratings for all is {0}".format(original_matrix.nnz)) print("Number of ratings for training is {0}".format(train_matrix.nnz)) print("Ratings density for training is {0}".format( train_matrix.nnz / (num_users * num_items))) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
default='adam', choices=('adam', 'adagrad', 'rmsprop', 'sgd'), help='Specify an optimizer: adagrad, adam, rmsprop, sgd') return parser.parse_args() if __name__ == "__main__": args = parseArgs() num_epochs, batch_size, regs, num_neg, alpha, lr, ndcgk, num_factors, num_ranking_list = \ args.epochs, args.batch_size, args.ebregs,args.num_neg,args.alpha,args.lr,args.ndcgk, args.nfactors, args.num_rk regs = list(np.float32(eval(regs))) original_matrix1, train_matrix1, test_matrix1, num_users1, num_items1\ = mtl.load_as_matrix(datafile='Data/books_small/original.csv') original_matrix2, train_matrix2, test_matrix2, num_users2, num_items2\ = mtl.load_as_matrix(datafile='Data/elec_small/original.csv') print("Number of users in domain 1 is {0}".format(num_users1)) print("Number of items in domain 1 is {0}".format(num_items1)) print("Number of ratings in domain 1 in all is {0}".format( original_matrix1.nnz)) print("Number of ratings in domain 1 for training is {0}".format( train_matrix1.nnz)) print("Ratings density of domain 1 for training is {0}".format( train_matrix1.nnz / (num_users1 * num_items1))) print("Number of users in domain 2 is {0}".format(num_users2)) print("Number of items in domain 2 is {0}".format(num_items2))
# Popularity-based ranking baseline: rank each user's candidate list by
# item popularity (training interaction count) and score it with HR/NDCG.
# NOTE(review): relies on `original_matrix`, `train_matrix`, `test_matrix`,
# `mtl`, and `evl` defined earlier in this script (not visible here).
num_user, num_item = original_matrix.shape[0], original_matrix.shape[1]
# Cutoffs for the top-K ranking metrics.
topK = [5, 10]
total_hr, total_ndcg = np.zeros(len(topK)), np.zeros(len(topK))
# Transpose so each row of the sparse structure corresponds to one item.
tr_mat = train_matrix.transpose()
item_pop_dict = {}
item_pop_list = []
# Popularity of each item = number of training ratings it received.
# NOTE(review): assumes `tr_mat` is a LIL-format sparse matrix, where
# `.data` holds one list of rating values per row — confirm the format
# produced upstream.
for item, ratings in enumerate(tr_mat.data):
    item_pop_dict[item] = len(ratings)
    item_pop_list.append(len(ratings))
item_pop_arr = np.asarray(item_pop_list)
# 199 sampled negatives per user plus the held-out positives.
_, ranking_dict, test_dict = mtl.negdict_mat(original_matrix, test_matrix,
                                             num_neg=199, mod='others',
                                             random_state=10)
for user in ranking_dict:
    # Skip users with no held-out test items.
    if len(test_dict[user]) == 0:
        continue
    iid = ranking_dict[user]  # The ranking item ids for user u
    # Popularity scores for the candidate items, used as ranking scores.
    rk = item_pop_arr[np.asarray(iid)]
    # NOTE(review): debug print inside the per-user loop — probably a
    # leftover; consider removing.
    print(rk)
    hr, ndcg = evl.rankingMetrics(rk, iid, topK, test_dict[user], mod='hr')
    total_hr += hr
    total_ndcg += ndcg
if __name__ == "__main__": args = parseArgs() num_epochs, batch_size, \ regs, lr, ndcgk, num_factors, num_ranking_list, num_test = \ args.epochs, args.batch_size,\ args.regs, args.lr, args.ndcgk, args.nfactors, args.num_rk, args.ntest regs_ui = list(np.float32(eval(regs))) # original_matrix, train_matrix, test_matrix, num_users, num_items \ # = mtl.load_as_matrix(datafile='Data/books_and_elecs_merged.csv') original_matrix \ = mtl.load_original_matrix(datafile='Data/ml-1m/ratings.dat', header=['uid', 'iid', 'ratings', 'time'], sep='::') # original_matrix = mtl.matrix_theshold(original_matrix,threshold=2) original_matrix = mtl.matrix_to_binary(original_matrix, 0) # train_matrix, test_matrix = mtl.matrix_split(original_matrix,n_item_per_user=num_test) train_matrix, test_matrix = mtl.matrix_split(original_matrix, opt='prediction', mode='user', test_size=0.2, random_state=10) gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, intra_op_parallelism_threads=8,