def sensitivity(train, validation, params):
    """Sweep the PMI `root` hyper-parameter of NCE-PLRec and report metrics.

    Trains once with root=1.0 as the default baseline, then once per value in
    params['root'], evaluating each model on `validation`.

    Parameters
    ----------
    train, validation : sparse user-item interaction matrices.
    params : dict with keys 'models', 'iter', 'rank', 'lambda', 'root',
        'metric', 'topK'.

    Returns
    -------
    (default_result, sensitivity_results) where sensitivity_results maps each
    root value to its evaluation dict.
    """
    progress = WorkSplitter()

    def _report(result, root=None):
        # Shared console report for the default run and each sensitivity run
        # (was duplicated verbatim in the original).
        print("-")
        if root is not None:
            print("Root: {0}".format(root))
        print("Rank: {0}".format(params['rank']))
        print("Lambda: {0}".format(params['lambda']))
        print("SVD Iteration: {0}".format(params['iter']))
        print("Evaluation Ranking Topk: {0}".format(params['topK']))
        for key in result.keys():
            print("{0} :{1}".format(key, result[key]))

    progress.section("PMI-PLRec Default")
    RQ, Yt, _ = params['models']['NCE-PLRec'](train,
                                              embeded_matrix=np.empty((0)),
                                              iteration=params['iter'],
                                              rank=params['rank'],
                                              lam=params['lambda'],
                                              root=1.0)
    Y = Yt.T

    default_prediction = predict(matrix_U=RQ, matrix_V=Y,
                                 topK=params['topK'][-1],
                                 matrix_Train=train, gpu=True)
    default_result = evaluate(default_prediction, validation,
                              params['metric'], params['topK'])
    _report(default_result)

    sensitivity_results = dict()
    for root in tqdm(params['root']):
        progress.section("PMI-PLRec, Root: " + str(root))
        RQ, Yt, _ = params['models']['NCE-PLRec'](train,
                                                  embeded_matrix=np.empty((0)),
                                                  iteration=params['iter'],
                                                  rank=params['rank'],
                                                  lam=params['lambda'],
                                                  root=root)
        Y = Yt.T

        prediction = predict(matrix_U=RQ, matrix_V=Y,
                             topK=params['topK'][-1],
                             matrix_Train=train, gpu=True)
        result = evaluate(prediction, validation, params['metric'],
                          params['topK'])
        sensitivity_results[root] = result
        _report(result, root=root)

    return default_result, sensitivity_results
def train_model(self, matrix_train, matrix_valid, epoch, metric_names):
    """Run SGD for `epoch` passes, keeping the checkpoint with best validation AUC.

    Parameters
    ----------
    matrix_train : sparse user-item matrix of observed training interactions.
    matrix_valid : sparse matrix used for the per-epoch validation step.
    epoch : int, number of passes over the observed pairs.
    metric_names : metric keys forwarded to `evaluate`; must include 'AUC'.

    Returns
    -------
    (best_RQ, best_Y, best_uBias, best_iBias): user/item embeddings and bias
    vectors from the epoch that achieved the highest validation AUC.
    """
    user_item_matrix = lil_matrix(matrix_train)
    # (user, item) index pairs of all observed interactions, shape (nnz, 2).
    user_item_pairs = np.asarray(user_item_matrix.nonzero()).T

    # Training
    best_AUC, best_RQ, best_Y, best_uBias, best_iBias = 0, [], [], [], []
    for i in tqdm(range(epoch)):
        batches = self.get_batches(user_item_pairs, matrix_train, self.batch_size)
        for step in range(len(batches)):
            # Regularization ids over the "2i" item space; presumably maps the
            # batch items into a doubled item-embedding table — TODO confirm
            # against compute_2i_regularization_id.
            reg_idx = self.compute_2i_regularization_id(batches[step][1], self.num_items)
            feed_dict = {self.user_idx: batches[step][0],
                         self.item_idx: batches[step][1],
                         self.label: batches[step][2],
                         self.reg_idx: reg_idx
                         }
            _ = self.sess.run([self.optimizer], feed_dict=feed_dict)

        # Snapshot current parameters; item tables are sliced to the first
        # num_items rows (the underlying variables appear to be over-allocated).
        RQ, Y, uBias, iBias = self.sess.run([self.user_embeddings,
                                             self.item_embeddings[0:self.num_items, :],
                                             self.user_bias_embeddings,
                                             self.item_bias_embeddings[0:self.num_items]])

        prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=matrix_valid,
                             ubias=uBias, ibias=iBias, gpu=self.gpu_on)
        result = evaluate(prediction, matrix_valid, metric_names, gpu=self.gpu_on)

        # Model selection: keep the best snapshot seen so far, ranked by AUC.
        if result['AUC'][0] > best_AUC:
            best_AUC = result['AUC'][0]
            best_RQ, best_Y, best_uBias, best_iBias = RQ, Y, uBias, iBias

    return best_RQ, best_Y, best_uBias, best_iBias
def train_model(self, rating_matrix, matrix_valid, invP, epoch, metric_names):
    """Propensity-weighted SGD loop; keeps the checkpoint with best validation AUC.

    Parameters
    ----------
    rating_matrix : sparse user-item matrix of observed training interactions.
    matrix_valid : sparse matrix used for per-epoch validation.
    invP : inverse-propensity scores; forwarded to get_batches, which attaches
        a per-example weight as the 4th batch column.
    epoch : int, number of training passes.
    metric_names : metric keys forwarded to `evaluate`; must include 'AUC'.

    Returns
    -------
    (best_RQ, best_Y, best_uBias, best_iBias) from the best-AUC epoch.
    """
    user_item_matrix = lil_matrix(rating_matrix)
    # (user, item) index pairs of all observed interactions, shape (nnz, 2).
    user_item_pairs = np.asarray(user_item_matrix.nonzero()).T

    # Training
    best_AUC, best_RQ, best_Y, best_uBias, best_iBias = 0, [], [], [], []
    for i in tqdm(range(epoch)):
        batches = self.get_batches(user_item_pairs, rating_matrix, invP, self.batch_size)
        for step in range(len(batches)):
            feed_dict = {self.user_idx: batches[step][0],
                         self.item_idx: batches[step][1],
                         self.label: batches[step][2],
                         # batches[step][3]: per-example inverse-propensity weight.
                         self.weight: batches[step][3]
                         }
            _ = self.sess.run([self.optimizer], feed_dict=feed_dict)

        # Snapshot current embeddings and biases for evaluation.
        RQ, Y, uBias, iBias = self.sess.run(
            [self.user_embeddings, self.item_embeddings,
             self.user_bias_embeddings, self.item_bias_embeddings])

        prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=matrix_valid,
                             ubias=uBias, ibias=iBias, gpu=self.gpu_on)
        result = evaluate(prediction, matrix_valid, metric_names, gpu=self.gpu_on)

        # Keep the best snapshot seen so far, ranked by validation AUC.
        if result['AUC'][0] > best_AUC:
            best_AUC = result['AUC'][0]
            best_RQ, best_Y, best_uBias, best_iBias = RQ, Y, uBias, iBias

    return best_RQ, best_Y, best_uBias, best_iBias
def train_model(self, matrix_train, matrix_valid, epoch, metric_names):
    """SGD loop that also collects per-example refined labels each epoch.

    Parameters
    ----------
    matrix_train : sparse user-item matrix of observed training interactions.
    matrix_valid : sparse matrix used for per-epoch validation.
    epoch : int, number of training passes.
    metric_names : metric keys forwarded to `evaluate`; must include 'AUC'.

    Returns
    -------
    (best_RQ, best_Y, best_uBias, best_iBias, best_refined_label,
     user_item_pairs, best_prediction) — best_refined_label is an (nnz, 3)
    array of (user, item, refined label) rows from the best-AUC epoch.
    """
    user_item_matrix = lil_matrix(matrix_train)
    # (user, item) index pairs of all observed interactions, shape (nnz, 2).
    user_item_pairs = np.asarray(user_item_matrix.nonzero()).T

    # Training
    best_AUC, best_RQ, best_Y, best_uBias, best_iBias, best_refined_label, best_prediction = 0, [], [], [], [], [], []
    for i in tqdm(range(epoch)):
        batches = self.get_batches(user_item_pairs, matrix_train, self.batch_size)
        # Refined labels are re-collected from scratch every epoch.
        refined_label = []
        for step in range(len(batches)):
            feed_dict = {self.user_idx: batches[step][0],
                         self.item_idx: batches[step][1],
                         self.label: batches[step][2]
                         }
            _, temp_refined_label = self.sess.run([self.optimizer, self.refined_label], feed_dict=feed_dict)
            # Record (user, item, refined_label) triples for this batch.
            refined_label.append(np.stack((batches[step][0], batches[step][1], np.asarray(temp_refined_label)), axis=-1))

        # Snapshot the norm_* tensors (normalized variants, per their names —
        # TODO confirm against the graph definition).
        RQ, Y, uBias, iBias = self.sess.run(
            [self.norm_user_embeddings, self.norm_item_embeddings,
             self.norm_user_bias_embeddings, self.norm_item_bias_embeddings])

        prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=matrix_valid,
                             ubias=uBias, ibias=iBias, gpu=self.gpu_on)
        result = evaluate(prediction, matrix_valid, metric_names, gpu=self.gpu_on)

        # Keep the best snapshot (and its refined labels/prediction) by AUC.
        if result['AUC'][0] > best_AUC:
            best_AUC = result['AUC'][0]
            best_RQ, best_Y, best_uBias, best_iBias, best_refined_label, best_prediction = RQ, Y, uBias, iBias, np.vstack(refined_label), prediction

    return best_RQ, best_Y, best_uBias, best_iBias, best_refined_label, user_item_pairs, best_prediction
def train_model(self, rating_matrix, matrix_valid, epoch, metric_names):
    """Autoencoder-style training loop; keeps the best-AUC factor snapshot.

    Parameters
    ----------
    rating_matrix : sparse user-item matrix; rows are fed as dense inputs.
    matrix_valid : sparse matrix used for per-epoch validation.
    epoch : int, number of training passes.
    metric_names : metric keys forwarded to `evaluate`; must include 'AUC'.

    Returns
    -------
    (best_RQ, best_X, best_xBias, best_Y, best_yBias) from the best-AUC epoch.
    """
    # Training
    best_AUC, best_RQ, best_X, best_xBias, best_Y, best_yBias = 0, [], [], [], [], []
    for i in tqdm(range(epoch)):
        # get_batches returns row batches plus the sampled column indices
        # fed to the graph alongside each batch.
        batches, sample_idx = self.get_batches(rating_matrix, self.batch_size)
        for step in range(len(batches)):
            feed_dict = {
                # Sparse rows densified only per-batch to bound memory.
                self.inputs: batches[step].todense(),
                self.sample_idx: sample_idx[step]
            }
            _ = self.sess.run([self.optimizer], feed_dict=feed_dict)

        # Extract current encoder/decoder factors for evaluation.
        RQ, X, xBias = self.get_RQ(rating_matrix)
        Y = self.get_Y()
        yBias = self.get_yBias()

        prediction = predict(matrix_U=RQ, matrix_V=Y.T, matrix_Valid=matrix_valid,
                             bias=yBias, gpu=self.gpu_on)
        result = evaluate(prediction, matrix_valid, metric_names, gpu=self.gpu_on)

        # Keep the best snapshot seen so far, ranked by validation AUC.
        if result['AUC'][0] > best_AUC:
            best_AUC = result['AUC'][0]
            best_RQ, best_X, best_xBias, best_Y, best_yBias = RQ, X, xBias, Y, yBias

    return best_RQ, best_X, best_xBias, best_Y, best_yBias
def train_model(self, rating_matrix, matrix_unif_train, matrix_valid, epoch, metric_names):
    """SGD loop that samples batches with head/tail user and item splits.

    Users/items appearing in the uniformly-collected data are split into
    low-activity ("tail") and high-activity ("head") halves, ranked by their
    counts in the biased training data; the split sets are handed to
    get_batches to steer sampling.

    Parameters
    ----------
    rating_matrix : sparse biased (logged) training interactions.
    matrix_unif_train : sparse uniformly-collected training interactions.
    matrix_valid : sparse matrix used for per-epoch validation.
    epoch : int, number of training passes.
    metric_names : metric keys forwarded to `evaluate`; must include 'AUC'.

    Returns
    -------
    (best_RQ, best_Y, best_uBias, best_iBias) from the best-AUC epoch.
    """
    user_item_matrix = lil_matrix(rating_matrix)
    user_item_pairs = np.asarray(user_item_matrix.nonzero()).T

    unif_user_item_matrix = lil_matrix(matrix_unif_train)
    unif_user_item_pairs = np.asarray(unif_user_item_matrix.nonzero()).T

    # Split uniform-data users into tail/head halves by biased-data activity.
    user_set, user_activity = np.unique(user_item_pairs[:, 0], return_counts=True)
    unif_user_set = np.unique(unif_user_item_pairs[:, 0])
    # NOTE(review): indexing user_activity by raw user ids assumes every id in
    # [0, max_user] occurs in the biased data — confirm upstream guarantees.
    unif_user_activity = user_activity[unif_user_set]
    sorted_idx = np.argsort(unif_user_activity)
    tail_users = unif_user_set[sorted_idx[0:int(len(unif_user_set) / 2)]]
    head_users = unif_user_set[sorted_idx[int(len(unif_user_set) / 2):]]

    # Same tail/head split for items, by biased-data popularity.
    item_set, item_popularity = np.unique(user_item_pairs[:, 1], return_counts=True)
    unif_item_set = np.unique(unif_user_item_pairs[:, 1])
    unif_item_popularity = item_popularity[unif_item_set]
    sorted_idx = np.argsort(unif_item_popularity)
    tail_items = unif_item_set[sorted_idx[0:int(len(unif_item_set) / 2)]]
    head_items = unif_item_set[sorted_idx[int(len(unif_item_set) / 2):]]

    # Training
    best_AUC, best_RQ, best_Y, best_uBias, best_iBias = 0, [], [], [], []
    for i in tqdm(range(epoch)):
        batches = self.get_batches(user_item_pairs, unif_user_item_pairs,
                                   matrix_unif_train, rating_matrix,
                                   self.batch_size, head_users, tail_users,
                                   head_items, tail_items)
        for step in range(len(batches)):
            feed_dict = {
                self.user_idx: batches[step][0],
                self.item_idx: batches[step][1],
                self.label: batches[step][2]
            }
            # loss is fetched but unused; only the optimizer's side effect matters.
            _, loss = self.sess.run([self.optimizer, self.loss], feed_dict=feed_dict)

        # Snapshot current embeddings and biases for evaluation.
        RQ, Y, uBias, iBias = self.sess.run([
            self.user_embeddings, self.item_embeddings,
            self.user_bias_embeddings, self.item_bias_embeddings
        ])

        prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=matrix_valid,
                             ubias=uBias, ibias=iBias, gpu=self.gpu_on)
        result = evaluate(prediction, matrix_valid, metric_names, gpu=self.gpu_on)

        # Keep the best snapshot seen so far, ranked by validation AUC.
        if result['AUC'][0] > best_AUC:
            best_AUC = result['AUC'][0]
            best_RQ, best_Y, best_uBias, best_iBias = RQ, Y, uBias, iBias

    return best_RQ, best_Y, best_uBias, best_iBias
def execute(test, params, folder='latent'):
    """Load saved latent factors for one model and evaluate them on `test`.

    Autoencoder-style models (DeepAutoRec/HintAE/SoftLabelAE) persist their
    item factors as K/kB; everything else uses Y/yB. Files are optionally
    prefixed with params['way'].

    Returns a one-row DataFrame of rounded metric values.
    """
    df = pd.DataFrame(columns=['model', 'way'])

    # Pick the filename stems used when the factors were saved.
    if params['model'] in ['DeepAutoRec', 'HintAE', 'SoftLabelAE']:
        item_key, bias_key = 'K', 'kB'
    else:
        item_key, bias_key = 'Y', 'yB'

    # '{folder}/' or '{folder}/{way}_' — matches the save-side naming scheme.
    prefix = '{0}/'.format(folder)
    if params['way'] is not None:
        prefix += '{0}_'.format(params['way'])

    RQ = np.load('{0}U_{1}_{2}.npy'.format(prefix, params['model'], params['rank']))
    Y = np.load('{0}{1}_{2}_{3}.npy'.format(prefix, item_key, params['model'], params['rank']))
    Bias = np.load('{0}{1}_{2}_{3}.npy'.format(prefix, bias_key, params['model'], params['rank']))

    prediction = predict(matrix_U=RQ, matrix_V=Y.T, matrix_Valid=test, bias=Bias)
    result = evaluate(prediction, test, params['metric'])

    result_dict = {'model': params['model'], 'way': params['way']}
    for name in result.keys():
        result_dict[name] = round(result[name][0], 8)

    # DataFrame.append was removed in pandas 2.0; concat is the supported path.
    df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
    return df
def main(args):
    """Entry point: print the run configuration, train a KNN-style model on
    the training matrix, predict top-k items, and (optionally) evaluate."""
    progress = WorkSplitter()

    # Echo all hyper-parameters before doing any work.
    progress.section("Parameter Setting")
    print("Data Path: {}".format(args.path))
    print("Train File Name: {}".format(args.train))
    if args.validation:
        print("Valid File Name: {}".format(args.valid))
    print("Algorithm: {}".format(args.model))
    print("Lambda Diversity: {}".format(args.lambda_diversity))
    print("Lambda Serendipity: {}".format(args.lambda_serendipity))
    print("Nearest Neighbor Number: {}".format(args.k))
    print("Evaluation Ranking Topk: {}".format(args.topk))

    progress.section("Loading Data")
    load_started = time.time()
    R_train = load_numpy(path=args.path, name=args.train)
    print("Elapsed: {}".format(inhour(time.time() - load_started)))
    print("Train U-I Dimensions: {}".format(R_train.shape))

    progress.section("Train")
    recommender = models[args.model]()
    recommender.train(R_train)

    progress.section("Predict")
    scores = recommender.predict(
        R_train,
        k=args.k,
        lambda_diversity=args.lambda_diversity,
        lambda_serendipity=args.lambda_serendipity)
    prediction = predict(prediction_score=scores,
                         topK=args.topk,
                         matrix_Train=R_train)

    if not args.validation:
        return

    progress.section("Create Metrics")
    eval_started = time.time()
    metric_names = [
        'R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP'
    ]
    R_valid = load_numpy(path=args.path, name=args.valid)
    result = evaluate(prediction, R_valid, metric_names, [args.topk])
    print("-")
    for metric, value in result.items():
        print("{}:{}".format(metric, value))
    print("Elapsed: {}".format(inhour(time.time() - eval_started)))
def simpleKNNPrediction(similarityMatrix, predictionMatrix, kValue, validOrTestMatrix):
    """Single user-based KNN train/predict/evaluate pass.

    Typical sweep over kValue: kRange = range(50,120,10).
    Returns the MAP@10 score against validOrTestMatrix.
    """
    sim = train(similarityMatrix)
    scores = predict(predictionMatrix, kValue, sim, item_similarity_en=False)
    topk = prediction(scores, 50, predictionMatrix)
    metrics = evaluate(topk, validOrTestMatrix)
    return metrics.get('MAP@10')[0]
def hyper_parameter_tuning(train, validation, params, save_path):
    """Grid-search the neighbourhood size k for each model in params['models'].

    Results are checkpointed to a CSV after every setting, and previously
    recorded (model, k) pairs are skipped so interrupted sweeps can resume.
    """
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:
        # No previous results (or unreadable table): start a fresh one.
        # Bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        df = pd.DataFrame(columns=['model', 'k', 'topK'])

    for algorithm in params['models']:
        for k in params['k']:
            # Resume support: skip combinations already recorded.
            if ((df['model'] == algorithm) & (df['k'] == k)).any():
                continue

            progress.section("model: {}, k: {}".format(algorithm, k))

            progress.subsection("Training")
            model = params['models'][algorithm]()
            model.train(train)

            progress.subsection("Prediction")
            prediction_score = model.predict(train, k=k)
            prediction = predict(prediction_score=prediction_score,
                                 topK=params['topK'][-1],
                                 matrix_Train=train)

            progress.subsection("Evaluation")
            result = evaluate(prediction, validation, params['metric'],
                              params['topK'])

            result_dict = {'model': algorithm, 'k': k}
            for name in result.keys():
                result_dict[name] = [round(result[name][0], 4),
                                     round(result[name][1], 4)]

            # DataFrame.append was removed in pandas 2.0; use concat.
            df = pd.concat([df, pd.DataFrame([result_dict])],
                           ignore_index=True)
            save_dataframe_csv(df, table_path, save_path)
def execute(train, test, params, model, gpu_on=True, analytical=False):
    """Train one autoencoder-style model at a fixed hyper-parameter point and
    evaluate it on `test`.

    When `analytical` is True, the raw per-user evaluation result is returned;
    otherwise a one-row DataFrame of rounded metric intervals.
    """
    progress = WorkSplitter()
    columns = ['model', 'rank', 'lambda', 'epoch', 'corruption', 'topK']
    progress.section("\n".join(
        [":".join((str(k), str(params[k]))) for k in columns]))
    df = pd.DataFrame(columns=columns)

    progress.subsection("Train")
    RQ, Yt, Bias = model(train,
                         epoch=params['epoch'],
                         lamb=params['lambda'],
                         rank=params['rank'],
                         corruption=params['corruption'])
    Y = Yt.T

    progress.subsection("Prediction")
    prediction = predict(matrix_U=RQ, matrix_V=Y, bias=Bias,
                         topK=params['topK'][-1], matrix_Train=train,
                         gpu=gpu_on)

    progress.subsection("Evaluation")
    result = evaluate(prediction, test, params['metric'], params['topK'],
                      analytical=analytical)
    if analytical:
        return result

    # Copy params so the caller's dict is not mutated with metric columns
    # (the original aliased it directly).
    result_dict = dict(params)
    for name in result.keys():
        result_dict[name] = [round(result[name][0], 4),
                             round(result[name][1], 4)]

    # DataFrame.append was removed in pandas 2.0; use concat.
    df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
    return df
def lookup(train, validation, params, measure='Cosine', gpu_on=True):
    """Evaluate previously-saved latent factors for every model in
    params['models'] and return one DataFrame row per model.

    Factors are expected under latent/ as U_/V_ (and optionally B_) files
    keyed by model name and rank.
    """
    progress = WorkSplitter()
    df = pd.DataFrame(columns=['model'])

    for algorithm in params['models']:
        RQ = np.load('latent/U_{0}_{1}.npy'.format(algorithm, params['rank']))
        Y = np.load('latent/V_{0}_{1}.npy'.format(algorithm, params['rank']))

        # Not every model persists a bias vector.
        bias_path = 'latent/B_{0}_{1}.npy'.format(algorithm, params['rank'])
        Bias = np.load(bias_path) if os.path.isfile(bias_path) else None

        progress.subsection("Prediction")
        prediction = predict(matrix_U=RQ,
                             matrix_V=Y,
                             measure=measure,
                             bias=Bias,
                             topK=params['topK'][-1],
                             matrix_Train=train,
                             gpu=gpu_on)

        progress.subsection("Evaluation")
        result = evaluate(prediction, validation, params['metric'],
                          params['topK'])

        result_dict = {'model': algorithm}
        for name in result.keys():
            result_dict[name] = [round(result[name][0], 4),
                                 round(result[name][1], 4)]

        # DataFrame.append was removed in pandas 2.0; use concat.
        df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)

    return df
def execute(train, test, params, model, analytical=False):
    """Train one KNN-style model at a fixed k and evaluate it on `test`.

    When `analytical` is True, the raw per-user evaluation result is returned;
    otherwise a one-row DataFrame of rounded metric intervals.
    """
    progress = WorkSplitter()
    columns = ['model', 'k', 'topK']
    progress.section("\n".join(
        [":".join((str(k), str(params[k]))) for k in columns]))
    df = pd.DataFrame(columns=columns)

    progress.subsection("Train")
    # Instantiate under a new name instead of rebinding the `model` parameter.
    estimator = model()
    estimator.train(train)

    progress.subsection("Prediction")
    prediction_score = estimator.predict(train, k=params['k'])
    prediction = predict(prediction_score=prediction_score,
                         topK=params['topK'][-1],
                         matrix_Train=train)

    progress.subsection("Evaluation")
    result = evaluate(prediction, test, params['metric'], params['topK'],
                      analytical=analytical)
    if analytical:
        return result

    # Copy params so the caller's dict is not mutated with metric columns
    # (the original aliased it directly).
    result_dict = dict(params)
    for name in result.keys():
        result_dict[name] = [round(result[name][0], 4),
                             round(result[name][1], 4)]

    # DataFrame.append was removed in pandas 2.0; use concat.
    df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
    return df
def execute(test, params, folder='latent'):
    """Load saved MF latent factors (U/V with user/item biases) for one model
    and evaluate them on `test`.

    Files are optionally prefixed with params['way'], matching the save-side
    naming scheme. Returns a one-row DataFrame of rounded metric values.
    """
    df = pd.DataFrame(columns=['model', 'way'])

    # '{folder}/' or '{folder}/{way}_' — matches the save-side naming scheme.
    prefix = '{0}/'.format(folder)
    if params['way'] is not None:
        prefix += '{0}_'.format(params['way'])

    RQ = np.load('{0}U_{1}_{2}.npy'.format(prefix, params['model'], params['rank']))
    Y = np.load('{0}V_{1}_{2}.npy'.format(prefix, params['model'], params['rank']))
    uBias = np.load('{0}uB_{1}_{2}.npy'.format(prefix, params['model'], params['rank']))
    iBias = np.load('{0}iB_{1}_{2}.npy'.format(prefix, params['model'], params['rank']))

    prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=test,
                         ubias=uBias, ibias=iBias)
    result = evaluate(prediction, test, params['metric'])

    result_dict = {'model': params['model'], 'way': params['way']}
    for name in result.keys():
        result_dict[name] = round(result[name][0], 8)

    # DataFrame.append was removed in pandas 2.0; use concat.
    df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
    return df
def hyper_parameter_tuning(train, validation, params, save_path, gpu_on=True):
    """Grid-search rank/lambda/corruption for each autoencoder-style model.

    Results are checkpointed to a CSV after every setting, and previously
    recorded combinations are skipped so interrupted sweeps can resume.
    """
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:
        # No previous results (or unreadable table): start a fresh one.
        # Bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        df = pd.DataFrame(
            columns=['model', 'rank', 'lambda', 'epoch', 'corruption', 'topK'])

    for algorithm in params['models']:
        for rank in params['rank']:
            for lamb in params['lambda']:
                for corruption in params['corruption']:
                    # Resume support: skip combinations already recorded.
                    if ((df['model'] == algorithm) & (df['rank'] == rank) &
                            (df['lambda'] == lamb) &
                            (df['corruption'] == corruption)).any():
                        continue

                    progress.section(
                        "model: {}, rank: {}, lambda: {}, corruption: {}".format(
                            algorithm, rank, lamb, corruption))

                    RQ, Yt, Bias = params['models'][algorithm](
                        train,
                        epoch=params['epoch'],
                        lamb=lamb,
                        rank=rank,
                        corruption=corruption)
                    Y = Yt.T

                    progress.subsection("Prediction")
                    prediction = predict(matrix_U=RQ,
                                         matrix_V=Y,
                                         bias=Bias,
                                         topK=params['topK'][-1],
                                         matrix_Train=train,
                                         gpu=gpu_on)

                    progress.subsection("Evaluation")
                    result = evaluate(prediction, validation,
                                      params['metric'], params['topK'])

                    result_dict = {
                        'model': algorithm,
                        'rank': rank,
                        'lambda': lamb,
                        'epoch': params['epoch'],
                        'corruption': corruption
                    }
                    for name in result.keys():
                        result_dict[name] = [round(result[name][0], 4),
                                             round(result[name][1], 4)]

                    # DataFrame.append was removed in pandas 2.0; use concat.
                    df = pd.concat([df, pd.DataFrame([result_dict])],
                                   ignore_index=True)
                    save_dataframe_csv(df, table_path, save_path)
def computeUUCombination(rtrain, rtrain_userAvg, userVisitMatrix, rtrain_implicit, combinationDict, SimilarityMatrixIndex, kTune, method='max'):
    """For every similarity-matrix combination, run user-based KNN on four
    rating-matrix variants and plot the resulting MAP@10 scores.

    Parameters
    ----------
    rtrain, rtrain_userAvg, userVisitMatrix, rtrain_implicit : the four
        training-matrix variants that are each predicted and scored.
    combinationDict : maps a combination label to the list of similarity
        indices (1-4) that should use the "selected" matrix variant.
    SimilarityMatrixIndex : maps index 1-4 to a pair of similarity matrices;
        [0] is used when the index is in the combination, [1] otherwise.
    kTune : neighbourhood size forwarded to predictUU.
    method : weight-combination strategy forwarded as chooseWeigthMethod.

    NOTE(review): the validation matrices rvalid, rvalid_userAvg and
    rvalid_implicit are read from module-level globals, not parameters —
    this function only works when those names exist at module scope.
    NOTE(review): if SimilarityMatrixIndex is missing any of the keys 1-4,
    the corresponding similarity* local is never bound and predictUU raises
    UnboundLocalError.
    """
    prediction1 = {}
    prediction2 = {}
    prediction3 = {}
    prediction4 = {}
    for combination, indexList in combinationDict.items():
        #Loop through the similarity matrices
        for index in SimilarityMatrixIndex.keys():
            # Pick variant [0] for indices named in this combination,
            # variant [1] for the rest.
            if index in indexList:
                if index == 1:
                    similarityOne = SimilarityMatrixIndex[1][0]
                elif index == 2:
                    similarityTwo = SimilarityMatrixIndex[2][0]
                elif index == 3:
                    similarityThree = SimilarityMatrixIndex[3][0]
                elif index == 4:
                    similarityFour = SimilarityMatrixIndex[4][0]
            else:
                if index == 1:
                    similarityOne = SimilarityMatrixIndex[1][1]
                elif index == 2:
                    similarityTwo = SimilarityMatrixIndex[2][1]
                elif index == 3:
                    similarityThree = SimilarityMatrixIndex[3][1]
                elif index == 4:
                    similarityFour = SimilarityMatrixIndex[4][1]

        # Variant 1: raw training matrix, validated against global rvalid.
        user_item_prediction_score1 = predictUU(rtrain, kTune, similarityOne, similarityTwo, similarityThree, similarityFour, chooseWeigthMethod=method, item_similarity_en=False)
        user_item_predict1 = prediction(user_item_prediction_score1, 50, rtrain)
        user_item_res1 = evaluate(user_item_predict1, rvalid)
        prediction1[combination] = user_item_res1.get('MAP@10')[0]

        # Variant 2: user-average-centered matrix, against global rvalid_userAvg.
        user_item_prediction_score2 = predictUU(rtrain_userAvg, kTune, similarityOne, similarityTwo, similarityThree, similarityFour, chooseWeigthMethod=method, item_similarity_en=False)
        user_item_predict2 = prediction(user_item_prediction_score2, 50, rtrain_userAvg)
        user_item_res2 = evaluate(user_item_predict2, rvalid_userAvg)
        prediction2[combination] = user_item_res2.get('MAP@10')[0]

        # Variant 3: visit matrix, against global rvalid_implicit.
        user_item_prediction_score3 = predictUU(userVisitMatrix, kTune, similarityOne, similarityTwo, similarityThree, similarityFour, chooseWeigthMethod=method, item_similarity_en=False)
        user_item_predict3 = prediction(user_item_prediction_score3, 50, userVisitMatrix)
        user_item_res3 = evaluate(user_item_predict3, rvalid_implicit)
        prediction3[combination] = user_item_res3.get('MAP@10')[0]

        # Variant 4: implicit training matrix, against global rvalid_implicit.
        user_item_prediction_score4 = predictUU(rtrain_implicit, kTune, similarityOne, similarityTwo, similarityThree, similarityFour, chooseWeigthMethod=method, item_similarity_en=False)
        user_item_predict4 = prediction(user_item_prediction_score4, 50, rtrain_implicit)
        user_item_res4 = evaluate(user_item_predict4, rvalid_implicit)
        prediction4[combination] = user_item_res4.get('MAP@10')[0]

    plotingCombination(prediction1, prediction2, prediction3, prediction4, kTune, method)
def hyper_parameter_tuning(train, validation, params, unif_train, save_path, seed, way, dataset, gpu_on):
    """Grid-search matrix-factorization model families on their respective
    hyper-parameter grids and checkpoint results to per-algorithm CSVs.

    Each family varies a different knob (batch_size+lambda, lambda, lambda(2),
    confidence, or step); every evaluated setting appends one row and saves
    the table, so interrupted sweeps lose at most one setting.
    """
    progress = WorkSplitter()
    table_path = 'tables/'
    data_name = save_path.split('/')[0]
    save_dir = 'tables/' + data_name + '/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    def _shared_kwargs():
        # Keyword arguments common to every model call in this sweep.
        return dict(matrix_unif_train=unif_train, iteration=params['iter'],
                    rank=params['rank'], gpu_on=gpu_on, seed=seed, way=way,
                    dataset=dataset)

    def _record(df, RQ, Y, uBias, iBias, result_dict):
        # Predict on validation, score, append one row and checkpoint the CSV.
        progress.subsection("Prediction")
        prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=validation,
                             ubias=uBias, ibias=iBias, gpu=gpu_on)
        progress.subsection("Evaluation")
        result = evaluate(prediction, validation, params['metric'], gpu=gpu_on)
        for name in result.keys():
            result_dict[name] = round(result[name][0], 8)
        # DataFrame.append was removed in pandas 2.0; use concat.
        df = pd.concat([df, pd.DataFrame([result_dict])], ignore_index=True)
        save_dataframe_csv(df, table_path, save_path)
        return df

    for algorithm in params['models']:
        model_fn = params['models'][algorithm]

        if algorithm in ['BiasedMF', 'PropensityMF']:
            df = pd.DataFrame(columns=['model', 'batch_size', 'lambda', 'iter'])
            for batch_size in params['batch_size']:
                for lam in params['lambda']:
                    progress.section("model: {0}, batch_size: {1}, lambda: {2}".format(algorithm, batch_size, lam))
                    RQ, Y, uBias, iBias = model_fn(train, validation, lam=lam,
                                                   batch_size=batch_size,
                                                   **_shared_kwargs())
                    df = _record(df, RQ, Y, uBias, iBias,
                                 {'model': algorithm, 'batch_size': batch_size,
                                  'lambda': lam, 'iter': params['iter']})

        elif algorithm in ['InitFeatureEmbedMF', 'AlterFeatureEmbedMF', 'WRSampleMF']:
            df = pd.DataFrame(columns=['model', 'lambda', 'iter'])
            for lam in params['lambda']:
                progress.section("model: {0}, lambda: {1}".format(algorithm, lam))
                RQ, Y, uBias, iBias = model_fn(train, validation, lam=lam,
                                               batch_size=params['batch_size'],
                                               **_shared_kwargs())
                df = _record(df, RQ, Y, uBias, iBias,
                             {'model': algorithm, 'lambda': lam,
                              'iter': params['iter']})

        elif algorithm in ['CausalSampleMF', 'BridgeLabelMF']:
            df = pd.DataFrame(columns=['model', 'lambda', 'lambda2', 'iter'])
            for lam in params['lambda']:
                for lam2 in params['lambda2']:
                    progress.section("model: {0}, lambda: {1}, lambda2: {2}".format(algorithm, lam, lam2))
                    RQ, Y, uBias, iBias = model_fn(train, validation, lam=lam,
                                                   lam2=lam2,
                                                   batch_size=params['batch_size'],
                                                   **_shared_kwargs())
                    df = _record(df, RQ, Y, uBias, iBias,
                                 {'model': algorithm, 'lambda': lam,
                                  'lambda2': lam2, 'iter': params['iter']})

        elif algorithm in ['UnionSampleMF', 'RefineLabelMF']:
            df = pd.DataFrame(columns=['model', 'confidence', 'iter'])
            for conf in params['confidence']:
                progress.section("model: {0}, confidence: {1}".format(algorithm, conf))
                RQ, Y, uBias, iBias = model_fn(train, validation,
                                               lam=params['lambda'],
                                               batch_size=params['batch_size'],
                                               confidence=conf,
                                               **_shared_kwargs())
                df = _record(df, RQ, Y, uBias, iBias,
                             {'model': algorithm, 'confidence': conf,
                              'iter': params['iter']})

        elif algorithm in ['BatchSampleMF']:
            df = pd.DataFrame(columns=['model', 'step', 'iter'])
            for step in params['step']:
                progress.section("model: {0}, step: {1}".format(algorithm, step))
                RQ, Y, uBias, iBias = model_fn(train, validation,
                                               lam=params['lambda'],
                                               batch_size=params['batch_size'],
                                               step=step,
                                               **_shared_kwargs())
                df = _record(df, RQ, Y, uBias, iBias,
                             {'model': algorithm, 'step': step,
                              'iter': params['iter']})
def main(args):
    """Entry point: train an item- or user-based factorization model, save the
    latent factors under latent/, predict top-k items and (optionally)
    evaluate on the validation set.

    Fixes: the original used Python-2 `print "..."` statements in three
    places, which are syntax errors under Python 3.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.train))
    if args.validation:
        print("Valid File Name: {0}".format(args.valid))
    print("Algorithm: {0}".format(args.model))
    mode = "Item-based" if args.item else "User-based"
    print("Mode: {0}".format(mode))
    print("Alpha: {0}".format(args.alpha))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("SVD/Alter Iteration: {0}".format(args.iter))
    print("Evaluation Ranking Topk: {0}".format(args.topk))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    if args.shape is None:
        R_train = load_numpy(path=args.path, name=args.train)
    else:
        R_train = load_csv(path=args.path, name=args.train, shape=args.shape)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
    print("Train U-I Dimensions: {0}".format(R_train.shape))

    # Item-Item or User-User: user-based mode factorizes the transpose and
    # swaps the factor roles back afterwards.
    if args.item:
        RQ, Yt, Bias = models[args.model](R_train,
                                          embeded_matrix=np.empty((0)),
                                          iteration=args.iter,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          lam=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        Y = Yt.T
    else:
        Y, RQt, Bias = models[args.model](R_train.T,
                                          embeded_matrix=np.empty((0)),
                                          iteration=args.iter,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          lam=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        RQ = RQt.T

    # Persist factors so later lookup/evaluation scripts can reload them.
    np.save('latent/U_{0}_{1}'.format(args.model, args.rank), RQ)
    np.save('latent/V_{0}_{1}'.format(args.model, args.rank), Y)
    if Bias is not None:
        np.save('latent/B_{0}_{1}'.format(args.model, args.rank), Bias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ,
                         matrix_V=Y,
                         bias=Bias,
                         topK=args.topk,
                         matrix_Train=R_train,
                         measure=args.sim_measure,
                         gpu=True)

    if args.validation:
        progress.section("Create Metrics")
        start_time = time.time()
        metric_names = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision']
        R_valid = load_numpy(path=args.path, name=args.valid)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{0}:{1}".format(metric, result[metric]))
        print("Elapsed: {0}".format(inhour(time.time() - start_time)))
def usercategory(Rtrain, Rvalid, df_input, topK, metric, problem, model_folder, gpu_on=True):
    """Plot per-user-activity-group metric bars for each tuned model row.

    For every row in df_input (model/rank/similarity), the saved latent
    factors are loaded, evaluated analytically per user on Rvalid, joined
    with each user's training-interaction count, bucketed into activity
    groups, and rendered as one bar plot per metric.
    """
    # Training interaction count per user, restricted to users that actually
    # have validation ratings (evaluation rows align with these users).
    user_observation_counts = np.array(np.sum(Rtrain, axis=1)).flatten()
    user_observation_counts = user_observation_counts[
        np.array(np.sum(Rvalid, axis=1)).flatten() != 0]

    evaluated_metrics = None
    giant_dataframes = []

    for idx, row in df_input.iterrows():
        row = row.to_dict()
        RQ = np.load('{2}/U_{0}_{1}.npy'.format(row['model'], row['rank'],
                                                model_folder))
        Y = np.load('{2}/V_{0}_{1}.npy'.format(row['model'], row['rank'],
                                               model_folder))

        # Not every model persists a bias vector.
        bias_path = '{2}/B_{0}_{1}.npy'.format(row['model'], row['rank'],
                                               model_folder)
        Bias = np.load(bias_path) if os.path.isfile(bias_path) else None

        prediction = predict(matrix_U=RQ,
                             matrix_V=Y,
                             bias=Bias,
                             topK=topK[-1],
                             matrix_Train=Rtrain,
                             measure=row['similarity'],
                             gpu=gpu_on)

        result = evaluate(prediction, Rvalid, metric, topK, analytical=True)

        df = pd.DataFrame(result)
        df['model'] = row['model']
        df['user_count'] = user_observation_counts
        giant_dataframes.append(df)

        if evaluated_metrics is None:
            evaluated_metrics = result.keys()

    giant_df = pd.concat(giant_dataframes)
    giant_df['group'] = getGroup(giant_df['user_count'].values)
    giant_df = giant_df.sort_values('group', ascending=True).reset_index(drop=True)

    # Loop variable renamed so it no longer shadows the `metric` parameter.
    for metric_name in evaluated_metrics:
        pandas_bar_plot(x='group', y=metric_name, hue='model',
                        x_name='User Category', y_name=metric_name,
                        df=giant_df,
                        folder='analysis/{0}/numofrating'.format(problem),
                        name=metric_name)
def main(args):
    """Train a label-transfer autoencoder, save its latent factors, and report
    NLL/AUC on the validation split.

    Deep three-decoder models (DeepAutoRec/HintAE/SoftLabelAE) return nine
    arrays and score with decoder K; all other models return five arrays and
    score with decoder Y. Arrays are saved under ``latent/{dataset}``, with an
    ``{way}_`` filename prefix when ``args.way`` is set.
    """
    progress = WorkSplitter()

    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.dataset + args.train))
    print("Uniform Train File Name: {0}".format(args.dataset + args.unif_train))
    print("Valid File Name: {0}".format(args.dataset + args.valid))
    print("Algorithm: {0}".format(args.model))
    print("Way: {0}".format(args.way))
    print("Seed: {0}".format(args.seed))
    print("Batch Size: {0}".format(args.batch_size))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("Iteration: {0}".format(args.iter))

    progress.section("Loading Data")
    start_time = time.time()
    train = load_numpy(path=args.path, name=args.dataset + args.train)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
    print("Train U-I Dimensions: {0}".format(train.shape))

    valid = load_numpy(path=args.path, name=args.dataset + args.valid)
    unif_train = load_numpy(path=args.path, name=args.dataset + args.unif_train)

    save_path = 'latent/' + args.dataset

    if args.model in ['DeepAutoRec', 'HintAE', 'SoftLabelAE']:
        RQ, X, xBias, Y, yBias, Z, zBias, K, kBias = models[args.model](
            train, valid, dataset=args.dataset, matrix_unif_train=unif_train,
            iteration=args.iter, rank=args.rank, rank2=args.rank2,
            gpu_on=args.gpu, lam=args.lamb, seed=args.seed,
            batch_size=args.batch_size, way=args.way,
            confidence=args.confidence, step=args.step, tau=args.tau)

        _save_latents(save_path, args.way, args.model, args.rank,
                      [('U', RQ), ('Y', Y), ('X', X), ('Z', Z), ('K', K)])
        if xBias is not None:
            _save_latents(save_path, args.way, args.model, args.rank,
                          [('xB', xBias), ('yB', yBias), ('zB', zBias),
                           ('kB', kBias)])
        # Deep models score with the final decoder K.
        matrix_V = K.T
    else:
        RQ, X, xBias, Y, yBias = models[args.model](
            train, valid, dataset=args.dataset, matrix_unif_train=unif_train,
            iteration=args.iter, rank=args.rank, gpu_on=args.gpu,
            lam=args.lamb, lam2=args.lamb2, seed=args.seed,
            batch_size=args.batch_size, way=args.way,
            confidence=args.confidence, step=args.step)

        _save_latents(save_path, args.way, args.model, args.rank,
                      [('U', RQ), ('Y', Y), ('X', X)])
        if xBias is not None:
            _save_latents(save_path, args.way, args.model, args.rank,
                          [('xB', xBias), ('yB', yBias)])
        matrix_V = Y.T

    progress.section("Predict")
    prediction = predict(matrix_U=RQ, matrix_V=matrix_V, matrix_Valid=valid,
                         bias=yBias, gpu=args.gpu)

    progress.section("Evaluation")
    start_time = time.time()
    metric_names = ['NLL', 'AUC']
    result = evaluate(prediction, valid, metric_names, gpu=args.gpu)
    print("----Final Result----")
    for metric in result.keys():
        print("{0}:{1}".format(metric, result[metric]))
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))


def _save_latents(save_path, way, model, rank, named_arrays):
    """Save each (tag, array) pair as ``{save_path}/[{way}_]{tag}_{model}_{rank}.npy``.

    ``way`` is an optional filename prefix; when None the bare tag is used,
    matching the historical on-disk layout.
    """
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    prefix = '' if way is None else way + '_'
    for tag, array in named_arrays:
        np.save('{0}/{1}{2}_{3}_{4}'.format(save_path, prefix, tag, model, rank),
                array)
def hyper_parameter_tuning(train, validation, params, save_path,
                           measure='Cosine', gpu_on=True):
    """Exhaustive grid search over the 12-dimensional hyper-parameter space in
    ``params``, checkpointing results to CSV after every combination.

    Resumable: if a results table already exists at ``save_path``, previously
    evaluated combinations are skipped.
    """
    # Function-scope import so the file-level import block stays untouched.
    from itertools import product

    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:  # no previous results table: start fresh (was a bare except)
        df = pd.DataFrame(columns=['model', 'similarity', 'alpha', 'batch_size',
                                   'corruption', 'epoch', 'iteration',
                                   'key_dimension', 'lambda', 'learning_rate',
                                   'mode_dimension', 'normalize', 'rank',
                                   'root', 'topK'])

    # One flat iterator instead of twelve nested for-loops.
    search_space = product(params['models'], params['alpha'],
                           params['batch_size'], params['corruption'],
                           params['epoch'], params['iteration'],
                           params['key_dimension'], params['lambda'],
                           params['learning_rate'], params['mode_dimension'],
                           params['rank'], params['root'])

    for (algorithm, alpha, batch_size, corruption, epoch, iteration, key_dim,
         lamb, learning_rate, mode_dim, rank, root) in search_space:
        # Resume support: skip combinations already present in the saved table.
        if ((df['model'] == algorithm) &
                (df['alpha'] == alpha) &
                (df['batch_size'] == batch_size) &
                (df['corruption'] == corruption) &
                (df['epoch'] == epoch) &
                (df['iteration'] == iteration) &
                (df['key_dimension'] == key_dim) &
                (df['lambda'] == lamb) &
                (df['learning_rate'] == learning_rate) &
                (df['mode_dimension'] == mode_dim) &
                (df['rank'] == rank) &
                (df['root'] == root)).any():
            continue

        # `template`, not `format`, to avoid shadowing the builtin.
        template = ("model: {}, alpha: {}, batch_size: {}, corruption: {}, "
                    "epoch: {}, iteration: {}, key_dimension: {}, lambda: {}, "
                    "learning_rate: {}, mode_dimension: {}, rank: {}, root: {}")
        progress.section(template.format(algorithm, alpha, batch_size,
                                         corruption, epoch, iteration, key_dim,
                                         lamb, learning_rate, mode_dim, rank,
                                         root))

        RQ, Yt, Bias = params['models'][algorithm](
            train, embedded_matrix=np.empty((0)), mode_dim=mode_dim,
            key_dim=key_dim, batch_size=batch_size,
            learning_rate=learning_rate, iteration=iteration, epoch=epoch,
            rank=rank, corruption=corruption, gpu_on=gpu_on, lamb=lamb,
            alpha=alpha, root=root)
        Y = Yt.T

        progress.subsection("Prediction")
        prediction = predict(matrix_U=RQ, matrix_V=Y, bias=Bias,
                             topK=params['topK'][-1], matrix_Train=train,
                             measure=measure, gpu=gpu_on)

        progress.subsection("Evaluation")
        result = evaluate(prediction, validation, params['metric'],
                          params['topK'])

        result_dict = {'model': algorithm, 'alpha': alpha,
                       'batch_size': batch_size, 'corruption': corruption,
                       'epoch': epoch, 'iteration': iteration,
                       'key_dimension': key_dim, 'lambda': lamb,
                       'learning_rate': learning_rate,
                       'mode_dimension': mode_dim, 'rank': rank,
                       'similarity': params['similarity'], 'root': root}
        # Each metric stores [value, confidence-interval half-width].
        for name in result.keys():
            result_dict[name] = [round(result[name][0], 4),
                                 round(result[name][1], 4)]

        # pandas < 2.0 API, matching the rest of the file; checkpoint each run.
        df = df.append(result_dict, ignore_index=True)
        save_dataframe_csv(df, table_path, save_path)
def converge(Rtrain, Rtest, df, table_path, file_name, epochs=10, gpu_on=True):
    """Re-train each tuned autoencoder and record test metrics every 50 epochs
    to trace convergence.

    Each row of ``df`` describes one tuned configuration; the full sweep is
    repeated 3 times, and results are checkpointed to CSV after every model.
    Returns the accumulated results DataFrame.
    """
    progress = WorkSplitter()
    m, n = Rtrain.shape
    valid_models = autoencoders.keys()
    results = pd.DataFrame(
        columns=['model', 'rank', 'lambda', 'epoch', 'optimizer'])
    # Three independent repetitions of the whole sweep.
    for run in range(3):
        for idx, row in df.iterrows():
            row = row.to_dict()
            if row['model'] not in valid_models:
                continue
            progress.section(json.dumps(row))
            # Fixed evaluation protocol regardless of the tuning row.
            row['metric'] = ['NDCG', 'R-Precision']
            row['topK'] = [50]
            if 'optimizer' not in row.keys():
                row['optimizer'] = 'RMSProp'
            # Some autoencoder constructors take (n, rank, ...), others
            # (m, n, rank, ...) — fall back on the wider signature.
            # NOTE(review): the bare except also hides real construction
            # failures; narrowing to TypeError looks safer — confirm.
            try:
                model = autoencoders[row['model']](
                    n, row['rank'], batch_size=100, lamb=row['lambda'],
                    optimizer=Regularizer[row['optimizer']])
            except:
                model = autoencoders[row['model']](
                    m, n, row['rank'], batch_size=100, lamb=row['lambda'],
                    optimizer=Regularizer[row['optimizer']])
            batches = model.get_batches(Rtrain, 100)
            epoch_batch = 50
            # Train in 50-epoch chunks, evaluating after each chunk.
            # NOTE(review): with the default epochs=10 this loop runs zero
            # times (10 // 50 == 0) — presumably callers pass epochs >= 50;
            # confirm against call sites.
            for i in range(epochs // epoch_batch):
                model.train_model(Rtrain, corruption=row['corruption'],
                                  epoch=epoch_batch, batches=batches)
                RQ = model.get_RQ(Rtrain)
                Y = model.get_Y()
                Bias = model.get_Bias()
                Y = Y.T
                prediction = predict(matrix_U=RQ, matrix_V=Y, bias=Bias,
                                     topK=row['topK'][0], matrix_Train=Rtrain,
                                     measure='Cosine', gpu=gpu_on)
                result = evaluate(prediction, Rtest, row['metric'],
                                  row['topK'])
                # NOTE: original marker — "Not finished yet".
                result_dict = {
                    'model': row['model'],
                    'rank': row['rank'],
                    'lambda': row['lambda'],
                    'optimizer': row['optimizer'],
                    'epoch': (i + 1) * epoch_batch
                }
                for name in result.keys():
                    result_dict[name] = round(result[name][0], 4)
                results = results.append(result_dict, ignore_index=True)
            # Release the TF session/graph before building the next model.
            model.sess.close()
            tf.reset_default_graph()
            save_dataframe_csv(results, table_path, file_name)
    return results
def hyper_parameter_tuning(train, validation, params, save_path,
                           measure='Cosine', gpu_on=True):
    """Grid search over rank/alpha/lambda/corruption/root for every model in
    ``params['models']``, checkpointing results to CSV after each combination.

    Resumable: combinations already present in the saved table are skipped.
    """
    # Function-scope import so the file-level import block stays untouched.
    from itertools import product

    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:  # no previous results table: start fresh (was a bare except)
        df = pd.DataFrame(columns=[
            'model', 'rank', 'alpha', 'lambda', 'iter', 'similarity',
            'corruption', 'root', 'topK'
        ])

    # One flat iterator instead of six nested for-loops.
    for algorithm, rank, alpha, lam, corruption, root in product(
            params['models'], params['rank'], params['alpha'],
            params['lambda'], params['corruption'], params['root']):
        # Resume support: skip combinations already evaluated.
        if ((df['model'] == algorithm) &
                (df['rank'] == rank) &
                (df['alpha'] == alpha) &
                (df['lambda'] == lam) &
                (df['corruption'] == corruption) &
                (df['root'] == root)).any():
            continue

        progress.section(
            "model: {0}, rank: {1}, alpha: {2}, lambda: {3}, "
            "corruption: {4}, root: {5}".format(algorithm, rank, alpha, lam,
                                                corruption, root))

        RQ, Yt, Bias = params['models'][algorithm](
            train,
            embeded_matrix=np.empty((0)),
            iteration=params['iter'],
            rank=rank,
            lam=lam,
            alpha=alpha,
            corruption=corruption,
            root=root,
            gpu_on=gpu_on)
        Y = Yt.T

        progress.subsection("Prediction")
        prediction = predict(matrix_U=RQ, matrix_V=Y, measure=measure,
                             bias=Bias, topK=params['topK'][-1],
                             matrix_Train=train, gpu=gpu_on)

        progress.subsection("Evaluation")
        result = evaluate(prediction, validation, params['metric'],
                          params['topK'])

        result_dict = {
            'model': algorithm,
            'rank': rank,
            'alpha': alpha,
            'lambda': lam,
            'iter': params['iter'],
            'similarity': params['similarity'],
            'corruption': corruption,
            'root': root
        }
        # Each metric stores [value, confidence-interval half-width].
        for name in result.keys():
            result_dict[name] = [
                round(result[name][0], 4),
                round(result[name][1], 4)
            ]

        # pandas < 2.0 API, matching the rest of the file; checkpoint each run.
        df = df.append(result_dict, ignore_index=True)
        save_dataframe_csv(df, table_path, save_path)
def general(train, test, params, model, measure='Cosine', gpu_on=True,
            analytical=False, model_folder='latent'):
    """Evaluate one model configuration on ``test``, reusing cached latent
    factors from ``model_folder`` when present and training ``model`` otherwise.

    Returns the raw per-user result dict when ``analytical`` is True; otherwise
    a one-row DataFrame combining the settings with rounded [value, CI] pairs
    for each metric.
    """
    progress = WorkSplitter()
    columns = [
        'model', 'similarity', 'alpha', 'batch_size', 'corruption', 'epoch',
        'iteration', 'key_dimension', 'lambda', 'learning_rate',
        'mode_dimension', 'normalize', 'rank', 'root', 'topK'
    ]
    progress.section("\n".join(
        [":".join((str(k), str(params[k]))) for k in columns]))
    df = pd.DataFrame(columns=columns)

    if os.path.isfile('{2}/U_{0}_{1}.npy'.format(params['model'],
                                                 params['rank'],
                                                 model_folder)):
        # Reuse cached factors from a previous run.
        RQ = np.load('{2}/U_{0}_{1}.npy'.format(params['model'],
                                                params['rank'], model_folder))
        Y = np.load('{2}/V_{0}_{1}.npy'.format(params['model'],
                                               params['rank'], model_folder))
        if os.path.isfile('{2}/B_{0}_{1}.npy'.format(params['model'],
                                                     params['rank'],
                                                     model_folder)):
            Bias = np.load('{2}/B_{0}_{1}.npy'.format(params['model'],
                                                      params['rank'],
                                                      model_folder))
        else:
            Bias = None
    else:
        RQ, Yt, Bias = model(train,
                             embedded_matrix=np.empty((0)),
                             mode_dim=params['mode_dimension'],
                             key_dim=params['key_dimension'],
                             batch_size=params['batch_size'],
                             learning_rate=params['learning_rate'],
                             iteration=params['iteration'],
                             epoch=params['epoch'],
                             rank=params['rank'],
                             corruption=params['corruption'],
                             gpu_on=gpu_on,
                             lamb=params['lambda'],
                             alpha=params['alpha'],
                             root=params['root'])
        Y = Yt.T
        # Caching of freshly trained factors is currently disabled:
        # np.save('{2}/U_{0}_{1}'.format(params['model'], params['rank'], model_folder), RQ)
        # np.save('{2}/V_{0}_{1}'.format(params['model'], params['rank'], model_folder), Y)
        # if Bias is not None:
        #     np.save('{2}/B_{0}_{1}'.format(params['model'], params['rank'], model_folder), Bias)

    progress.subsection("Prediction")
    prediction = predict(matrix_U=RQ, matrix_V=Y, measure=measure, bias=Bias,
                         topK=params['topK'][-1], matrix_Train=train,
                         gpu=gpu_on)

    progress.subsection("Evaluation")
    result = evaluate(prediction, test, params['metric'], params['topK'],
                      analytical=analytical)

    if analytical:
        return result

    # Copy before adding metric columns: the original aliased `params` here
    # and silently mutated the caller's dict.
    result_dict = dict(params)
    for name in result.keys():
        result_dict[name] = [
            round(result[name][0], 4),
            round(result[name][1], 4)
        ]
    df = df.append(result_dict, ignore_index=True)
    return df
def main(args):
    """Train one uplift/causal model, persist its factors and biases under
    ``latent/{dataset}``, then report NLL and AUC on the validation split."""
    progress = WorkSplitter()

    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.dataset + args.train))
    print("Uniform Train File Name: {0}".format(args.dataset + args.unif_train))
    print("Valid File Name: {0}".format(args.dataset + args.valid))
    print("Algorithm: {0}".format(args.model))
    print("Way: {0}".format(args.way))
    print("Seed: {0}".format(args.seed))
    print("Batch Size: {0}".format(args.batch_size))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("Iteration: {0}".format(args.iter))

    progress.section("Loading Data")
    start_time = time.time()
    train = load_numpy(path=args.path, name=args.dataset + args.train)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
    print("Train U-I Dimensions: {0}".format(train.shape))

    valid = load_numpy(path=args.path, name=args.dataset + args.valid)
    unif_train = load_numpy(path=args.path, name=args.dataset + args.unif_train)

    RQ, Y, uBias, iBias = models[args.model](train, valid,
                                             dataset=args.dataset,
                                             matrix_unif_train=unif_train,
                                             iteration=args.iter,
                                             rank=args.rank,
                                             gpu_on=args.gpu,
                                             lam=args.lamb,
                                             lam2=args.lamb2,
                                             seed=args.seed,
                                             batch_size=args.batch_size,
                                             way=args.way,
                                             confidence=args.confidence,
                                             step=args.step)

    save_path = 'latent/' + args.dataset
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Filenames gain an "{way}_" prefix when a re-weighting way is selected;
    # the resulting paths are identical to the historical layout.
    name_prefix = (save_path + '/' if args.way is None
                   else save_path + '/' + args.way + '_')
    np.save(name_prefix + 'U_{0}_{1}'.format(args.model, args.rank), RQ)
    np.save(name_prefix + 'V_{0}_{1}'.format(args.model, args.rank), Y)
    if uBias is not None:
        np.save(name_prefix + 'uB_{0}_{1}'.format(args.model, args.rank), uBias)
        np.save(name_prefix + 'iB_{0}_{1}'.format(args.model, args.rank), iBias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=valid,
                         ubias=uBias, ibias=iBias, gpu=args.gpu)

    progress.section("Evaluation")
    start_time = time.time()
    metric_names = ['NLL', 'AUC']
    result = evaluate(prediction, valid, metric_names, gpu=args.gpu)
    print("----Final Result----")
    for metric in result.keys():
        print("{0}:{1}".format(metric, result[metric]))
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
def hyper_parameter_tuning(train, validation, params, measure='Cosine',
                           gpu_on=True):
    """Grid search over rank (and, when the model accepts them, alpha/root)
    for every model in ``params['models']``; returns a DataFrame with one row
    per combination holding [value, CI] pairs for each metric."""
    progress = WorkSplitter()
    df = pd.DataFrame(columns=['model', 'rank', 'alpha', 'root', 'topK'])

    for algorithm in params['models']:
        # inspect.getargspec was removed in Python 3.11;
        # getfullargspec is the drop-in replacement (index 0 = arg names).
        model_arg_names = inspect.getfullargspec(
            params['models'][algorithm])[0]
        # Only sweep hyper-parameters the model actually accepts; otherwise
        # run once with the neutral value 1.
        alphas = params['alpha'] if 'alpha' in model_arg_names else [1]
        roots = params['root'] if 'root' in model_arg_names else [1]

        for rank in params['rank']:
            for alpha in alphas:
                for root in roots:
                    progress.section(
                        "model: {0}, rank: {1}, root: {2}, alpha: {3}".format(
                            algorithm, rank, root, alpha))

                    # Was hard-coded gpu_on=True, silently ignoring the
                    # function's own gpu_on flag.
                    RQ, Yt, Bias = params['models'][algorithm](
                        train,
                        embeded_matrix=np.empty((0)),
                        iteration=params['iter'],
                        rank=rank,
                        lam=params['lam'],
                        root=root,
                        alpha=alpha,
                        gpu_on=gpu_on)
                    Y = Yt.T

                    progress.subsection("Prediction")
                    prediction = predict(matrix_U=RQ, matrix_V=Y,
                                         measure=measure, bias=Bias,
                                         topK=params['topK'][-1],
                                         matrix_Train=train, gpu=gpu_on)

                    progress.subsection("Evaluation")
                    result = evaluate(prediction, validation,
                                      params['metric'], params['topK'])

                    result_dict = {
                        'model': algorithm,
                        'rank': rank,
                        'root': root,
                        'alpha': alpha
                    }
                    # Each metric stores [value, confidence-interval half-width].
                    for name in result.keys():
                        result_dict[name] = [
                            round(result[name][0], 4),
                            round(result[name][1], 4)
                        ]
                    # pandas < 2.0 API, matching the rest of the file.
                    df = df.append(result_dict, ignore_index=True)
    return df
def main(args):
    """Train one latent-factor model in item-based or user-based mode, then
    optionally evaluate ranking metrics on the validation split."""
    progress = WorkSplitter()

    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.data_dir))
    print("Train File Name: {0}".format(args.train_set))
    if args.validation:
        print("Valid File Name: {0}".format(args.valid_set))
    print("Algorithm: {0}".format(args.model))
    # Truthiness instead of `== True`.
    mode = "Item-based" if args.item else "User-based"
    print("Normalize: {0}".format(args.normalize))
    print("Mode: {0}".format(mode))
    print("Alpha: {0}".format(args.alpha))
    print("Rank: {0}".format(args.rank))
    print("Mode Dimension: {0}".format(args.mode_dim))
    print("Key Dimension: {0}".format(args.key_dim))
    print("Batch Size: {0}".format(args.batch_size))
    print("Optimizer: {0}".format(args.optimizer))
    print("Learning Rate: {0}".format(args.learning_rate))
    print("Lambda: {0}".format(args.lamb))
    print("SVD/Alter Iteration: {0}".format(args.iteration))
    print("Epoch: {0}".format(args.epoch))
    print("Corruption: {0}".format(args.corruption))
    print("Root: {0}".format(args.root))
    print("Evaluation Ranking Topk: {0}".format(args.topk))

    progress.section("Loading Data")
    start_time = time.time()
    if args.shape is None:
        R_train = load_numpy(path=args.data_dir, name=args.train_set)
    else:
        # R_train = load_pandas(path=args.data_dir, name=args.train_set, shape=args.shape)
        R_train = load_csv(path=args.data_dir, name=args.train_set,
                           shape=args.shape)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
    print("Train U-I Dimensions: {0}".format(R_train.shape))

    # Item-Item or User-User: the user-based branch trains on the transposed
    # matrix and swaps the roles of the two factor matrices.
    if args.item:
        RQ, Yt, Bias = models[args.model](R_train,
                                          embedded_matrix=np.empty((0)),
                                          mode_dim=args.mode_dim,
                                          key_dim=args.key_dim,
                                          batch_size=args.batch_size,
                                          optimizer=args.optimizer,
                                          learning_rate=args.learning_rate,
                                          normalize=args.normalize,
                                          iteration=args.iteration,
                                          epoch=args.epoch,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          gpu_on=args.gpu,
                                          lamb=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        Y = Yt.T
    else:
        # epoch=args.epoch was missing here although the item-based branch
        # passes it; added for consistency between the two modes.
        Y, RQt, Bias = models[args.model](R_train.T,
                                          embedded_matrix=np.empty((0)),
                                          mode_dim=args.mode_dim,
                                          key_dim=args.key_dim,
                                          batch_size=args.batch_size,
                                          optimizer=args.optimizer,
                                          learning_rate=args.learning_rate,
                                          normalize=args.normalize,
                                          iteration=args.iteration,
                                          epoch=args.epoch,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          gpu_on=args.gpu,
                                          lamb=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        RQ = RQt.T

    # np.save('latent/U_{0}_{1}'.format(args.model, args.rank), RQ)
    # np.save('latent/V_{0}_{1}'.format(args.model, args.rank), Y)
    # if Bias is not None:
    #     np.save('latent/B_{0}_{1}'.format(args.model, args.rank), Bias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ, matrix_V=Y, bias=Bias, topK=args.topk,
                         matrix_Train=R_train, measure=args.sim_measure,
                         gpu=args.gpu)

    if args.validation:
        progress.section("Create Metrics")
        start_time = time.time()
        metric_names = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision']
        R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{0}:{1}".format(metric, result[metric]))
        print("Elapsed: {0}".format(inhour(time.time() - start_time)))
def hyper_parameter_tuning(train, validation, params, unif_train, save_path,
                           seed, way, dataset, gpu_on):
    """Per-algorithm hyper-parameter grid search for the label-transfer
    autoencoders, checkpointing each algorithm's results table to CSV after
    every evaluated combination.

    Each algorithm family sweeps its own subset of hyper-parameters; deep
    models (SoftLabelAE/HintAE) return nine arrays and score with decoder K,
    the rest return five arrays and score with decoder Y.
    """
    progress = WorkSplitter()
    table_path = 'tables/'
    data_name = save_path.split('/')[0]
    save_dir = 'tables/' + data_name + '/'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    for algorithm in params['models']:
        if algorithm in ['AutoRec']:
            df = pd.DataFrame(
                columns=['model', 'rank', 'batch_size', 'lambda', 'iter'])
            for rank in params['rank']:
                for batch_size in params['batch_size']:
                    for lam in params['lambda']:
                        progress.section(
                            "model: {0}, rank: {1}, batch_size: {2}, "
                            "lambda: {3}".format(algorithm, rank, batch_size,
                                                 lam))
                        RQ, X, xBias, Y, yBias = params['models'][algorithm](
                            train, validation,
                            matrix_unif_train=unif_train,
                            iteration=params['iter'], rank=rank,
                            gpu_on=gpu_on, lam=lam, seed=seed,
                            batch_size=batch_size, way=way, dataset=dataset)
                        df = _predict_eval_save(
                            df, RQ, Y.T, yBias, validation, params['metric'],
                            gpu_on,
                            {'model': algorithm, 'rank': rank,
                             'batch_size': batch_size, 'lambda': lam,
                             'iter': params['iter']},
                            table_path, save_path, progress)

        elif algorithm in ['InitFeatureEmbedAE', 'ConcatFeatureEmbedAE']:
            df = pd.DataFrame(
                columns=['model', 'batch_size', 'lambda', 'iter'])
            for batch_size in params['batch_size']:
                for lam in params['lambda']:
                    progress.section(
                        "model: {0}, batch_size: {1}, lambda: {2}".format(
                            algorithm, batch_size, lam))
                    RQ, X, xBias, Y, yBias = params['models'][algorithm](
                        train, validation, matrix_unif_train=unif_train,
                        iteration=params['iter'], rank=params['rank'],
                        gpu_on=gpu_on, lam=lam, seed=seed,
                        batch_size=batch_size, way=way, dataset=dataset)
                    df = _predict_eval_save(
                        df, RQ, Y.T, yBias, validation, params['metric'],
                        gpu_on,
                        {'model': algorithm, 'batch_size': batch_size,
                         'lambda': lam, 'iter': params['iter']},
                        table_path, save_path, progress)

        elif algorithm in ['UnionSampleAE', 'RefineLabelAE']:
            df = pd.DataFrame(columns=['model', 'confidence', 'iter'])
            for conf in params['confidence']:
                progress.section("model: {0}, confidence: {1}".format(
                    algorithm, conf))
                RQ, X, xBias, Y, yBias = params['models'][algorithm](
                    train, validation, matrix_unif_train=unif_train,
                    iteration=params['iter'], rank=params['rank'],
                    gpu_on=gpu_on, lam=params['lambda'], seed=seed,
                    batch_size=params['batch_size'], way=way,
                    confidence=conf, dataset=dataset)
                df = _predict_eval_save(
                    df, RQ, Y.T, yBias, validation, params['metric'], gpu_on,
                    {'model': algorithm, 'confidence': conf,
                     'iter': params['iter']},
                    table_path, save_path, progress)

        elif algorithm in ['BatchSampleAE']:
            df = pd.DataFrame(columns=['model', 'step', 'iter'])
            for step in params['step']:
                progress.section("model: {0}, step: {1}".format(algorithm,
                                                                step))
                RQ, X, xBias, Y, yBias = params['models'][algorithm](
                    train, validation, matrix_unif_train=unif_train,
                    iteration=params['iter'], rank=params['rank'],
                    gpu_on=gpu_on, lam=params['lambda'], seed=seed,
                    batch_size=params['batch_size'], way=way, step=step,
                    dataset=dataset)
                df = _predict_eval_save(
                    df, RQ, Y.T, yBias, validation, params['metric'], gpu_on,
                    {'model': algorithm, 'step': step,
                     'iter': params['iter']},
                    table_path, save_path, progress)

        elif algorithm in ['BridgeLabelAE']:
            df = pd.DataFrame(
                columns=['model', 'lambda', 'lambda2', 'iter'])
            for lam in params['lambda']:
                for lam2 in params['lambda2']:
                    progress.section(
                        "model: {0}, lambda: {1}, lambda2: {2}".format(
                            algorithm, lam, lam2))
                    RQ, X, xBias, Y, yBias = params['models'][algorithm](
                        train, validation, matrix_unif_train=unif_train,
                        iteration=params['iter'], rank=params['rank'],
                        gpu_on=gpu_on, lam=lam, lam2=lam2, seed=seed,
                        batch_size=params['batch_size'], way=way,
                        dataset=dataset)
                    df = _predict_eval_save(
                        df, RQ, Y.T, yBias, validation, params['metric'],
                        gpu_on,
                        {'model': algorithm, 'lambda': lam, 'lambda2': lam2,
                         'iter': params['iter']},
                        table_path, save_path, progress)

        elif algorithm in ['SoftLabelAE']:
            df = pd.DataFrame(
                columns=['model', 'confidence', 'tau', 'iter'])
            for conf in params['confidence']:
                for tau in params['tau']:
                    progress.section(
                        "model: {0}, confidence: {1}, tau: {2}".format(
                            algorithm, conf, tau))
                    RQ, X, xBias, Y, yBias, Z, zBias, K, kBias = params[
                        'models'][algorithm](
                            train, validation, matrix_unif_train=unif_train,
                            iteration=params['iter'], rank=params['rank'],
                            rank2=params['rank2'], gpu_on=gpu_on,
                            lam=params['lambda'], seed=seed,
                            batch_size=params['batch_size'],
                            confidence=conf, tau=tau, dataset=dataset)
                    # Deep models score with the final decoder K.
                    df = _predict_eval_save(
                        df, RQ, K.T, yBias, validation, params['metric'],
                        gpu_on,
                        {'model': algorithm, 'confidence': conf, 'tau': tau,
                         'iter': params['iter']},
                        table_path, save_path, progress)

        elif algorithm in ['HintAE']:
            df = pd.DataFrame(columns=['model', 'confidence', 'iter'])
            for conf in params['confidence']:
                progress.section("model: {0}, confidence: {1}".format(
                    algorithm, conf))
                RQ, X, xBias, Y, yBias, Z, zBias, K, kBias = params['models'][
                    algorithm](train, validation,
                               matrix_unif_train=unif_train,
                               iteration=params['iter'], rank=params['rank'],
                               rank2=params['rank2'], gpu_on=gpu_on,
                               lam=params['lambda'], seed=seed,
                               batch_size=params['batch_size'],
                               confidence=conf, dataset=dataset)
                # Deep models score with the final decoder K.
                df = _predict_eval_save(
                    df, RQ, K.T, yBias, validation, params['metric'], gpu_on,
                    {'model': algorithm, 'confidence': conf,
                     'iter': params['iter']},
                    table_path, save_path, progress)


def _predict_eval_save(df, RQ, matrix_V, yBias, validation, metric_names,
                       gpu_on, result_dict, table_path, save_path, progress):
    """Predict on the validation split, evaluate, append one row to ``df``
    and checkpoint the table to disk; returns the updated DataFrame.

    ``result_dict`` carries the branch-specific hyper-parameter columns;
    metric values (rounded to 8 decimals) are added to it in place.
    """
    progress.subsection("Prediction")
    prediction = predict(matrix_U=RQ, matrix_V=matrix_V,
                         matrix_Valid=validation, bias=yBias, gpu=gpu_on)
    progress.subsection("Evaluation")
    result = evaluate(prediction, validation, metric_names, gpu=gpu_on)
    for name in result.keys():
        result_dict[name] = round(result[name][0], 8)
    # pandas < 2.0 API, matching the rest of the file.
    df = df.append(result_dict, ignore_index=True)
    save_dataframe_csv(df, table_path, save_path)
    return df