def main(args):
    """Evaluate the best tuned settings (selected by NDCG) on the test fold."""
    # Resolve the result-table directory from the global config.
    table_path = load_yaml('config/global.yml', key='path')['tables']
    best_settings = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)
    # Final evaluation trains on train + validation combined.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]
    collected = []
    for _, setting in best_settings.iterrows():
        timer_start = timeit.default_timer()
        setting = setting.to_dict()
        setting['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', "MAP"]
        setting['topK'] = topK
        collected.append(execute(R_train, R_test, setting,
                                 models[setting['model']],
                                 gpu_on=args.gpu))
        print('Time: ', timeit.default_timer() - timer_start)

    save_dataframe_csv(pd.concat(collected), table_path, args.name)
def _binarize_top_keyphrases(keyphrase_freq, top_n=10):
    """Return a 0/1 copy of `keyphrase_freq` keeping only each row's
    `top_n` most frequent nonzero entries.

    Args:
        keyphrase_freq: 2-D dense array of per-row keyphrase frequencies.
        top_n: number of most frequent keyphrases to keep per row
            (generalized from the previously hard-coded 10).

    Returns:
        New array, same shape and dtype, with exactly the selected
        positions set to 1 and everything else 0.
    """
    num_rows, _ = keyphrase_freq.shape
    binarized = np.zeros_like(keyphrase_freq)
    for i in range(num_rows):
        row = keyphrase_freq[i]
        nonzero_index = row.nonzero()[0]
        # Rank only nonzero entries, so rows with fewer than top_n
        # keyphrases never pick up zero-frequency ones.
        top_index = nonzero_index[np.argsort(-row[nonzero_index])[:top_n]]
        binarized[i, top_index] = 1
    return binarized


def main(args):
    """Run the critiquing pipeline on binarized keyphrase matrices."""
    R_train = load_numpy(path=args.data_dir, name=args.train_set)

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    # The user-keyphrase matrix is used as presence/absence only.
    R_train_keyphrase[R_train_keyphrase != 0] = 1

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).T.toarray()
    # Keep each item's 10 most frequent keyphrases, binarized.
    R_train_item_keyphrase = _binarize_top_keyphrases(R_train_item_keyphrase)
    R_train_item_keyphrase = sparse.csr_matrix(R_train_item_keyphrase).T

    params = dict()
    # params['model_saved_path'] = args.model_saved_path
    critiquing(R_train, R_train_keyphrase, R_train_item_keyphrase, params,
               args.num_users_sampled,
               load_path=args.load_path,
               save_path=args.save_path,
               critiquing_function=args.critiquing_function)
def main(args):
    """Evaluate the best tuned settings (selected by MAP@10) on the test fold.

    Removed leftover commented-out debug code (ipdb trace and ad-hoc
    rating-threshold binarization experiments).
    """
    table_path = load_yaml('config/global.yml', key='path')['tables']
    df = find_best_hyperparameters(table_path + args.tuning_result_path, 'MAP@10')

    R_train = load_numpy(path=args.path, name=args.train)
    R_valid = load_numpy(path=args.path, name=args.valid)
    R_test = load_numpy(path=args.path, name=args.test)
    # Final evaluation trains on train + validation combined.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]
    frame = []
    for idx, row in df.iterrows():
        start = timeit.default_timer()
        row = row.to_dict()
        row['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', "MAP"]
        row['topK'] = topK
        result = execute(R_train, R_test, row, models[row['model']])
        stop = timeit.default_timer()
        print('Time: ', stop - start)
        frame.append(result)

    results = pd.concat(frame)
    save_dataframe_csv(results, table_path, args.name)
def main(args):
    """Reproduce the best tuned models (by NDCG) and evaluate on the test set."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    best_rows = find_best_hyperparameters(table_path + args.tuning_result_path,
                                          'NDCG')

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    # Train on the union of train and validation interactions.
    R_train = R_train + R_valid

    topK = [5, 10, 15, 20, 50]
    result_frames = []
    for _, best_row in best_rows.iterrows():
        timer_start = timeit.default_timer()
        hyper = best_row.to_dict()
        hyper['metric'] = ['R-Precision', 'NDCG', 'Precision', 'Recall', "MAP"]
        hyper['topK'] = topK
        result_frames.append(general(R_train, R_test, hyper,
                                     models[hyper['model']],
                                     measure=hyper['similarity'],
                                     gpu_on=args.gpu,
                                     model_folder=args.model_folder))
        print('Time: ', timeit.default_timer() - timer_start)

    save_dataframe_csv(pd.concat(result_frames), table_path, args.save_path)
def main(args):
    """Run the attention/preference analysis with optional item meta-data.

    Item names are best-effort: when the meta-data file is missing or
    unparsable, the analysis proceeds with item_names=None.
    """
    settings_df = load_dataframe_csv(args.tab_path + args.setting_dir)

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    index_map = np.load(args.data_dir + args.index)

    item_names = None
    try:
        item_names = load_dataframe_csv(args.data_dir + args.names,
                                        delimiter="::",
                                        names=['ItemID', 'Name', 'Category'])
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; the best-effort fallback behavior is preserved.
        print("Meta-data does not exist")

    attention(R_train, R_valid, R_test, index_map, item_names, args.tex_path,
              args.fig_path, settings_df, args.template_path,
              preference_analysis=args.preference_analysis,
              case_study=args.case_study, gpu_on=True)
def main(args):
    """Run interactive critiquing with the best tuned model settings.

    Fix: the tables path from config/global.yml was loaded twice; it is
    now loaded once and reused when saving results.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(
        args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(
        R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    # Loaded once; the same tables directory is reused for saving below.
    table_path = load_yaml('config/global.yml', key='path')['tables']
    parameters = find_best_hyperparameters(table_path + args.dataset_name,
                                           'NDCG')
    parameters_row = parameters.loc[parameters['model'] == args.model]

    # NOTE(review): presumably the yelp dataset stores the item-keyphrase
    # matrix transposed relative to the others — confirm against the data.
    if args.dataset_name == "yelp/":
        R_train_item_keyphrase = R_train_item_keyphrase.T

    start_time = time.time()
    results = critiquing(
        matrix_Train=R_train,
        matrix_Test=R_test,
        keyphrase_freq=R_train_keyphrase,
        item_keyphrase_freq=R_train_item_keyphrase,
        num_users_sampled=args.num_users_sampled,
        num_items_sampled=args.num_items_sampled,
        max_iteration_threshold=args.max_iteration_threshold,
        dataset_name=args.dataset_name,
        model=models[args.model],
        parameters_row=parameters_row,
        critiquing_model_name=args.critiquing_model_name,
        keyphrase_selection_method=args.keyphrase_selection_method,
        topk=args.topk,
        lamb=args.lamb)
    print("Final Time Elapsed: {}".format(inhour(time.time() - start_time)))

    save_dataframe_csv(results, table_path, args.save_path)
def main(args):
    """Grid-search one model's hyperparameters on the validation fold."""
    splitter = WorkSplitter()
    splitter.section("Tune Parameters")

    grid = load_yaml(args.grid)
    # Resolve the model name in the grid to its implementation.
    grid['models'] = {grid['models']: models[grid['models']]}

    train = load_numpy(path=args.path, name=args.dataset + args.train)
    unif_train = load_numpy(path=args.path, name=args.dataset + args.unif_train)
    valid = load_numpy(path=args.path, name=args.dataset + args.valid)

    hyper_parameter_tuning(train, valid, grid,
                           unif_train=unif_train,
                           save_path=args.dataset + args.name,
                           gpu_on=args.gpu,
                           seed=args.seed,
                           way=args.way,
                           dataset=args.dataset)
def main(args):
    """Tune hyperparameters against the validation split."""
    search_space = load_yaml(args.grid)
    # Map the configured model name to its implementation.
    search_space['models'] = {search_space['models']:
                              models[search_space['models']]}

    matrix_train = load_numpy(path=args.path, name=args.train)
    matrix_valid = load_numpy(path=args.path, name=args.valid)

    hyper_parameter_tuning(matrix_train, matrix_valid, search_space,
                           save_path=args.name,
                           measure=search_space['similarity'],
                           gpu_on=args.gpu)
def main(args):
    """Plot NDCG convergence over training epochs for the best tuned models."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    best_df = find_best_hyperparameters(table_path + args.param, 'NDCG')

    matrix_train = load_numpy(path=args.path, name=args.train)
    matrix_valid = load_numpy(path=args.path, name=args.valid)

    progress_results = converge(matrix_train, matrix_valid, best_df,
                                table_path, args.name, epochs=500,
                                gpu_on=args.gpu)
    show_training_progress(progress_results, hue='model', metric='NDCG',
                           name="epoch_vs_ndcg")
def main(args):
    """Run critiquing with fixed (untuned) model settings and save results.

    Fix: the tables path from config/global.yml was loaded twice; it is
    now loaded once and reused when saving.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(path=args.data_dir,
                                        name=args.train_item_keyphrase_set).toarray()

    table_path = load_yaml('config/global.yml', key='path')['tables']

    # Fixed settings used instead of the tuned result table:
    # parameters = find_best_hyperparameters(table_path+args.dataset_name, 'NDCG')
    # parameters_row = parameters.loc[parameters['model'] == args.model]
    parameters_row = {
        'iter': 10,
        'lambda': 200,
        'rank': 200
    }
    keyphrases_names = load_dataframe_csv(
        path=args.data_dir, name="Keyphrases.csv")['Phrases'].tolist()

    results = critiquing(matrix_Train=R_train,
                         matrix_Test=R_test,
                         keyphrase_freq=R_train_keyphrase,
                         item_keyphrase_freq=R_train_item_keyphrase.T,
                         num_users_sampled=args.num_users_sampled,
                         num_items_sampled=args.num_items_sampled,
                         max_iteration_threshold=args.max_iteration_threshold,
                         dataset_name=args.dataset_name,
                         model=models[args.model],
                         parameters_row=parameters_row,
                         critiquing_model_name=args.critiquing_model_name,
                         lamb=args.lambdas,
                         keyphrases_names=keyphrases_names,
                         keyphrase_selection_method=args.keyphrase_selection_method)

    save_dataframe_csv(results, table_path, args.save_path)
def main(args):
    """Train a neighborhood model, predict, and optionally evaluate."""
    splitter = WorkSplitter()

    # Echo the run configuration before doing any work.
    splitter.section("Parameter Setting")
    print("Data Path: {}".format(args.path))
    print("Train File Name: {}".format(args.train))
    if args.validation:
        print("Valid File Name: {}".format(args.valid))
    print("Algorithm: {}".format(args.model))
    print("Lambda Diversity: {}".format(args.lambda_diversity))
    print("Lambda Serendipity: {}".format(args.lambda_serendipity))
    print("Nearest Neighbor Number: {}".format(args.k))
    print("Evaluation Ranking Topk: {}".format(args.topk))

    splitter.section("Loading Data")
    load_start = time.time()
    R_train = load_numpy(path=args.path, name=args.train)
    print("Elapsed: {}".format(inhour(time.time() - load_start)))
    print("Train U-I Dimensions: {}".format(R_train.shape))

    splitter.section("Train")
    model = models[args.model]()
    model.train(R_train)

    splitter.section("Predict")
    prediction_score = model.predict(
        R_train,
        k=args.k,
        lambda_diversity=args.lambda_diversity,
        lambda_serendipity=args.lambda_serendipity)
    prediction = predict(prediction_score=prediction_score,
                         topK=args.topk,
                         matrix_Train=R_train)

    if args.validation:
        splitter.section("Create Metrics")
        eval_start = time.time()
        metric_names = ['R-Precision', 'NDCG', 'Clicks', 'Recall',
                        'Precision', 'MAP']
        R_valid = load_numpy(path=args.path, name=args.valid)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{}:{}".format(metric, result[metric]))
        print("Elapsed: {}".format(inhour(time.time() - eval_start)))
def main(args):
    """Run critiquing with hard-coded hyperparameters and save results.

    Loads train/test rating matrices plus user- and item-keyphrase
    frequency matrices, runs `critiquing` with a fixed parameter row,
    and writes the result CSV.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(
        args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train User Keyphrase U-I Dimensions: {}".format(
        R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(
        R_train_item_keyphrase.shape))

    # Hyperparameter lookup is bypassed in favor of a fixed row below.
    # table_path = load_yaml('config/global.yml', key='path')['tables']
    # parameters = find_best_hyperparameters(table_path+args.dataset_name, 'NDCG')
    # parameters_row = parameters.loc[parameters['model'] == args.model]
    parameters_row = pd.DataFrame({'iter': [4], 'lambda': [80], 'rank': [200]})

    results = critiquing(matrix_Train=R_train,
                         matrix_Test=R_test,
                         keyphrase_freq=R_train_keyphrase,
                         item_keyphrase_freq=R_train_item_keyphrase,
                         num_users_sampled=args.num_users_sampled,
                         num_items_sampled=args.num_items_sampled,
                         max_iteration_threshold=args.max_iteration_threshold,
                         dataset_name=args.dataset_name,
                         model=models[args.model],
                         parameters_row=parameters_row,
                         critiquing_model_name=args.critiquing_model_name)

    # NOTE(review): output directory is a hard-coded user-specific absolute
    # path — this only works on the original author's machine; consider
    # making it a CLI argument.
    # table_path = load_yaml('config/global.yml', key='path')['tables']
    table_path = '/home/shuyang/data4/LatentLinearCritiquingforConvRecSys/'
    save_dataframe_csv(results, table_path, args.save_path)
def main(args):
    """Gain analysis of RestrictedBatchSampleMF over user/item segments."""
    splitter = WorkSplitter()
    table_path = 'tables/'

    test = load_numpy(path=args.path, name=args.dataset + args.test)

    # One evaluation per segment, all with the same model.
    segments = [None, 'head_users', 'tail_users', 'head_items', 'tail_items']
    df = pd.DataFrame({
        'model': ['RestrictedBatchSampleMF'] * len(segments),
        'way': segments
    })

    splitter.subsection("Gain Analysis")
    frame = []
    for _, setting in df.iterrows():
        setting = setting.to_dict()
        setting['metric'] = ['NLL', 'AUC']
        setting['rank'] = 10
        frame.append(execute(test, setting,
                             folder=args.model_folder + args.dataset))

    save_dataframe_csv(pd.concat(frame), table_path, args.name)
def main(args):
    """Reproduce the MF baselines and debiasing variants on the test split."""
    splitter = WorkSplitter()
    table_path = 'tables/'

    test = load_numpy(path=args.path, name=args.dataset + args.test)

    # (model, way) pairs; pairing them keeps the two columns aligned.
    model_way_pairs = [
        ("BiasedMF", None), ("BiasedMF", "unif"), ("BiasedMF", "combine"),
        ("PropensityMF", None),
        ("InitFeatureEmbedMF", "user"), ("InitFeatureEmbedMF", "item"),
        ("InitFeatureEmbedMF", "both"),
        ("AlterFeatureEmbedMF", None), ("ConcatFeatureEmbedMF", None),
        ("CausalSampleMF", None), ("UnionSampleMF", None),
        ("WRSampleMF", None), ("BatchSampleMF", None),
        ("BridgeLabelMF", None), ("RefineLabelMF", None)
    ]
    df = pd.DataFrame({
        'model': [pair[0] for pair in model_way_pairs],
        'way': [pair[1] for pair in model_way_pairs]
    })

    splitter.subsection("Reproduce")
    frame = []
    for _, setting in df.iterrows():
        setting = setting.to_dict()
        setting['metric'] = ['NLL', 'AUC']
        setting['rank'] = 10
        frame.append(execute(test, setting,
                             folder=args.model_folder + args.dataset))

    save_dataframe_csv(pd.concat(frame), table_path, args.name)
def main(args):
    """Reproduce the autoencoder baselines and variants on the test split."""
    splitter = WorkSplitter()
    table_path = 'tables/'

    test = load_numpy(path=args.path, name=args.dataset + args.test)

    # (model, way) pairs; pairing them keeps the two columns aligned.
    model_way_pairs = [
        ('AutoRec', None), ('AutoRec', 'unif'), ('AutoRec', 'combine'),
        ('InitFeatureEmbedAE', 'user'), ('InitFeatureEmbedAE', 'item'),
        ('InitFeatureEmbedAE', 'both'),
        ('AlterFeatureEmbedAE', None), ('ConcatFeatureEmbedAE', None),
        ('UnionSampleAE', None), ('WRSampleAE', None),
        ('BatchSampleAE', None), ('BridgeLabelAE', None),
        ('RefineLabelAE', None),
        ('DeepAutoRec', None), ('DeepAutoRec', 'unif'),
        ('SoftLabelAE', None), ('HintAE', None)
    ]
    df = pd.DataFrame({
        'model': [pair[0] for pair in model_way_pairs],
        'way': [pair[1] for pair in model_way_pairs]
    })

    splitter.subsection("Reproduce")
    frame = []
    for _, setting in df.iterrows():
        setting = setting.to_dict()
        setting['metric'] = ['NLL', 'AUC']
        setting['rank'] = 200
        frame.append(execute(test, setting,
                             folder=args.model_folder + args.dataset))

    save_dataframe_csv(pd.concat(frame), table_path, args.name)
def main(args):
    """User-category breakdown of the best tuned models on the test fold."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    best_df = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    matrix_train = load_numpy(path=args.path, name=args.train)
    matrix_valid = load_numpy(path=args.path, name=args.valid)
    matrix_test = load_numpy(path=args.path, name=args.test)
    # Evaluate models trained on train + validation.
    matrix_train = matrix_train + matrix_valid

    topK = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    metric = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP']
    usercategory(matrix_train, matrix_test, best_df, topK, metric,
                 args.problem, args.model_folder, gpu_on=args.gpu)
def main(args):
    """Personalization analysis of the best tuned models on the test fold."""
    table_path = load_yaml('config/global.yml', key='path')['tables']
    best_df = find_best_hyperparameters(table_path + args.problem, 'NDCG')

    matrix_train = load_numpy(path=args.path, name=args.train)
    matrix_valid = load_numpy(path=args.path, name=args.valid)
    matrix_test = load_numpy(path=args.path, name=args.test)
    # Evaluate models trained on train + validation.
    matrix_train = matrix_train + matrix_valid

    topK = [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    personalization(matrix_train, matrix_test, best_df, topK, args.problem,
                    args.model_folder, gpu_on=args.gpu)
def main(args):
    """Evaluate rating + explanation prediction with binarized keyphrases."""
    params = {'tuning_result_path': args.tuning_result_path}

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_test = load_numpy(path=args.data_dir, name=args.test_set)

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set)
    R_test_keyphrase = load_numpy(path=args.data_dir,
                                  name=args.test_keyphrase_set)
    # Keyphrase matrices are used as presence indicators, not counts.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_test_keyphrase[R_test_keyphrase != 0] = 1

    general(R_train, R_test, R_train_keyphrase.todense(), R_test_keyphrase,
            params, save_path=args.save_path,
            final_explanation=args.final_explanation)
def main(args):
    """Tune hyperparameters for joint rating/explanation models."""
    search = load_yaml(args.parameters)
    # Resolve the configured model name to its implementation.
    search['models'] = {search['models']: models[search['models']]}

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set)
    R_valid_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.valid_keyphrase_set)
    # Binarize keyphrase frequencies into presence indicators.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_valid_keyphrase[R_valid_keyphrase != 0] = 1

    hyper_parameter_tuning(R_train, R_valid, R_train_keyphrase.todense(),
                           R_valid_keyphrase, search,
                           save_path=args.save_path,
                           tune_explanation=args.tune_explanation)
def cml_normalized(matrix_train, time_stamp_matrix=None,
                   embeded_matrix=np.empty((0)), iteration=100, lam=80,
                   rank=200, seed=1, **unused):
    """Train normalized collaborative metric learning and return embeddings.

    Fix: the `time_stamp_matrix` parameter was unconditionally overwritten
    by a hard-coded load of 'datax/Rtime.npz'; it is now honored when
    supplied, with the hard-coded load kept only as a fallback for None.

    Args:
        matrix_train: sparse user-item training matrix.
        time_stamp_matrix: sparse matrix of interaction timestamps; loaded
            from 'datax/Rtime.npz' when None.
        embeded_matrix: optional extra rows stacked onto the input.
        iteration: number of training iterations.
        lam: covariance-loss weight.
        rank: embedding dimension.
        seed: unused here, kept for interface compatibility.

    Returns:
        Tuple (RQ, Y, None) of user embeddings, item embeddings, and no bias.
    """
    progress = WorkSplitter()
    matrix_input = matrix_train

    if time_stamp_matrix is None:
        from utils.io import load_numpy
        time_stamp_matrix = load_numpy(path='datax/', name='Rtime.npz')
    # Order interactions per user by timestamp, restricted to observed entries.
    orders = get_orders(time_stamp_matrix.multiply(matrix_train))

    if embeded_matrix.shape[0] > 0:
        matrix_input = vstack((matrix_input, embeded_matrix.T))

    m, n = matrix_input.shape
    model = NormalizedCollaborativeMetricLearning(num_users=m, num_items=n,
                                                  embed_dim=rank,
                                                  cov_loss_weight=lam)
    model.train_model(matrix_input, orders, iteration)

    RQ = model.get_RQ()
    Y = model.get_Y().T
    # Free the TF graph so repeated calls do not accumulate state.
    tf.reset_default_graph()
    return RQ, Y, None
def main(args):
    """Train a mode/key-dimension model (item- or user-based), predict,
    and optionally evaluate.

    Fix: the user-based branch omitted `epoch=args.epoch` even though it
    is printed as part of the run configuration and passed in the
    item-based branch; both branches now receive it.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.data_dir))
    print("Train File Name: {0}".format(args.train_set))
    if args.validation:
        print("Valid File Name: {0}".format(args.valid_set))
    print("Algorithm: {0}".format(args.model))
    mode = "Item-based" if args.item else "User-based"
    print("Normalize: {0}".format(args.normalize))
    print("Mode: {0}".format(mode))
    print("Alpha: {0}".format(args.alpha))
    print("Rank: {0}".format(args.rank))
    print("Mode Dimension: {0}".format(args.mode_dim))
    print("Key Dimension: {0}".format(args.key_dim))
    print("Batch Size: {0}".format(args.batch_size))
    print("Optimizer: {0}".format(args.optimizer))
    print("Learning Rate: {0}".format(args.learning_rate))
    print("Lambda: {0}".format(args.lamb))
    print("SVD/Alter Iteration: {0}".format(args.iteration))
    print("Epoch: {0}".format(args.epoch))
    print("Corruption: {0}".format(args.corruption))
    print("Root: {0}".format(args.root))
    print("Evaluation Ranking Topk: {0}".format(args.topk))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    if args.shape is None:
        R_train = load_numpy(path=args.data_dir, name=args.train_set)
    else:
        R_train = load_csv(path=args.data_dir, name=args.train_set,
                           shape=args.shape)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
    print("Train U-I Dimensions: {0}".format(R_train.shape))

    # Item-Item or User-User: the user-based path trains on the transpose
    # and transposes the factors back afterwards.
    if args.item:
        RQ, Yt, Bias = models[args.model](R_train,
                                          embedded_matrix=np.empty((0)),
                                          mode_dim=args.mode_dim,
                                          key_dim=args.key_dim,
                                          batch_size=args.batch_size,
                                          optimizer=args.optimizer,
                                          learning_rate=args.learning_rate,
                                          normalize=args.normalize,
                                          iteration=args.iteration,
                                          epoch=args.epoch,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          gpu_on=args.gpu,
                                          lamb=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        Y = Yt.T
    else:
        Y, RQt, Bias = models[args.model](R_train.T,
                                          embedded_matrix=np.empty((0)),
                                          mode_dim=args.mode_dim,
                                          key_dim=args.key_dim,
                                          batch_size=args.batch_size,
                                          optimizer=args.optimizer,
                                          learning_rate=args.learning_rate,
                                          normalize=args.normalize,
                                          iteration=args.iteration,
                                          epoch=args.epoch,
                                          rank=args.rank,
                                          corruption=args.corruption,
                                          gpu_on=args.gpu,
                                          lamb=args.lamb,
                                          alpha=args.alpha,
                                          seed=args.seed,
                                          root=args.root)
        RQ = RQt.T

    progress.section("Predict")
    prediction = predict(matrix_U=RQ, matrix_V=Y, bias=Bias, topK=args.topk,
                         matrix_Train=R_train, measure=args.sim_measure,
                         gpu=args.gpu)

    if args.validation:
        progress.section("Create Metrics")
        start_time = time.time()
        metric_names = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision']
        R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{0}:{1}".format(metric, result[metric]))
        print("Elapsed: {0}".format(inhour(time.time() - start_time)))
def main(args):
    """Hyperparameter search over the validation split."""
    config = load_yaml(args.parameters)
    # Resolve the configured model name to its implementation.
    config['models'] = {config['models']: models[config['models']]}

    matrix_train = load_numpy(path=args.path, name=args.train)
    matrix_valid = load_numpy(path=args.path, name=args.valid)

    hyper_parameter_tuning(matrix_train, matrix_valid, config,
                           save_path=args.save_path)
def main(args):
    """Train a latent-factor model, save factors, predict, and optionally
    evaluate.

    Fix: three Python 2 `print "..."` statements were converted to
    `print(...)` — they are syntax errors under Python 3, which every
    other entry point in this file targets. Dead commented-out
    save_mxnet code removed.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.train))
    if args.validation:
        print("Valid File Name: {0}".format(args.valid))
    print("Algorithm: {0}".format(args.model))
    mode = "Item-based" if args.item else "User-based"
    print("Mode: {0}".format(mode))
    print("Alpha: {0}".format(args.alpha))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("SVD/Alter Iteration: {0}".format(args.iter))
    print("Evaluation Ranking Topk: {0}".format(args.topk))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    if args.shape is None:
        R_train = load_numpy(path=args.path, name=args.train)
    else:
        R_train = load_csv(path=args.path, name=args.train, shape=args.shape)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
    print("Train U-I Dimensions: {0}".format(R_train.shape))

    # Item-Item or User-User: the user-based path trains on the transpose
    # and transposes the factors back afterwards.
    if args.item:
        RQ, Yt, Bias = models[args.model](R_train,
                                          embeded_matrix=np.empty((0)),
                                          iteration=args.iter, rank=args.rank,
                                          corruption=args.corruption,
                                          lam=args.lamb, alpha=args.alpha,
                                          seed=args.seed, root=args.root)
        Y = Yt.T
    else:
        Y, RQt, Bias = models[args.model](R_train.T,
                                          embeded_matrix=np.empty((0)),
                                          iteration=args.iter, rank=args.rank,
                                          corruption=args.corruption,
                                          lam=args.lamb, alpha=args.alpha,
                                          seed=args.seed, root=args.root)
        RQ = RQt.T

    # Persist the learned factors (and bias, when the model produces one).
    np.save('latent/U_{0}_{1}'.format(args.model, args.rank), RQ)
    np.save('latent/V_{0}_{1}'.format(args.model, args.rank), Y)
    if Bias is not None:
        np.save('latent/B_{0}_{1}'.format(args.model, args.rank), Bias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ, matrix_V=Y, bias=Bias, topK=args.topk,
                         matrix_Train=R_train, measure=args.sim_measure,
                         gpu=True)

    if args.validation:
        progress.section("Create Metrics")
        start_time = time.time()
        metric_names = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision']
        R_valid = load_numpy(path=args.path, name=args.valid)
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{0}:{1}".format(metric, result[metric]))
        print("Elapsed: {0}".format(inhour(time.time() - start_time)))
def main(args):
    """Train a joint rating/keyphrase model, predict, and optionally
    evaluate both rating and keyphrase rankings.

    Closes the model's TF session and resets the default graph at the end.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Algorithm: {}".format(args.model))
    print("Optimizer: {}".format(args.optimizer))
    print("Corruption Rate: {}".format(args.corruption))
    print("Learning Rate: {}".format(args.learning_rate))
    print("Epoch: {}".format(args.epoch))
    print("Lambda L2: {}".format(args.lamb_l2))
    print("Lambda Keyphrase: {}".format(args.lamb_keyphrase))
    print("Lambda Latent: {}".format(args.lamb_latent))
    print("Lambda Rating: {}".format(args.lamb_rating))
    print("Beta: {}".format(args.beta))
    print("Rank: {}".format(args.rank))
    print("Train Batch Size: {}".format(args.train_batch_size))
    print("Predict Batch Size: {}".format(args.predict_batch_size))
    print("Evaluation Ranking Topk: {}".format(args.topk))
    print("Validation Enabled: {}".format(args.enable_validation))

    # Load Data
    progress.section("Load Data")
    start_time = time.time()
    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))
    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train Keyphrase U-S Dimensions: {}".format(R_train_keyphrase.shape))
    # "R_valid" holds the validation fold or, when validation is disabled,
    # the test fold — downstream code treats both the same way.
    if args.enable_validation:
        R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
        R_valid_keyphrase = load_numpy(path=args.data_dir,
                                       name=args.valid_keyphrase_set)
    else:
        R_valid = load_numpy(path=args.data_dir, name=args.test_set)
        R_valid_keyphrase = load_numpy(path=args.data_dir,
                                       name=args.test_keyphrase_set)
    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    progress.section("Preprocess Keyphrase Frequency")
    start_time = time.time()
    # Keyphrase frequencies are binarized into presence indicators.
    R_train_keyphrase[R_train_keyphrase != 0] = 1
    R_valid_keyphrase[R_valid_keyphrase != 0] = 1
    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    progress.section("Train")
    start_time = time.time()
    model = models[args.model](matrix_train=R_train,
                               epoch=args.epoch,
                               lamb_l2=args.lamb_l2,
                               lamb_keyphrase=args.lamb_keyphrase,
                               lamb_latent=args.lamb_latent,
                               lamb_rating=args.lamb_rating,
                               beta=args.beta,
                               learning_rate=args.learning_rate,
                               rank=args.rank,
                               corruption=args.corruption,
                               optimizer=args.optimizer,
                               matrix_train_keyphrase=R_train_keyphrase)
    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    progress.section("Predict")
    start_time = time.time()
    rating_score, keyphrase_score = model.predict(R_train.todense())
    prediction = predict(rating_score, args.topk, matrix_Train=R_train)
    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    # NOTE(review): evaluation is gated on `enable_evaluation` while data
    # selection above is gated on `enable_validation` — looks deliberate
    # (two independent switches) but worth confirming against the CLI.
    if args.enable_evaluation:
        progress.section("Create Metrics")
        start_time = time.time()
        metric_names = [
            'R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP'
        ]
        result = evaluate(prediction, R_valid, metric_names, [args.topk])
        print("-")
        for metric in result.keys():
            print("{}:{}".format(metric, result[metric]))
        # Keyphrase ranking is only evaluated for models that emit one.
        if keyphrase_score is not None:
            keyphrase_prediction = predict_keyphrase(keyphrase_score,
                                                     args.topk)
            keyphrase_result = evaluate(keyphrase_prediction,
                                        sparse.csr_matrix(R_valid_keyphrase),
                                        metric_names, [args.topk])
            print("-")
            for metric in keyphrase_result.keys():
                print("{}:{}".format(metric, keyphrase_result[metric]))
        print("Elapsed: {}".format(inhour(time.time() - start_time)))

    # Release TF resources so subsequent runs start from a clean graph.
    model.sess.close()
    tf.reset_default_graph()
def main(args):
    """Train one MF debiasing model, save its factors under latent/<dataset>,
    then predict and report NLL/AUC on the validation fold.

    Saved file names are prefixed with `args.way` when a sampling "way"
    is specified, so variants do not overwrite the base model's factors.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.dataset + args.train))
    print("Uniform Train File Name: {0}".format(args.dataset + args.unif_train))
    print("Valid File Name: {0}".format(args.dataset + args.valid))
    print("Algorithm: {0}".format(args.model))
    print("Way: {0}".format(args.way))
    print("Seed: {0}".format(args.seed))
    print("Batch Size: {0}".format(args.batch_size))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("Iteration: {0}".format(args.iter))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    train = load_numpy(path=args.path, name=args.dataset + args.train)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
    print("Train U-I Dimensions: {0}".format(train.shape))

    # Train Model
    valid = load_numpy(path=args.path, name=args.dataset + args.valid)
    unif_train = load_numpy(path=args.path,
                            name=args.dataset + args.unif_train)
    RQ, Y, uBias, iBias = models[args.model](train, valid,
                                             dataset=args.dataset,
                                             matrix_unif_train=unif_train,
                                             iteration=args.iter,
                                             rank=args.rank,
                                             gpu_on=args.gpu,
                                             lam=args.lamb,
                                             lam2=args.lamb2,
                                             seed=args.seed,
                                             batch_size=args.batch_size,
                                             way=args.way,
                                             confidence=args.confidence,
                                             step=args.step)

    save_path = 'latent/' + args.dataset
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if args.way is None:
        np.save(save_path + '/U_{0}_{1}'.format(args.model, args.rank), RQ)
        np.save(save_path + '/V_{0}_{1}'.format(args.model, args.rank), Y)
        # Biases are optional; both are saved together when present.
        if uBias is not None:
            np.save(save_path + '/uB_{0}_{1}'.format(args.model, args.rank),
                    uBias)
            np.save(save_path + '/iB_{0}_{1}'.format(args.model, args.rank),
                    iBias)
    else:
        np.save(save_path + '/' + args.way +
                '_U_{0}_{1}'.format(args.model, args.rank), RQ)
        np.save(save_path + '/' + args.way +
                '_V_{0}_{1}'.format(args.model, args.rank), Y)
        if uBias is not None:
            np.save(save_path + '/' + args.way +
                    '_uB_{0}_{1}'.format(args.model, args.rank), uBias)
            np.save(save_path + '/' + args.way +
                    '_iB_{0}_{1}'.format(args.model, args.rank), iBias)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ, matrix_V=Y, matrix_Valid=valid,
                         ubias=uBias, ibias=iBias, gpu=args.gpu)

    progress.section("Evaluation")
    start_time = time.time()
    metric_names = ['NLL', 'AUC']
    result = evaluate(prediction, valid, metric_names, gpu=args.gpu)
    print("----Final Result----")
    for metric in result.keys():
        print("{0}:{1}".format(metric, result[metric]))
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
def _save_latents(save_path, way, model, rank, factors):
    # Persist each named factor as <save_path>/[<way>_]<tag>_<model>_<rank>.npy,
    # matching the historical naming scheme exactly.
    prefix = '' if way is None else way + '_'
    for tag, matrix in factors.items():
        np.save(save_path + '/' + prefix + '{0}_{1}_{2}'.format(tag, model, rank), matrix)


def main(args):
    """Train a (deep) autoencoder or MF model, save its factors, and evaluate.

    The deep variants ('DeepAutoRec', 'HintAE', 'SoftLabelAE') return five
    weight matrices plus biases; everything else returns three. Both paths
    share the same save / predict / evaluate tail, differing only in which
    matrix plays the item-factor role in prediction.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyper parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {0}".format(args.path))
    print("Train File Name: {0}".format(args.dataset + args.train))
    print("Uniform Train File Name: {0}".format(args.dataset + args.unif_train))
    print("Valid File Name: {0}".format(args.dataset + args.valid))
    print("Algorithm: {0}".format(args.model))
    print("Way: {0}".format(args.way))
    print("Seed: {0}".format(args.seed))
    print("Batch Size: {0}".format(args.batch_size))
    print("Rank: {0}".format(args.rank))
    print("Lambda: {0}".format(args.lamb))
    print("Iteration: {0}".format(args.iter))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    train = load_numpy(path=args.path, name=args.dataset + args.train)
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
    print("Train U-I Dimensions: {0}".format(train.shape))

    # Train Model
    valid = load_numpy(path=args.path, name=args.dataset + args.valid)
    unif_train = load_numpy(path=args.path, name=args.dataset + args.unif_train)

    if args.model in ['DeepAutoRec', 'HintAE', 'SoftLabelAE']:
        # Deep variants take a second rank and a temperature, no lam2.
        RQ, X, xBias, Y, yBias, Z, zBias, K, kBias = models[args.model](
            train, valid, dataset=args.dataset, matrix_unif_train=unif_train,
            iteration=args.iter, rank=args.rank, rank2=args.rank2,
            gpu_on=args.gpu, lam=args.lamb, seed=args.seed,
            batch_size=args.batch_size, way=args.way,
            confidence=args.confidence, step=args.step, tau=args.tau)
        factors = {'U': RQ, 'Y': Y, 'X': X, 'Z': Z, 'K': K}
        if xBias is not None:
            # All four biases are saved together when the first one exists.
            factors.update({'xB': xBias, 'yB': yBias, 'zB': zBias, 'kB': kBias})
        matrix_V = K.T  # final decoder layer acts as the item factors
    else:
        RQ, X, xBias, Y, yBias = models[args.model](
            train, valid, dataset=args.dataset, matrix_unif_train=unif_train,
            iteration=args.iter, rank=args.rank, gpu_on=args.gpu,
            lam=args.lamb, lam2=args.lamb2, seed=args.seed,
            batch_size=args.batch_size, way=args.way,
            confidence=args.confidence, step=args.step)
        factors = {'U': RQ, 'Y': Y, 'X': X}
        if xBias is not None:
            factors.update({'xB': xBias, 'yB': yBias})
        matrix_V = Y.T

    save_path = 'latent/' + args.dataset
    os.makedirs(save_path, exist_ok=True)  # idempotent; no exists()/makedirs race
    _save_latents(save_path, args.way, args.model, args.rank, factors)

    progress.section("Predict")
    prediction = predict(matrix_U=RQ, matrix_V=matrix_V, matrix_Valid=valid, bias=yBias, gpu=args.gpu)

    progress.section("Evaluation")
    start_time = time.time()
    metric_names = ['NLL', 'AUC']
    result = evaluate(prediction, valid, metric_names, gpu=args.gpu)

    print("----Final Result----")
    for metric in result.keys():
        print("{0}:{1}".format(metric, result[metric]))
    print("Elapsed: {0}".format(inhour(time.time() - start_time)))
def main(args):
    """Grid-search critiquing hyperparameters (topk x lamb) and save each result.

    For every (topk, lamb) pair, runs the critiquing simulation with the best
    previously-tuned model hyperparameters and writes one CSV per grid point.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(
        args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(
        R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    # Table path and tuned hyperparameters are loop-invariant: load them once
    # (the old code re-read config/global.yml on every grid point).
    table_path = load_yaml('config/global.yml', key='path')['tables']
    parameters = find_best_hyperparameters(table_path + args.dataset_name,
                                           'NDCG')
    parameters_row = parameters.loc[parameters['model'] == args.model]

    lambs = [
        0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500,
        1000, 10000, 100000
    ]
    topks = [10, 20, 50, 100]

    # NOTE(review): yelp appears to store the item-keyphrase matrix transposed
    # relative to the other datasets — confirm against the data-prep scripts.
    if args.dataset_name == "yelp/":
        R_train_item_keyphrase = R_train_item_keyphrase.T

    for topk in topks:
        for lamb in lambs:
            results = critiquing(
                matrix_Train=R_train,
                matrix_Test=R_test,
                keyphrase_freq=R_train_keyphrase,
                item_keyphrase_freq=R_train_item_keyphrase,
                num_users_sampled=args.num_users_sampled,
                num_items_sampled=args.num_items_sampled,
                max_iteration_threshold=args.max_iteration_threshold,
                dataset_name=args.dataset_name,
                model=models[args.model],
                parameters_row=parameters_row,
                critiquing_model_name=args.critiquing_model_name,
                keyphrase_selection_method=args.keyphrase_selection_method,
                topk=topk,
                lamb=lamb)
            # One CSV per (topk, lamb) grid point.
            topk_path = "topk_" + str(topk) + "/"
            save_name = args.save_path + topk_path + "tuning_at_lamb_" + str(
                lamb) + "_with_" + args.keyphrase_selection_method + ".csv"
            save_dataframe_csv(results, table_path, save_name)
def main(args):
    """Run an active-learning loop on top of a recommender, then evaluate.

    Trains a base model on the warm-start slice of users, iteratively queries
    items for the cold-start slice via the chosen active-learning strategy,
    folds the observed feedback back into the train matrix, retrains, and
    reports ranking metrics at several topk cutoffs on the test set.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show parameter settings
    progress.section("Parameter Setting")
    print("Data Path: {}".format(args.path))
    print("Active Learning Algorithm: {}".format(args.active_model))
    print("Recommendation Algorithm: {}".format(args.rec_model))
    print("GPU: {}".format(args.gpu))
    print("Iterative: {}".format(args.iterative))
    print("Sample From All: {}".format(args.sample_from_all))
    print("Train Valid Test Split Ratio: {}".format(args.ratio))
    print("Learning Rate: {}".format(args.learning_rate))
    print("Rank: {}".format(args.rank))
    print("Lambda: {}".format(args.lamb))
    print("Epoch: {}".format(args.epoch))
    print("Active Learning Iteration: {}".format(args.active_iteration))
    print("Evaluation Ranking Topk: {}".format(args.topk))
    print("UCB Confidence: {}".format(args.confidence_interval))
    print("Number of Item per Active Iteration: {}".format(args.num_item_per_iter))
    print("UCB Number of Latent Sampling: {}".format(args.num_latent_sampling))

    # Load Data
    progress.section("Loading Data")
    start_time = time.time()
    R_train = load_numpy(path=args.path, name=args.train)
    print("Train U-I Dimensions: {}".format(R_train.shape))
    R_active = load_numpy(path=args.path, name=args.active)
    print("Active U-I Dimensions: {}".format(R_active.shape))
    R_test = load_numpy(path=args.path, name=args.test)
    print("Test U-I Dimensions: {}".format(R_test.shape))
    print("Elapsed: {}".format(inhour(time.time() - start_time)))

    # Users below train_index are the warm-start training population; users at
    # and above it are the cold-start population targeted by active learning.
    train_index = int(R_test.shape[0]*args.ratio[0])

    progress.section("Preparing Data")
    # Drop users with too little activity to support the configured number of
    # active-learning queries / topk evaluation.
    matrix_train, matrix_active, matrix_test, _ = filter_users(R_train, R_active, R_test,
                                                               train_index=train_index,
                                                               active_threshold=2*args.num_item_per_iter*args.active_iteration,
                                                               test_threshold=2*args.topk)

    m, n = matrix_train.shape

    # Items already shown to users; grows across iterations via update_matrix.
    history_items = np.array([])

    model = rec_models[args.rec_model](observation_dim=n,
                                       latent_dim=args.rank,
                                       batch_size=128,
                                       lamb=args.lamb,
                                       learning_rate=args.learning_rate,
                                       optimizer=Regularizer[args.optimizer])

    progress.section("Training")
    # Initial fit on the warm-start users only.
    model.train_model(matrix_train[:train_index], args.corruption, args.epoch)

    for i in range(args.active_iteration):
        print('This is step {} \n'.format(i))
        print('The number of ones in train set is {}'.format(len(matrix_train[train_index:].nonzero()[0])))
        print('The number of ones in active set is {}'.format(len(matrix_active[train_index:].nonzero()[0])))

        progress.section("Predicting")
        # Score items for the cold-start slice; .A densifies the sparse slice.
        observation = active_models[args.active_model](model=model,
                                                       matrix=matrix_train[train_index:].A,
                                                       ci=args.confidence_interval,
                                                       num_latent_sampling=args.num_latent_sampling)

        progress.section("Update Train Set")
        # Query items, move the observed feedback from active into train, and
        # extend the per-user query history.
        matrix_train, history_items = update_matrix(history_items, matrix_train,
                                                    matrix_active, observation,
                                                    train_index, args.iterative,
                                                    args.sample_from_all,
                                                    args.num_item_per_iter,
                                                    args.active_iteration, args.gpu)

        # Non-iterative mode performs a single query round.
        if not args.iterative:
            break

    # matrix_train = matrix_train + matrix_active
    print('The number of ones in train set is {}'.format(len(matrix_train[train_index:].nonzero()[0])))

    progress.section("Re-Training")
    # Retrain on all users, including the feedback gathered above.
    model.train_model(matrix_train, args.corruption, args.epoch)

    progress.section("Re-Predicting")
    # Plain greedy scoring for final evaluation, regardless of args.active_model.
    observation = active_models['Greedy'](model=model, matrix=matrix_train.A)

    result = {}
    for topk in [5, 10, 15, 20, 50]:
        predict_items, _ = sampling_predict(prediction_scores=observation[train_index:],
                                            topK=topk,
                                            matrix_train=matrix_train[train_index:],
                                            matrix_active=matrix_active[train_index:],
                                            sample_from_all=True,
                                            iterative=False,
                                            history_items=np.array([]),
                                            gpu=args.gpu)

        progress.section("Create Metrics")
        # NOTE(review): 'eval' here is a project metrics function that shadows
        # the builtin — confirm its import at the top of the file.
        result.update(eval(matrix_test[train_index:], topk, predict_items))
        print(result)

    # Release the TF session/graph so repeated runs in one process don't leak.
    model.sess.close()
    tf.reset_default_graph()