def main(args):
    R_train = load_numpy(path=args.data_dir, name=args.train_set)

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    R_train_keyphrase[R_train_keyphrase != 0] = 1

    R_train_item_keyphrase = load_numpy(path=args.data_dir,
                                        name=args.train_item_keyphrase_set).T.toarray()

    num_items, num_keyphrases = R_train_item_keyphrase.shape

    # For each item, keep only its ten most frequent keyphrases, binarized
    for item in range(num_items):
        item_keyphrase = R_train_item_keyphrase[item]
        nonzero_keyphrases_index = item_keyphrase.nonzero()[0]
        nonzero_keyphrases_frequency = item_keyphrase[nonzero_keyphrases_index]
        candidate_index = nonzero_keyphrases_index[
            np.argsort(-nonzero_keyphrases_frequency)[:10]]
        binarized_keyphrase = np.zeros(num_keyphrases)
        binarized_keyphrase[candidate_index] = 1
        R_train_item_keyphrase[item] = binarized_keyphrase

    R_train_item_keyphrase = sparse.csr_matrix(R_train_item_keyphrase).T

    params = dict()
    # params['model_saved_path'] = args.model_saved_path

    critiquing(R_train,
               R_train_keyphrase,
               R_train_item_keyphrase,
               params,
               args.num_users_sampled,
               load_path=args.load_path,
               save_path=args.save_path,
               critiquing_function=args.critiquing_function)
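# A minimal vectorized sketch of the same "top ten keyphrases per item"
# binarization performed by the loop above. Illustrative only: the name
# binarize_top_k is hypothetical and not part of the pipeline.
import numpy as np

def binarize_top_k(freq, k=10):
    """Keep a 1 for each row's k most frequent entries, 0 elsewhere."""
    top_idx = np.argsort(-freq, axis=1)[:, :k]  # k largest per row
    binarized = np.zeros_like(freq)
    np.put_along_axis(binarized, top_idx, 1, axis=1)
    # Rows with fewer than k nonzeros keep only their real keyphrases
    binarized[freq == 0] = 0
    return binarized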
def main(args):
    num_users = pd.read_csv(args.data_dir + args.user_col + '.csv')[args.user_col].nunique()
    num_items = pd.read_csv(args.data_dir + args.item_col + '.csv')[args.item_col].nunique()

    df_train = pd.read_csv(args.data_dir + args.train_set)
    df_train = df_train[df_train[args.rating_col] == 1]
    # Keyphrase vectors are stored in the CSV as stringified lists; parse them back
    df_train[args.keyphrase_vector_col] = df_train[
        args.keyphrase_vector_col].apply(ast.literal_eval)

    keyphrase_names = pd.read_csv(args.data_dir + args.keyphrase_set)[
        args.keyphrase_col].values

    params = dict()
    params['model_saved_path'] = args.model_saved_path

    critiquing(num_users,
               num_items,
               args.user_col,
               args.item_col,
               args.rating_col,
               args.keyphrase_vector_col,
               df_train,
               keyphrase_names,
               params,
               args.num_users_sampled,
               load_path=args.load_path,
               save_path=args.save_path)
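# Why ast.literal_eval above: pandas reads the keyphrase vector column back
# from CSV as strings like "[3, 17, 42]", so each cell must be parsed into a
# real Python list before use. A self-contained toy illustration (hypothetical
# data, not from the dataset):
import ast
import pandas as pd

toy = pd.DataFrame({'keyphrase_vector': ['[3, 17, 42]', '[5]']})
toy['keyphrase_vector'] = toy['keyphrase_vector'].apply(ast.literal_eval)
assert toy['keyphrase_vector'].iloc[0] == [3, 17, 42]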
def main(args): # Progress bar progress = WorkSplitter() # Show hyperparameter settings progress.section("Parameter Setting") print("Data Directory: {}".format(args.data_dir)) print("Number of Users Sampled: {}".format(args.num_users_sampled)) print("Number of Items Sampled: {}".format(args.num_items_sampled)) print("Number of Max Allowed Iterations: {}".format( args.max_iteration_threshold)) print("Critiquing Model: {}".format(args.critiquing_model_name)) R_train = load_numpy(path=args.data_dir, name=args.train_set) print("Train U-I Dimensions: {}".format(R_train.shape)) R_test = load_numpy(path=args.data_dir, name=args.test_set) print("Test U-I Dimensions: {}".format(R_test.shape)) R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set).toarray() print("Train Item Keyphrase U-I Dimensions: {}".format( R_train_keyphrase.shape)) R_train_item_keyphrase = load_numpy( path=args.data_dir, name=args.train_item_keyphrase_set).toarray() table_path = load_yaml('config/global.yml', key='path')['tables'] parameters = find_best_hyperparameters(table_path + args.dataset_name, 'NDCG') parameters_row = parameters.loc[parameters['model'] == args.model] if args.dataset_name == "yelp/": R_train_item_keyphrase = R_train_item_keyphrase.T start_time = time.time() results = critiquing( matrix_Train=R_train, matrix_Test=R_test, keyphrase_freq=R_train_keyphrase, item_keyphrase_freq=R_train_item_keyphrase, num_users_sampled=args.num_users_sampled, num_items_sampled=args.num_items_sampled, max_iteration_threshold=args.max_iteration_threshold, dataset_name=args.dataset_name, model=models[args.model], parameters_row=parameters_row, critiquing_model_name=args.critiquing_model_name, keyphrase_selection_method=args.keyphrase_selection_method, topk=args.topk, lamb=args.lamb) print("Final Time Elapsed: {}".format(inhour(time.time() - start_time))) table_path = load_yaml('config/global.yml', key='path')['tables'] save_dataframe_csv(results, table_path, args.save_path)
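# `inhour` above is the repo's elapsed-time formatter for the log line. A
# plausible stand-in, assuming it renders seconds as HH:MM:SS (an assumption,
# not necessarily the repository's actual definition):
import time

def inhour(elapsed):
    return time.strftime('%H:%M:%S', time.gmtime(elapsed))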
def main(args): # Progress bar progress = WorkSplitter() # Show hyperparameter settings progress.section("Parameter Setting") print("Data Directory: {}".format(args.data_dir)) print("Number of Users Sampled: {}".format(args.num_users_sampled)) print("Number of Items Sampled: {}".format(args.num_items_sampled)) print("Number of Max Allowed Iterations: {}".format(args.max_iteration_threshold)) print("Critiquing Model: {}".format(args.critiquing_model_name)) R_train = load_numpy(path=args.data_dir, name=args.train_set) print("Train U-I Dimensions: {}".format(R_train.shape)) R_test = load_numpy(path=args.data_dir, name=args.test_set) print("Test U-I Dimensions: {}".format(R_test.shape)) R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set).toarray() print("Train Item Keyphrase U-I Dimensions: {}".format(R_train_keyphrase.shape)) R_train_item_keyphrase = load_numpy(path=args.data_dir, name=args.train_item_keyphrase_set).toarray() table_path = load_yaml('config/global.yml', key='path')['tables'] # parameters = find_best_hyperparameters(table_path+args.dataset_name, 'NDCG') # parameters_row = parameters.loc[parameters['model'] == args.model] parameters_row = { 'iter' : 10, 'lambda' : 200, 'rank' : 200 } keyphrases_names = load_dataframe_csv(path = args.data_dir, name = "Keyphrases.csv")['Phrases'].tolist() results = critiquing(matrix_Train=R_train, matrix_Test=R_test, keyphrase_freq=R_train_keyphrase, item_keyphrase_freq=R_train_item_keyphrase.T, num_users_sampled=args.num_users_sampled, num_items_sampled=args.num_items_sampled, max_iteration_threshold=args.max_iteration_threshold, dataset_name=args.dataset_name, model=models[args.model], parameters_row=parameters_row, critiquing_model_name=args.critiquing_model_name, lamb = args.lambdas, keyphrases_names = keyphrases_names, keyphrase_selection_method = args.keyphrase_selection_method) table_path = load_yaml('config/global.yml', key='path')['tables'] save_dataframe_csv(results, table_path, args.save_path)
def main(args): # Progress bar progress = WorkSplitter() # Show hyperparameter settings progress.section("Parameter Setting") print("Data Directory: {}".format(args.data_dir)) print("Number of Users Sampled: {}".format(args.num_users_sampled)) print("Number of Items Sampled: {}".format(args.num_items_sampled)) print("Number of Max Allowed Iterations: {}".format( args.max_iteration_threshold)) print("Critiquing Model: {}".format(args.critiquing_model_name)) R_train = load_numpy(path=args.data_dir, name=args.train_set) print("Train U-I Dimensions: {}".format(R_train.shape)) R_test = load_numpy(path=args.data_dir, name=args.test_set) print("Test U-I Dimensions: {}".format(R_test.shape)) R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set).toarray() print("Train User Keyphrase U-I Dimensions: {}".format( R_train_keyphrase.shape)) R_train_item_keyphrase = load_numpy( path=args.data_dir, name=args.train_item_keyphrase_set).toarray() print("Train Item Keyphrase U-I Dimensions: {}".format( R_train_item_keyphrase.shape)) # table_path = load_yaml('config/global.yml', key='path')['tables'] # parameters = find_best_hyperparameters(table_path+args.dataset_name, 'NDCG') # parameters_row = parameters.loc[parameters['model'] == args.model] parameters_row = pd.DataFrame({'iter': [4], 'lambda': [80], 'rank': [200]}) results = critiquing(matrix_Train=R_train, matrix_Test=R_test, keyphrase_freq=R_train_keyphrase, item_keyphrase_freq=R_train_item_keyphrase, num_users_sampled=args.num_users_sampled, num_items_sampled=args.num_items_sampled, max_iteration_threshold=args.max_iteration_threshold, dataset_name=args.dataset_name, model=models[args.model], parameters_row=parameters_row, critiquing_model_name=args.critiquing_model_name) # table_path = load_yaml('config/global.yml', key='path')['tables'] table_path = '/home/shuyang/data4/LatentLinearCritiquingforConvRecSys/' save_dataframe_csv(results, table_path, args.save_path)
def main(args): # Progress bar progress = WorkSplitter() # Show hyperparameter settings progress.section("Parameter Setting") print("Data Directory: {}".format(args.data_dir)) print("Number of Users Sampled: {}".format(args.num_users_sampled)) print("Number of Items Sampled: {}".format(args.num_items_sampled)) print("Number of Max Allowed Iterations: {}".format( args.max_iteration_threshold)) print("Critiquing Model: {}".format(args.critiquing_model_name)) R_train = load_numpy(path=args.data_dir, name=args.train_set) print("Train U-I Dimensions: {}".format(R_train.shape)) R_test = load_numpy(path=args.data_dir, name=args.test_set) print("Test U-I Dimensions: {}".format(R_test.shape)) R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set).toarray() print("Train Item Keyphrase U-I Dimensions: {}".format( R_train_keyphrase.shape)) R_train_item_keyphrase = load_numpy( path=args.data_dir, name=args.train_item_keyphrase_set).toarray() table_path = load_yaml('config/global.yml', key='path')['tables'] parameters = find_best_hyperparameters(table_path + args.dataset_name, 'NDCG') parameters_row = parameters.loc[parameters['model'] == args.model] lambs = [ 0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500, 1000, 10000, 100000 ] topks = [10, 20, 50, 100] if args.dataset_name == "yelp/": R_train_item_keyphrase = R_train_item_keyphrase.T for topk in topks: for lamb in lambs: results = critiquing( matrix_Train=R_train, matrix_Test=R_test, keyphrase_freq=R_train_keyphrase, item_keyphrase_freq=R_train_item_keyphrase, num_users_sampled=args.num_users_sampled, num_items_sampled=args.num_items_sampled, max_iteration_threshold=args.max_iteration_threshold, dataset_name=args.dataset_name, model=models[args.model], parameters_row=parameters_row, critiquing_model_name=args.critiquing_model_name, keyphrase_selection_method=args.keyphrase_selection_method, topk=topk, lamb=lamb) table_path = load_yaml('config/global.yml', key='path')['tables'] topk_path = "topk_" + str(topk) + "/" save_name = args.save_path + topk_path + "tuning_at_lamb_" + str( lamb) + "_with_" + args.keyphrase_selection_method + ".csv" save_dataframe_csv(results, table_path, save_name)
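# A minimal sketch of an entry point for the tuning run above. The flag names
# mirror the attributes read from `args`; the exact spellings, defaults, and
# file names are assumptions, not the repository's actual CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Keyphrase critiquing tuning (sketch)')
    parser.add_argument('--data_dir', type=str, default='data/')
    parser.add_argument('--dataset_name', type=str, default='yelp/')
    parser.add_argument('--model', type=str, required=True)
    parser.add_argument('--critiquing_model_name', type=str, required=True)
    parser.add_argument('--keyphrase_selection_method', type=str, required=True)
    parser.add_argument('--train_set', type=str, default='Rtrain.npz')
    parser.add_argument('--test_set', type=str, default='Rtest.npz')
    parser.add_argument('--train_keyphrase_set', type=str, default='Rkeyphrase_train.npz')
    parser.add_argument('--train_item_keyphrase_set', type=str, default='Rkeyphrase_item_train.npz')
    parser.add_argument('--num_users_sampled', type=int, default=100)
    parser.add_argument('--num_items_sampled', type=int, default=5)
    parser.add_argument('--max_iteration_threshold', type=int, default=20)
    parser.add_argument('--save_path', type=str, default='critiquing/')
    main(parser.parse_args())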