Exemplo n.º 1
0
def main(args):
    """Load the training matrices, binarize the keyphrase data, run critiquing.

    The array loaded from ``args.train_keyphrase_set`` has every non-zero entry
    replaced by 1.  The matrix loaded from ``args.train_item_keyphrase_set`` is
    transposed so that each row is one item; per row, only the (at most) 10
    non-zero entries with the highest values are kept and set to 1, all other
    entries become 0.  The result is transposed back and passed on as a sparse
    CSR matrix.

    Args:
        args: Parsed options.  Attributes read here: ``data_dir``,
            ``train_set``, ``train_keyphrase_set``,
            ``train_item_keyphrase_set``, ``num_users_sampled``,
            ``load_path``, ``save_path`` and ``critiquing_function``.
    """
    data_dir = args.data_dir

    rating_matrix = load_numpy(path=data_dir, name=args.train_set)

    keyphrase_matrix = load_numpy(path=data_dir, name=args.train_keyphrase_set).toarray()
    keyphrase_matrix[keyphrase_matrix != 0] = 1

    item_keyphrase_matrix = load_numpy(path=data_dir, name=args.train_item_keyphrase_set).T.toarray()

    # Each `frequencies` is a writable view of one item's row, so the
    # assignments below update `item_keyphrase_matrix` in place.
    for frequencies in item_keyphrase_matrix:
        present = frequencies.nonzero()[0]
        # Fancy indexing copies, so `strongest` is fixed before the row is reset.
        strongest = present[np.argsort(-frequencies[present])[:10]]
        frequencies[:] = 0
        frequencies[strongest] = 1

    item_keyphrase_matrix = sparse.csr_matrix(item_keyphrase_matrix).T

    # No model options are forwarded here; the params dict stays empty.
    critiquing(rating_matrix,
               keyphrase_matrix,
               item_keyphrase_matrix,
               {},
               args.num_users_sampled,
               load_path=args.load_path,
               save_path=args.save_path,
               critiquing_function=args.critiquing_function)
def main(args):
    """Prepare the positive training interactions and run critiquing.

    Reads ``<user_col>.csv`` and ``<item_col>.csv`` from ``args.data_dir`` to
    count distinct users and items, keeps only the training rows whose rating
    column equals 1, parses the keyphrase-vector column from its string form
    with ``ast.literal_eval`` and loads the keyphrase names.

    Args:
        args: Parsed options.  Attributes read here: ``data_dir``,
            ``user_col``, ``item_col``, ``train_set``, ``rating_col``,
            ``keyphrase_vector_col``, ``keyphrase_set``, ``keyphrase_col``,
            ``model_saved_path``, ``num_users_sampled``, ``load_path`` and
            ``save_path``.  ``data_dir`` is used as a plain string prefix, so
            it must already end with a path separator.
    """
    num_users = pd.read_csv(args.data_dir + args.user_col +
                            '.csv')[args.user_col].nunique()
    num_items = pd.read_csv(args.data_dir + args.item_col +
                            '.csv')[args.item_col].nunique()

    df_train = pd.read_csv(args.data_dir + args.train_set)
    # Take an explicit copy of the filtered rows: the column assignment below
    # would otherwise write into a slice of the original frame, which is the
    # pattern pandas reports with SettingWithCopyWarning.
    df_train = df_train[df_train[args.rating_col] == 1].copy()
    df_train[args.keyphrase_vector_col] = df_train[
        args.keyphrase_vector_col].apply(ast.literal_eval)

    keyphrase_names = pd.read_csv(args.data_dir + args.keyphrase_set)[
        args.keyphrase_col].values

    params = dict()
    params['model_saved_path'] = args.model_saved_path

    critiquing(num_users,
               num_items,
               args.user_col,
               args.item_col,
               args.rating_col,
               args.keyphrase_vector_col,
               df_train,
               keyphrase_names,
               params,
               args.num_users_sampled,
               load_path=args.load_path,
               save_path=args.save_path)
Exemplo n.º 3
0
def main(args):
    """Run one critiquing experiment and save the result table as CSV.

    Prints the run settings, loads the train/test matrices and the two
    keyphrase matrices, selects the row for ``args.model`` from the table
    returned by ``find_best_hyperparameters(..., 'NDCG')``, calls
    ``critiquing`` and writes its result under the configured tables path.

    Args:
        args: Parsed options.  Attributes read here: ``data_dir``,
            ``num_users_sampled``, ``num_items_sampled``,
            ``max_iteration_threshold``, ``critiquing_model_name``,
            ``train_set``, ``test_set``, ``train_keyphrase_set``,
            ``train_item_keyphrase_set``, ``dataset_name``, ``model``,
            ``keyphrase_selection_method``, ``topk``, ``lamb`` and
            ``save_path``.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(
        args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    # This is the matrix from train_keyphrase_set, not the item-keyphrase one,
    # so it must not be reported under the "Item Keyphrase" label.
    print("Train User Keyphrase U-I Dimensions: {}".format(
        R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    # Read the tables path once, up front: a broken config fails before the
    # long-running critiquing call, and the value is reused when saving.
    table_path = load_yaml('config/global.yml', key='path')['tables']
    parameters = find_best_hyperparameters(table_path + args.dataset_name,
                                           'NDCG')
    parameters_row = parameters.loc[parameters['model'] == args.model]

    # The yelp item-keyphrase array is transposed before use -- presumably it
    # is stored in the opposite orientation; confirm against the data files.
    if args.dataset_name == "yelp/":
        R_train_item_keyphrase = R_train_item_keyphrase.T

    start_time = time.time()

    results = critiquing(
        matrix_Train=R_train,
        matrix_Test=R_test,
        keyphrase_freq=R_train_keyphrase,
        item_keyphrase_freq=R_train_item_keyphrase,
        num_users_sampled=args.num_users_sampled,
        num_items_sampled=args.num_items_sampled,
        max_iteration_threshold=args.max_iteration_threshold,
        dataset_name=args.dataset_name,
        model=models[args.model],
        parameters_row=parameters_row,
        critiquing_model_name=args.critiquing_model_name,
        keyphrase_selection_method=args.keyphrase_selection_method,
        topk=args.topk,
        lamb=args.lamb)

    print("Final Time Elapsed: {}".format(inhour(time.time() - start_time)))

    save_dataframe_csv(results, table_path, args.save_path)
Exemplo n.º 4
0
def main(args):
    """Run one critiquing experiment with fixed hyperparameters and save it.

    Prints the run settings, loads the train/test matrices, the two keyphrase
    matrices and the keyphrase names from ``Keyphrases.csv``, calls
    ``critiquing`` and writes its result under the configured tables path.

    Args:
        args: Parsed options.  Attributes read here: ``data_dir``,
            ``num_users_sampled``, ``num_items_sampled``,
            ``max_iteration_threshold``, ``critiquing_model_name``,
            ``train_set``, ``test_set``, ``train_keyphrase_set``,
            ``train_item_keyphrase_set``, ``dataset_name``, ``model``,
            ``lambdas``, ``keyphrase_selection_method`` and ``save_path``.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set).toarray()
    # This is the matrix from train_keyphrase_set, not the item-keyphrase one,
    # so it must not be reported under the "Item Keyphrase" label.
    print("Train User Keyphrase U-I Dimensions: {}".format(R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    # Read the tables path once, up front: a broken config fails before the
    # long-running critiquing call, and the value is reused when saving.
    table_path = load_yaml('config/global.yml', key='path')['tables']

    # Hyperparameters are fixed here rather than looked up through
    # find_best_hyperparameters for args.model.
    parameters_row = {
        'iter': 10,
        'lambda': 200,
        'rank': 200,
    }

    keyphrases_names = load_dataframe_csv(path=args.data_dir, name="Keyphrases.csv")['Phrases'].tolist()

    # The item-keyphrase array is always passed transposed in this variant.
    results = critiquing(matrix_Train=R_train,
                         matrix_Test=R_test,
                         keyphrase_freq=R_train_keyphrase,
                         item_keyphrase_freq=R_train_item_keyphrase.T,
                         num_users_sampled=args.num_users_sampled,
                         num_items_sampled=args.num_items_sampled,
                         max_iteration_threshold=args.max_iteration_threshold,
                         dataset_name=args.dataset_name,
                         model=models[args.model],
                         parameters_row=parameters_row,
                         critiquing_model_name=args.critiquing_model_name,
                         lamb=args.lambdas,
                         keyphrases_names=keyphrases_names,
                         keyphrase_selection_method=args.keyphrase_selection_method)

    save_dataframe_csv(results, table_path, args.save_path)
def main(args):
    """Run one critiquing experiment with fixed hyperparameters and save it.

    Prints the run settings and the shape of every loaded matrix, calls
    ``critiquing`` with a one-row hyperparameter frame (iter=4, lambda=80,
    rank=200) and writes the result to a fixed output directory.

    Args:
        args: Parsed options.  Attributes read here: ``data_dir``,
            ``num_users_sampled``, ``num_items_sampled``,
            ``max_iteration_threshold``, ``critiquing_model_name``,
            ``train_set``, ``test_set``, ``train_keyphrase_set``,
            ``train_item_keyphrase_set``, ``dataset_name``, ``model`` and
            ``save_path``.
    """
    # Progress bar
    splitter = WorkSplitter()

    # Show hyperparameter settings
    splitter.section("Parameter Setting")
    settings = (
        ("Data Directory: {}", args.data_dir),
        ("Number of Users Sampled: {}", args.num_users_sampled),
        ("Number of Items Sampled: {}", args.num_items_sampled),
        ("Number of Max Allowed Iterations: {}", args.max_iteration_threshold),
        ("Critiquing Model: {}", args.critiquing_model_name),
    )
    for template, value in settings:
        print(template.format(value))

    def load_matrix(name):
        # Every matrix of this run lives in the same data directory.
        return load_numpy(path=args.data_dir, name=name)

    train_matrix = load_matrix(args.train_set)
    print("Train U-I Dimensions: {}".format(train_matrix.shape))

    test_matrix = load_matrix(args.test_set)
    print("Test U-I Dimensions: {}".format(test_matrix.shape))

    user_keyphrase = load_matrix(args.train_keyphrase_set).toarray()
    print("Train User Keyphrase U-I Dimensions: {}".format(
        user_keyphrase.shape))

    item_keyphrase = load_matrix(args.train_item_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(
        item_keyphrase.shape))

    # Hyperparameters are fixed here rather than looked up through
    # find_best_hyperparameters for args.model.
    fixed_hyperparameters = pd.DataFrame(
        {'iter': [4], 'lambda': [80], 'rank': [200]})

    critiquing_kwargs = {
        'matrix_Train': train_matrix,
        'matrix_Test': test_matrix,
        'keyphrase_freq': user_keyphrase,
        'item_keyphrase_freq': item_keyphrase,
        'num_users_sampled': args.num_users_sampled,
        'num_items_sampled': args.num_items_sampled,
        'max_iteration_threshold': args.max_iteration_threshold,
        'dataset_name': args.dataset_name,
        'model': models[args.model],
        'parameters_row': fixed_hyperparameters,
        'critiquing_model_name': args.critiquing_model_name,
    }
    outcome = critiquing(**critiquing_kwargs)

    # NOTE(review): the output directory is a machine-specific absolute path
    # instead of the 'tables' entry of config/global.yml -- confirm before
    # running this anywhere else.
    output_dir = '/home/shuyang/data4/LatentLinearCritiquingforConvRecSys/'
    save_dataframe_csv(outcome, output_dir, args.save_path)
def main(args):
    """Grid-search ``topk`` and ``lamb`` for critiquing, saving one CSV per run.

    Prints the run settings, loads the train/test matrices and the two
    keyphrase matrices, selects the row for ``args.model`` from the table
    returned by ``find_best_hyperparameters(..., 'NDCG')`` and then calls
    ``critiquing`` once for every (topk, lamb) combination.  Each result is
    saved as
    ``<save_path>topk_<topk>/tuning_at_lamb_<lamb>_with_<method>.csv`` under
    the configured tables path.

    Args:
        args: Parsed options.  Attributes read here: ``data_dir``,
            ``num_users_sampled``, ``num_items_sampled``,
            ``max_iteration_threshold``, ``critiquing_model_name``,
            ``train_set``, ``test_set``, ``train_keyphrase_set``,
            ``train_item_keyphrase_set``, ``dataset_name``, ``model``,
            ``keyphrase_selection_method`` and ``save_path``.
    """
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(
        args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir,
                                   name=args.train_keyphrase_set).toarray()
    # This is the matrix from train_keyphrase_set, not the item-keyphrase one,
    # so it must not be reported under the "Item Keyphrase" label.
    print("Train User Keyphrase U-I Dimensions: {}".format(
        R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(
        path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    # The tables path is loop-invariant: read the config once and reuse it for
    # the hyperparameter lookup and for every save in the grid below.
    table_path = load_yaml('config/global.yml', key='path')['tables']
    parameters = find_best_hyperparameters(table_path + args.dataset_name,
                                           'NDCG')
    parameters_row = parameters.loc[parameters['model'] == args.model]

    lambs = (
        0.001, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 50, 70, 90, 100, 200, 500,
        1000, 10000, 100000,
    )
    topks = (10, 20, 50, 100)

    # The yelp item-keyphrase array is transposed before use -- presumably it
    # is stored in the opposite orientation; confirm against the data files.
    if args.dataset_name == "yelp/":
        R_train_item_keyphrase = R_train_item_keyphrase.T

    for topk in topks:
        for lamb in lambs:
            results = critiquing(
                matrix_Train=R_train,
                matrix_Test=R_test,
                keyphrase_freq=R_train_keyphrase,
                item_keyphrase_freq=R_train_item_keyphrase,
                num_users_sampled=args.num_users_sampled,
                num_items_sampled=args.num_items_sampled,
                max_iteration_threshold=args.max_iteration_threshold,
                dataset_name=args.dataset_name,
                model=models[args.model],
                parameters_row=parameters_row,
                critiquing_model_name=args.critiquing_model_name,
                keyphrase_selection_method=args.keyphrase_selection_method,
                topk=topk,
                lamb=lamb)
            save_name = "{}topk_{}/tuning_at_lamb_{}_with_{}.csv".format(
                args.save_path, topk, lamb, args.keyphrase_selection_method)
            save_dataframe_csv(results, table_path, save_name)