def main(args):

    settings_df = load_dataframe_csv(args.tab_path + args.setting_dir)

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    R_valid = load_numpy(path=args.data_dir, name=args.valid_set)
    R_test = load_numpy(path=args.data_dir, name=args.test_set)

    index_map = np.load(args.data_dir + args.index)

    item_names = None

    try:
        item_names = load_dataframe_csv(args.data_dir + args.names,
                                        delimiter="::",
                                        names=['ItemID', 'Name', 'Category'])
    except Exception:
        print("Meta-data does not exist")

    attention(R_train,
              R_valid,
              R_test,
              index_map,
              item_names,
              args.tex_path,
              args.fig_path,
              settings_df,
              args.template_path,
              preference_analysis=args.preference_analysis,
              case_study=args.case_study,
              gpu_on=True)
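
This main() assumes an args namespace produced by argparse. Below is a minimal sketch of the CLI wiring it implies; every flag name and default is hypothetical, inferred only from the args.* attributes accessed above.

import argparse

if __name__ == '__main__':
    # Hypothetical argument parser; the real defaults live in the original repository.
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='data/')
    parser.add_argument('--tab_path', default='tables/')
    parser.add_argument('--setting_dir', default='settings.csv')
    parser.add_argument('--train_set', default='Rtrain.npz')
    parser.add_argument('--valid_set', default='Rvalid.npz')
    parser.add_argument('--test_set', default='Rtest.npz')
    parser.add_argument('--index', default='Index.npy')
    parser.add_argument('--names', default='Names.csv')
    parser.add_argument('--tex_path', default='tex/')
    parser.add_argument('--fig_path', default='figs/')
    parser.add_argument('--template_path', default='templates/')
    parser.add_argument('--preference_analysis', action='store_true')
    parser.add_argument('--case_study', action='store_true')
    main(parser.parse_args())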
Example #2
def main(args):
    # Progress bar
    progress = WorkSplitter()

    # Show hyperparameter settings
    progress.section("Parameter Setting")
    print("Data Directory: {}".format(args.data_dir))
    print("Number of Users Sampled: {}".format(args.num_users_sampled))
    print("Number of Items Sampled: {}".format(args.num_items_sampled))
    print("Number of Max Allowed Iterations: {}".format(args.max_iteration_threshold))
    print("Critiquing Model: {}".format(args.critiquing_model_name))

    R_train = load_numpy(path=args.data_dir, name=args.train_set)
    print("Train U-I Dimensions: {}".format(R_train.shape))

    R_test = load_numpy(path=args.data_dir, name=args.test_set)
    print("Test U-I Dimensions: {}".format(R_test.shape))

    R_train_keyphrase = load_numpy(path=args.data_dir, name=args.train_keyphrase_set).toarray()
    print("Train Item Keyphrase U-I Dimensions: {}".format(R_train_keyphrase.shape))

    R_train_item_keyphrase = load_numpy(path=args.data_dir, name=args.train_item_keyphrase_set).toarray()

    table_path = load_yaml('config/global.yml', key='path')['tables']
    # parameters = find_best_hyperparameters(table_path+args.dataset_name, 'NDCG')
    # parameters_row = parameters.loc[parameters['model'] == args.model]

    # Hard-coded hyperparameters used in place of the tuned values above.
    parameters_row = {
        'iter': 10,
        'lambda': 200,
        'rank': 200
    }

    keyphrases_names = load_dataframe_csv(path=args.data_dir, name="Keyphrases.csv")['Phrases'].tolist()
    
    results = critiquing(matrix_Train=R_train,
                         matrix_Test=R_test,
                         keyphrase_freq=R_train_keyphrase,
                         item_keyphrase_freq=R_train_item_keyphrase.T,
                         num_users_sampled=args.num_users_sampled,
                         num_items_sampled=args.num_items_sampled,
                         max_iteration_threshold=args.max_iteration_threshold,
                         dataset_name=args.dataset_name,
                         model=models[args.model],
                         parameters_row=parameters_row,
                         critiquing_model_name=args.critiquing_model_name,
                         lamb=args.lambdas,
                         keyphrases_names=keyphrases_names,
                         keyphrase_selection_method=args.keyphrase_selection_method)

    save_dataframe_csv(results, table_path, args.save_path)
Example #3
def hyper_parameter_tuning(train, validation, params, save_path):
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:
        # No results table exists yet; start a fresh one.
        df = pd.DataFrame(columns=['model', 'k', 'topK'])

    num_user = train.shape[0]

    for algorithm in params['models']:

        for k in params['k']:

            if ((df['model'] == algorithm) & (df['k'] == k)).any():
                continue

            format = "model: {}, k: {}"
            progress.section(format.format(algorithm, k))

            progress.subsection("Training")
            model = params['models'][algorithm]()
            model.train(train)

            progress.subsection("Prediction")
            prediction_score = model.predict(train, k=k)

            prediction = predict(prediction_score=prediction_score,
                                 topK=params['topK'][-1],
                                 matrix_Train=train)

            progress.subsection("Evaluation")
            result = evaluate(prediction, validation, params['metric'],
                              params['topK'])

            result_dict = {'model': algorithm, 'k': k}

            for name in result.keys():
                result_dict[name] = [
                    round(result[name][0], 4),
                    round(result[name][1], 4)
                ]

            df = df.append(result_dict, ignore_index=True)

            save_dataframe_csv(df, table_path, save_path)
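
The params dictionary this tuner reads from can be inferred from the lookups above: a 'models' mapping from name to constructor, candidate values for 'k', evaluation cut-offs in 'topK', and metric names in 'metric'. A minimal sketch of a call follows; the model name and constructor are placeholders, while the topK and metric values mirror ones used elsewhere in these examples.

# Hypothetical usage; ItemKNN stands in for whatever constructor
# params['models'] maps to in the original repository.
params = {
    'models': {'ItemKNN': ItemKNN},            # name -> constructor
    'k': [10, 50, 100, 200],                   # candidate values of k to sweep
    'topK': [5, 10, 15, 20, 50],               # evaluation cut-offs
    'metric': ['R-Precision', 'NDCG', 'Recall', 'Precision'],
}

hyper_parameter_tuning(R_train, R_valid, params, save_path='knn_tuning.csv')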
Example #4
def hyper_parameter_tuning(train, validation, params, save_path, measure='Cosine', gpu_on=True):
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:
        df = pd.DataFrame(columns=['model', 'similarity', 'alpha', 'batch_size',
                                   'corruption', 'epoch', 'iteration', 'key_dimension',
                                   'lambda', 'learning_rate', 'mode_dimension',
                                   'normalize', 'rank', 'root', 'topK'])

    num_user = train.shape[0]

    for algorithm in params['models']:

        for alpha in params['alpha']:

            for batch_size in params['batch_size']:

                for corruption in params['corruption']:

                    for epoch in params['epoch']:

                        for iteration in params['iteration']:

                            for key_dim in params['key_dimension']:

                                for lamb in params['lambda']:

                                    for learning_rate in params['learning_rate']:

                                        for mode_dim in params['mode_dimension']:

                                            for rank in params['rank']:

                                                for root in params['root']:

                                                    if ((df['model'] == algorithm) &
                                                        (df['alpha'] == alpha) &
                                                        (df['batch_size'] == batch_size) &
                                                        (df['corruption'] == corruption) &
                                                        (df['epoch'] == epoch) &
                                                        (df['iteration'] == iteration) &
                                                        (df['key_dimension'] == key_dim) &
                                                        (df['lambda'] == lamb) &
                                                        (df['learning_rate'] == learning_rate) &
                                                        (df['mode_dimension'] == mode_dim) &
                                                        (df['rank'] == rank) &
                                                        (df['root'] == root)).any():
                                                        continue

                                                    format = "model: {}, alpha: {}, batch_size: {}, corruption: {}, epoch: {}, iteration: {}, \
                                                        key_dimension: {}, lambda: {}, learning_rate: {}, mode_dimension: {}, rank: {}, root: {}"
                                                    progress.section(format.format(algorithm, alpha, batch_size, corruption, epoch, iteration,
                                                                                   key_dim, lamb, learning_rate, mode_dim, rank, root))
                                                    RQ, Yt, Bias = params['models'][algorithm](train,
                                                                                               embedded_matrix=np.empty((0)),
                                                                                               mode_dim=mode_dim,
                                                                                               key_dim=key_dim,
                                                                                               batch_size=batch_size,
                                                                                               learning_rate=learning_rate,
                                                                                               iteration=iteration,
                                                                                               epoch=epoch,
                                                                                               rank=rank,
                                                                                               corruption=corruption,
                                                                                               gpu_on=gpu_on,
                                                                                               lamb=lamb,
                                                                                               alpha=alpha,
                                                                                               root=root)
                                                    Y = Yt.T

                                                    progress.subsection("Prediction")

                                                    prediction = predict(matrix_U=RQ,
                                                                         matrix_V=Y,
                                                                         bias=Bias,
                                                                         topK=params['topK'][-1],
                                                                         matrix_Train=train,
                                                                         measure=measure,
                                                                         gpu=gpu_on)

                                                    progress.subsection("Evaluation")

                                                    result = evaluate(prediction,
                                                                      validation,
                                                                      params['metric'],
                                                                      params['topK'])

                                                    result_dict = {'model': algorithm,
                                                                   'alpha': alpha,
                                                                   'batch_size': batch_size,
                                                                   'corruption': corruption,
                                                                   'epoch': epoch,
                                                                   'iteration': iteration,
                                                                   'key_dimension': key_dim,
                                                                   'lambda': lamb,
                                                                   'learning_rate': learning_rate,
                                                                   'mode_dimension': mode_dim,
                                                                   'rank': rank,
                                                                   'similarity': params['similarity'],
                                                                   'root': root}

                                                    for name in result.keys():
                                                        result_dict[name] = [round(result[name][0], 4), round(result[name][1], 4)]

                                                    df = df.append(result_dict, ignore_index=True)

                                                    save_dataframe_csv(df, table_path, save_path)
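
The deeply nested for-loops above enumerate the Cartesian product of eleven hyperparameter lists per model. A flatter way to generate the same combinations is itertools.product; this is only a sketch of the looping pattern, and the skip/train/predict/evaluate body stays exactly as written above.

from itertools import product

# Same search space as the nested loops above, expressed as a single product.
grid_keys = ['alpha', 'batch_size', 'corruption', 'epoch', 'iteration',
             'key_dimension', 'lambda', 'learning_rate', 'mode_dimension',
             'rank', 'root']

for algorithm in params['models']:
    for values in product(*(params[key] for key in grid_keys)):
        setting = dict(zip(grid_keys, values))
        # Skip settings already present in df, then run the same
        # training/prediction/evaluation body as the innermost loop above.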
Example #5
def explanation_parameter_tuning(num_users, num_items, user_col, item_col,
                                 rating_col, keyphrase_vector_col, df_train,
                                 df_valid, keyphrase_names, params, save_path):
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']
    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:
        df = pd.DataFrame(columns=[
            'model', 'rank', 'num_layers', 'train_batch_size',
            'predict_batch_size', 'lambda', 'topK', 'learning_rate', 'epoch',
            'negative_sampling_size'
        ])

    for algorithm in params['models']:

        for rank in params['rank']:

            for num_layers in params['num_layers']:

                for train_batch_size in params['train_batch_size']:

                    for predict_batch_size in params['predict_batch_size']:

                        for lamb in params['lambda']:

                            for learning_rate in params['learning_rate']:

                                for epoch in params['epoch']:

                                    for negative_sampling_size in params[
                                            'negative_sampling_size']:

                                        if ((df['model'] == algorithm) &
                                            (df['rank'] == rank) &
                                            (df['num_layers'] == num_layers) &
                                            (df['train_batch_size']
                                             == train_batch_size) &
                                            (df['predict_batch_size']
                                             == predict_batch_size) &
                                            (df['lambda'] == lamb) &
                                            (df['learning_rate']
                                             == learning_rate) &
                                            (df['epoch'] == epoch) &
                                            (df['negative_sampling_size']
                                             == negative_sampling_size)).any():
                                            continue

                                        format = "model: {0}, rank: {1}, num_layers: {2}, " \
                                                 "train_batch_size: {3}, predict_batch_size: {4}, " \
                                                 "lambda: {5}, learning_rate: {6}, epoch: {7}, " \
                                                 "negative_sampling_size: {8}"
                                        progress.section(
                                            format.format(
                                                algorithm, rank, num_layers,
                                                train_batch_size,
                                                predict_batch_size, lamb,
                                                learning_rate, epoch,
                                                negative_sampling_size))

                                        progress.subsection(
                                            "Initializing Negative Sampler")

                                        negative_sampler = Negative_Sampler(
                                            df_train[[user_col, item_col, keyphrase_vector_col]],
                                            user_col,
                                            item_col,
                                            rating_col,
                                            keyphrase_vector_col,
                                            num_items=num_items,
                                            batch_size=train_batch_size,
                                            num_keyphrases=len(keyphrase_names),
                                            negative_sampling_size=negative_sampling_size)

                                        model = params['models'][algorithm](
                                            num_users=num_users,
                                            num_items=num_items,
                                            text_dim=len(keyphrase_names),
                                            embed_dim=rank,
                                            num_layers=num_layers,
                                            negative_sampler=negative_sampler,
                                            lamb=lamb,
                                            learning_rate=learning_rate)

                                        progress.subsection("Training")

                                        model.train_model(df_train,
                                                          user_col,
                                                          item_col,
                                                          rating_col,
                                                          epoch=epoch)

                                        progress.subsection("Prediction")

                                        df_valid_explanation = predict_explanation(
                                            model,
                                            df_valid,
                                            user_col,
                                            item_col,
                                            topk_keyphrase=params['topK'][-1])

                                        progress.subsection("Evaluation")

                                        explanation_result = evaluate_explanation(
                                            df_valid_explanation, df_valid,
                                            params['metric'], params['topK'],
                                            user_col, item_col, rating_col,
                                            keyphrase_vector_col)

                                        result_dict = {
                                            'model': algorithm,
                                            'rank': rank,
                                            'num_layers': num_layers,
                                            'train_batch_size': train_batch_size,
                                            'predict_batch_size': predict_batch_size,
                                            'lambda': lamb,
                                            'learning_rate': learning_rate,
                                            'epoch': epoch,
                                            'negative_sampling_size': negative_sampling_size
                                        }

                                        for name in explanation_result.keys():
                                            result_dict[name] = [
                                                round(explanation_result[name][0], 4),
                                                round(explanation_result[name][1], 4)
                                            ]

                                        df = df.append(result_dict,
                                                       ignore_index=True)

                                        model.sess.close()
                                        tf.reset_default_graph()

                                        save_dataframe_csv(
                                            df, table_path, save_path)
Example #6
def hyper_parameter_tuning(train,
                           validation,
                           params,
                           save_path,
                           measure='Cosine',
                           gpu_on=True):
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:
        df = pd.DataFrame(columns=[
            'model', 'rank', 'alpha', 'lambda', 'iter', 'similarity',
            'corruption', 'root', 'topK'
        ])

    num_user = train.shape[0]

    for algorithm in params['models']:

        for rank in params['rank']:

            for alpha in params['alpha']:

                for lam in params['lambda']:

                    for corruption in params['corruption']:

                        for root in params['root']:

                            if ((df['model'] == algorithm) &
                                (df['rank'] == rank) & (df['alpha'] == alpha) &
                                (df['lambda'] == lam) &
                                (df['corruption'] == corruption) &
                                (df['root'] == root)).any():
                                continue

                            format = "model: {0}, rank: {1}, alpha: {2}, lambda: {3}, corruption: {4}, root: {5}"
                            progress.section(
                                format.format(algorithm, rank, alpha, lam,
                                              corruption, root))
                            RQ, Yt, Bias = params['models'][algorithm](
                                train,
                                embeded_matrix=np.empty((0)),
                                iteration=params['iter'],
                                rank=rank,
                                lam=lam,
                                alpha=alpha,
                                corruption=corruption,
                                root=root,
                                gpu_on=gpu_on)
                            Y = Yt.T

                            progress.subsection("Prediction")

                            prediction = predict(matrix_U=RQ,
                                                 matrix_V=Y,
                                                 measure=measure,
                                                 bias=Bias,
                                                 topK=params['topK'][-1],
                                                 matrix_Train=train,
                                                 gpu=gpu_on)

                            progress.subsection("Evaluation")

                            result = evaluate(prediction, validation,
                                              params['metric'], params['topK'])

                            result_dict = {
                                'model': algorithm,
                                'rank': rank,
                                'alpha': alpha,
                                'lambda': lam,
                                'iter': params['iter'],
                                'similarity': params['similarity'],
                                'corruption': corruption,
                                'root': root
                            }

                            for name in result.keys():
                                result_dict[name] = [
                                    round(result[name][0], 4),
                                    round(result[name][1], 4)
                                ]

                            df = df.append(result_dict, ignore_index=True)

                            save_dataframe_csv(df, table_path, save_path)
Example #7
def hyper_parameter_tuning(train, validation, params, save_path, gpu_on=True):
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:
        df = pd.DataFrame(
            columns=['model', 'rank', 'lambda', 'epoch', 'corruption', 'topK'])

    num_user = train.shape[0]

    for algorithm in params['models']:

        for rank in params['rank']:

            for lamb in params['lambda']:

                for corruption in params['corruption']:

                    if ((df['model'] == algorithm) & (df['rank'] == rank) &
                        (df['lambda'] == lamb) &
                        (df['corruption'] == corruption)).any():
                        continue

                    format = "model: {}, rank: {}, lambda: {}, corruption: {}"
                    progress.section(
                        format.format(algorithm, rank, lamb, corruption))
                    RQ, Yt, Bias = params['models'][algorithm](
                        train,
                        epoch=params['epoch'],
                        lamb=lamb,
                        rank=rank,
                        corruption=corruption)
                    Y = Yt.T

                    progress.subsection("Prediction")

                    prediction = predict(matrix_U=RQ,
                                         matrix_V=Y,
                                         bias=Bias,
                                         topK=params['topK'][-1],
                                         matrix_Train=train,
                                         gpu=gpu_on)

                    progress.subsection("Evaluation")

                    result = evaluate(prediction, validation, params['metric'],
                                      params['topK'])

                    result_dict = {
                        'model': algorithm,
                        'rank': rank,
                        'lambda': lamb,
                        'epoch': params['epoch'],
                        'corruption': corruption
                    }

                    for name in result.keys():
                        result_dict[name] = [
                            round(result[name][0], 4),
                            round(result[name][1], 4)
                        ]

                    df = df.append(result_dict, ignore_index=True)

                    save_dataframe_csv(df, table_path, save_path)
Example #8
import pandas as pd
from utils.io import load_dataframe_csv
from plots.rec_plots import precision_recall_curve

topK = [5, 10, 15, 20, 50]

df = load_dataframe_csv('tables/', 'movielens20m_result.csv')
precision_recall_curve(df,
                       topK,
                       save=True,
                       folder='analysis/' + 'movielens20m',
                       reloaded=True)

df = load_dataframe_csv('tables/', 'netflix_result.csv')
precision_recall_curve(df,
                       topK,
                       save=True,
                       folder='analysis/' + 'netflix',
                       reloaded=True)

df = load_dataframe_csv('tables/', 'yahoo_result.csv')
precision_recall_curve(df,
                       topK,
                       save=True,
                       folder='analysis/' + 'yahoo',
                       reloaded=True)
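
The three blocks above differ only in the result file and the output folder, so the same plots can be produced with one loop over the dataset names; this is a behaviour-preserving sketch of that restructuring.

for dataset in ['movielens20m', 'netflix', 'yahoo']:
    df = load_dataframe_csv('tables/', '{}_result.csv'.format(dataset))
    precision_recall_curve(df,
                           topK,
                           save=True,
                           folder='analysis/' + dataset,
                           reloaded=True)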
Example #9
def general(train, test, keyphrase_train, keyphrase_test, params, save_path, final_explanation=False):
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']
    df = find_best_hyperparameters(table_path + params['tuning_result_path'], 'NDCG')

    try:
        output_df = load_dataframe_csv(table_path, save_path)
    except Exception:
        output_df = pd.DataFrame(columns=['model', 'rank', 'beta', 'lambda_l2', 'lambda_keyphrase', 'lambda_latent', 'lambda_rating', 'topK', 'learning_rate', 'epoch', 'corruption', 'optimizer'])

    for index, row in df.iterrows():

        algorithm = row['model']
        rank = row['rank']
        beta = row['beta']
        lamb_l2 = row['lambda_l2']
        lamb_keyphrase = row['lambda_keyphrase']
        lamb_latent = row['lambda_latent']
        lamb_rating = row['lambda_rating']
        learning_rate = row['learning_rate']
        epoch = row['epoch']
        corruption = row['corruption']
        optimizer = row['optimizer']

        row['topK'] = [5, 10, 15, 20, 50]
        row['metric'] = ['R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP']

        format = "model: {}, rank: {}, beta: {}, lambda_l2: {}, lambda_keyphrase: {}, " \
                 "lambda_latent: {}, lambda_rating: {}, learning_rate: {}, " \
                 "epoch: {}, corruption: {}, optimizer: {}"

        progress.section(format.format(algorithm, rank, beta, lamb_l2, lamb_keyphrase, lamb_latent, lamb_rating, learning_rate, epoch, corruption, optimizer))

        progress.subsection("Training")

        model = models[algorithm](matrix_train=train,
                                  epoch=epoch,
                                  lamb_l2=lamb_l2,
                                  lamb_keyphrase=lamb_keyphrase,
                                  lamb_latent=lamb_latent,
                                  lamb_rating=lamb_rating,
                                  beta=beta,
                                  learning_rate=learning_rate,
                                  rank=rank,
                                  corruption=corruption,
                                  optimizer=optimizer,
                                  matrix_train_keyphrase=keyphrase_train)

        progress.subsection("Prediction")

        rating_score, keyphrase_score = model.predict(train.todense())

        progress.subsection("Evaluation")

        if final_explanation:
            prediction = predict_keyphrase(keyphrase_score,
                                           topK=row['topK'][-2])

            result = evaluate_explanation(prediction,
                                          keyphrase_test,
                                          row['metric'],
                                          row['topK'])
        else:
            prediction = predict(rating_score,
                                 topK=row['topK'][-1],
                                 matrix_Train=train)

            result = evaluate(prediction, test, row['metric'], row['topK'])

        result_dict = {'model': algorithm,
                       'rank': rank,
                       'beta': beta,
                       'lambda_l2': lamb_l2,
                       'lambda_keyphrase': lamb_keyphrase,
                       'lambda_latent': lamb_latent,
                       'lambda_rating': lamb_rating,
                       'learning_rate': learning_rate,
                       'epoch': epoch,
                       'corruption': corruption,
                       'optimizer': optimizer}

        for name in result.keys():
            result_dict[name] = [round(result[name][0], 4),
                                 round(result[name][1], 4)]

        output_df = output_df.append(result_dict, ignore_index=True)

        model.sess.close()
        tf.reset_default_graph()

        save_dataframe_csv(output_df, table_path, save_path)

    return output_df
Example #10
def hyper_parameter_tuning(train, validation, keyphrase_train, keyphrase_validation, params, save_path, tune_explanation=False):
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']

    try:
        df = load_dataframe_csv(table_path, save_path)
    except Exception:
        df = pd.DataFrame(columns=['model', 'rank', 'beta', 'lambda_l2', 'lambda_keyphrase', 'lambda_latent', 'lambda_rating', 'topK', 'learning_rate', 'epoch', 'corruption', 'optimizer'])

    for algorithm in params['models']:

        for rank in params['rank']:

            for beta in params['beta']:

                for lamb_l2 in params['lambda_l2']:

                    for lamb_keyphrase in params['lambda_keyphrase']:

                        for lamb_latent in params['lambda_latent']:

                            for lamb_rating in params['lambda_rating']:

                                for learning_rate in params['learning_rate']:

                                    for epoch in params['epoch']:

                                        for corruption in params['corruption']:

                                            for optimizer in params['optimizer']:

                                                if ((df['model'] == algorithm) &
                                                    (df['rank'] == rank) &
                                                    (df['beta'] == beta) &
                                                    (df['lambda_l2'] == lamb_l2) &
                                                    (df['lambda_keyphrase'] == lamb_keyphrase) &
                                                    (df['lambda_latent'] == lamb_latent) &
                                                    (df['lambda_rating'] == lamb_rating) &
                                                    (df['learning_rate'] == learning_rate) &
                                                    (df['epoch'] == epoch) &
                                                    (df['corruption'] == corruption) &
                                                    (df['optimizer'] == optimizer)).any() or (lamb_latent != lamb_keyphrase):
                                                    continue

                                                format = "model: {}, rank: {}, beta: {}, lambda_l2: {}, " \
                                                    "lambda_keyphrase: {}, lambda_latent: {}, lambda_rating: {}, " \
                                                    "learning_rate: {}, epoch: {}, corruption: {}, optimizer: {}"
                                                progress.section(format.format(algorithm,
                                                                               rank,
                                                                               beta,
                                                                               lamb_l2,
                                                                               lamb_keyphrase,
                                                                               lamb_latent,
                                                                               lamb_rating,
                                                                               learning_rate,
                                                                               epoch,
                                                                               corruption,
                                                                               optimizer))

                                                progress.subsection("Training")

                                                model = models[algorithm](matrix_train=train,
                                                                          epoch=epoch,
                                                                          lamb_l2=lamb_l2,
                                                                          lamb_keyphrase=lamb_keyphrase,
                                                                          lamb_latent=lamb_latent,
                                                                          lamb_rating=lamb_rating,
                                                                          beta=beta,
                                                                          learning_rate=learning_rate,
                                                                          rank=rank,
                                                                          corruption=corruption,
                                                                          optimizer=optimizer,
                                                                          matrix_train_keyphrase=keyphrase_train)

                                                progress.subsection("Prediction")

                                                rating_score, keyphrase_score = model.predict(train.todense())

                                                progress.subsection("Evaluation")

                                                if tune_explanation:
                                                    prediction = predict_keyphrase(keyphrase_score,
                                                                                   topK=params['topK'][-1])

                                                    result = evaluate(prediction,
                                                                      keyphrase_validation,
                                                                      params['metric'],
                                                                      params['topK'])
                                                else:
                                                    prediction = predict(rating_score,
                                                                         topK=params['topK'][-1],
                                                                         matrix_Train=train)

                                                    result = evaluate(prediction,
                                                                      validation,
                                                                      params['metric'],
                                                                      params['topK'])

                                                result_dict = {'model': algorithm,
                                                               'rank': rank,
                                                               'beta': beta,
                                                               'lambda_l2': lamb_l2,
                                                               'lambda_keyphrase': lamb_keyphrase,
                                                               'lambda_latent': lamb_latent,
                                                               'lambda_rating': lamb_rating,
                                                               'learning_rate': learning_rate,
                                                               'epoch': epoch,
                                                               'corruption': corruption,
                                                               'optimizer': optimizer}

                                                for name in result.keys():
                                                    result_dict[name] = [round(result[name][0], 4),
                                                                         round(result[name][1], 4)]

                                                df = df.append(result_dict, ignore_index=True)

                                                model.sess.close()
                                                tf.reset_default_graph()

                                                save_dataframe_csv(df, table_path, save_path)
Example #11
def general(num_users, num_items, user_col, item_col, rating_col,
            keyphrase_vector_col, df_train, df_test, keyphrase_names, params,
            save_path):
    progress = WorkSplitter()
    table_path = load_yaml('config/global.yml', key='path')['tables']
    df = find_best_hyperparameters(table_path + params['tuning_result_path'],
                                   'NDCG')

    try:
        output_df = load_dataframe_csv(table_path, save_path)
    except Exception:
        output_df = pd.DataFrame(columns=[
            'model', 'rank', 'num_layers', 'train_batch_size',
            'predict_batch_size', 'lambda', 'topK', 'learning_rate', 'epoch',
            'negative_sampling_size'
        ])

    for index, row in df.iterrows():

        algorithm = row['model']
        rank = row['rank']
        num_layers = row['num_layers']
        train_batch_size = row['train_batch_size']
        predict_batch_size = row['predict_batch_size']
        lamb = row['lambda']
        learning_rate = row['learning_rate']
        epoch = 300
        negative_sampling_size = row['negative_sampling_size']

        row['topK'] = [5, 10, 15, 20, 50]
        row['metric'] = [
            'R-Precision', 'NDCG', 'Clicks', 'Recall', 'Precision', 'MAP'
        ]

        format = "model: {0}, rank: {1}, num_layers: {2}, train_batch_size: {3}, " \
                 "predict_batch_size: {4}, lambda: {5}, learning_rate: {6}, epoch: {7}, negative_sampling_size: {8}"
        progress.section(
            format.format(algorithm, rank, num_layers, train_batch_size,
                          predict_batch_size, lamb, learning_rate, epoch,
                          negative_sampling_size))

        progress.subsection("Initializing Negative Sampler")

        negative_sampler = Negative_Sampler(
            df_train[[user_col, item_col, keyphrase_vector_col]],
            user_col,
            item_col,
            rating_col,
            keyphrase_vector_col,
            num_items=num_items,
            batch_size=train_batch_size,
            num_keyphrases=len(keyphrase_names),
            negative_sampling_size=negative_sampling_size)

        model = models[algorithm](num_users=num_users,
                                  num_items=num_items,
                                  text_dim=len(keyphrase_names),
                                  embed_dim=rank,
                                  num_layers=num_layers,
                                  negative_sampler=negative_sampler,
                                  lamb=lamb,
                                  learning_rate=learning_rate)

        progress.subsection("Training")

        pretrained_path = load_yaml('config/global.yml',
                                    key='path')['pretrained']
        # try:
        #     model.load_model(pretrained_path+params['tuning_result_path'], row['model'])
        # except:
        model.train_model(df_train,
                          user_col,
                          item_col,
                          rating_col,
                          epoch=epoch)
        # model.save_model(pretrained_path+params['tuning_result_path'], row['model'])

        progress.subsection("Prediction")

        prediction, explanation = predict_elementwise(
            model,
            df_train,
            user_col,
            item_col,
            row['topK'][-1],
            batch_size=row['predict_batch_size'],
            enable_explanation=False,
            keyphrase_names=keyphrase_names)

        R_test = to_sparse_matrix(df_test, num_users, num_items, user_col,
                                  item_col, rating_col)

        result = evaluate(prediction, R_test, row['metric'], row['topK'])

        # Not finished yet
        result_dict = {
            'model': row['model'],
            'rank': row['rank'],
            'num_layers': row['num_layers'],
            'train_batch_size': row['train_batch_size'],
            'predict_batch_size': row['predict_batch_size'],
            'lambda': row['lambda'],
            'topK': row['topK'][-1],
            'learning_rate': row['learning_rate'],
            'epoch': epoch,
            'negative_sampling_size': row['negative_sampling_size'],
        }

        for name in result.keys():
            result_dict[name] = round(result[name][0], 4)
        output_df = output_df.append(result_dict, ignore_index=True)

        model.sess.close()
        tf.reset_default_graph()

        save_dataframe_csv(output_df, table_path, save_path)

    return output_df