# ===== Exemplo n.º 1 (example no. 1 — scraped snippet separator; score: 0) =====
import numpy as np
import pandas as pd

import evaluation
import gru4rec

PATH_TO_TRAIN = '/db_vol/hb_work/rnn/data/processed/recsys_challenge_train_full.txt'
PATH_TO_TEST = '/db_vol/hb_work/rnn/data/processed/recsys_challenge_test.txt'

if __name__ == '__main__':
    # Load the preprocessed RSC15 splits (tab-separated; ItemId forced to int64).
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    # Reference RSC15 results from "Recurrent Neural Networks with Top-k Gains
    # for Session-based Recommendations" (http://arxiv.org/abs/1706.03847).

    # BPR-max, no embedding (R@20 = 0.7197, M@20 = 0.3157)
    gru = gru4rec.GRU4Rec(loss='bpr-max',
                          final_act='elu-0.5',
                          hidden_act='tanh',
                          layers=[100],
                          adapt='adagrad',
                          n_epochs=10,
                          batch_size=32,
                          dropout_p_embed=0,
                          dropout_p_hidden=0,
                          learning_rate=0.2,
                          momentum=0.3,
                          n_sample=2048,
                          sample_alpha=0,
                          bpreg=1,
                          constrained_embedding=False)
    gru.fit(data)
    res = evaluation.evaluate_gpu(gru, valid)
    print(f'Recall@20: {res[0]}')
    print(f'MRR@20: {res[1]}')

    # BPR-max, constrained embedding (R@20 = 0.7261, M@20 = 0.3124)
    gru = gru4rec.GRU4Rec(loss='bpr-max',
                          final_act='elu-0.5',
                          hidden_act='tanh',
                          layers=[100],
                          adapt='adagrad',
                          n_epochs=10,
                          batch_size=32,
                          dropout_p_embed=0,
                          dropout_p_hidden=0,
                          learning_rate=0.2,
                          momentum=0.1,
                          n_sample=2048,
                          sample_alpha=0,
                          bpreg=0.5,
                          constrained_embedding=True)
    gru.fit(data)
    res = evaluation.evaluate_gpu(gru, valid)
    print(f'Recall@20: {res[0]}')
    print(f'MRR@20: {res[1]}')

    # Cross-entropy (R@20 = 0.7180, M@20 = 0.3087)
    gru = gru4rec.GRU4Rec(loss='cross-entropy',
                          final_act='softmax',
                          hidden_act='tanh',
                          layers=[100],
                          adapt='adagrad',
                          n_epochs=10,
                          batch_size=32,
                          dropout_p_embed=0,
                          dropout_p_hidden=0.3,
                          learning_rate=0.1,
                          momentum=0.7,
                          n_sample=2048,
                          sample_alpha=0,
                          bpreg=0,
                          constrained_embedding=False)
    gru.fit(data)
    res = evaluation.evaluate_gpu(gru, valid)
    print(f'Recall@20: {res[0]}')
    # NOTE(review): the original snippet is cut off here — its MRR@20 print for
    # this third configuration is missing from the dump, so none is added.
# ===== Exemplo n.º 2 (example no. 2 — scraped snippet separator; score: 0) =====
        # NOTE(review): this fragment starts mid-conditional — the enclosing
        # `if` testing args.test_against_items is missing from the dump, and the
        # indentation of sys.exit(1) below is inconsistent with this branch.
        print(
            'ERROR. You musn\'t evaluate positive items agains less than 50000 items.'
        )
        sys.exit(1)
    print(
        'WARNING! You set the number of negative test items. You musn\'t evaluate positive items against a subset of all items unless the number of items in your data is too high (i.e. above a few millions) and evaluation takes too much time.'
    )
    # Keep only the args.test_against_items most frequent items: count events
    # per ItemId, sort descending by support, take the top slice's index.
    supp = data.groupby('ItemId').size()
    supp.sort_values(inplace=True, ascending=False)
    items = supp[:args.test_against_items].index

if args.test is not None:
    # Evaluate the trained network on every requested test file, at every
    # requested cut-off, timing each evaluation run.
    # NOTE(review): `items` is only bound in the preceding (conditional)
    # fragment — presumably defined on all paths reaching here; verify upstream.
    for path in args.test:
        print('Loading test data...')
        test_set = load_data(path, gru)
        for cutoff in args.measure:
            print(f'Starting evaluation (cut-off={cutoff}, using {args.eval_type} mode for tiebreaking)')
            started = time.time()
            res = evaluation.evaluate_gpu(gru,
                                          test_set,
                                          items,
                                          batch_size=100,
                                          cut_off=cutoff,
                                          mode=args.eval_type)
            elapsed = time.time() - started
            print(f'Evaluation took {elapsed:.2f}s')
            print(f'Recall@{cutoff}: {res[0]:.6f} MRR@{cutoff}: {res[1]:.6f}')
# ===== Exemplo n.º 3 (example no. 3 — scraped snippet separator; score: 0) =====
    # NOTE(review): fragment starts mid-script — `data`, `valid` and `start`
    # are defined before this excerpt begins; confirm against the full source.
    # Train the original TOP1-loss GRU4Rec baseline (single 100-unit layer).
    gru = gru4rec.GRU4Rec(loss='top1',
                          final_act='tanh',
                          hidden_act='tanh',
                          layers=[100],
                          batch_size=50,
                          dropout_p_hidden=0.5,
                          learning_rate=0.01,
                          momentum=0.0,
                          time_sort=False,
                          session_key='SessionId',
                          item_key='ItemId',
                          time_key='Timestamp')
    gru.fit(data)
    # res[0] is Recall@20, res[1] is MRR@20 (see the prints below).
    res = evaluation.evaluate_gpu(gru,
                                  valid,
                                  session_key='SessionId',
                                  item_key='ItemId',
                                  time_key='Timestamp')
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))

    # #State-of-the-art results on RSC15 from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)
    # #BPR-max, no embedding (R@20 = 0.7197, M@20 = 0.3157)
    # gru = gru4rec.GRU4Rec(loss='bpr-max', final_act='elu-0.5', hidden_act='tanh', layers=[100], adapt='adagrad', n_epochs=10, batch_size=32, dropout_p_embed=0, dropout_p_hidden=0, learning_rate=0.2, momentum=0.3, n_sample=2048, sample_alpha=0, bpreg=1, constrained_embedding=False)
    # gru.fit(data)
    # res = evaluation.evaluate_gpu(gru, valid)
    # print('Recall@20: {}'.format(res[0]))
    # print('MRR@20: {}'.format(res[1]))
PATH_TO_TRAIN = '../data/rsc15_train_full.txt'
PATH_TO_TEST = '../data/rsc15_test.txt'

if __name__ == '__main__':
    # RSC15 splits are tab-separated; ItemId is forced to int64.
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    # BPR-max with constrained embedding (R@20 = 0.7261, M@20 = 0.3124), per
    # "Recurrent Neural Networks with Top-k Gains for Session-based
    # Recommendations" (http://arxiv.org/abs/1706.03847).
    gru = gru4rec.GRU4Rec(loss='bpr-max', final_act='elu-0.5', hidden_act='tanh',
                          layers=[100], adapt='adagrad', n_epochs=10,
                          batch_size=32, dropout_p_embed=0, dropout_p_hidden=0,
                          learning_rate=0.2, momentum=0.1, n_sample=2048,
                          sample_alpha=0, bpreg=0.5, constrained_embedding=True)
    gru.fit(data)
    # Evaluation also dumps per-event predictions to the given CSV path.
    res = evaluation.evaluate_gpu(gru, valid,
                                  output_path='predictions_GRU4REC.csv',
                                  mode='standard')
    print(f'Recall@20: {res[0]}')
    print(f'MRR@20: {res[1]}')
# ===== Exemplo n.º 5 (example no. 5 — scraped snippet separator; score: 0) =====
# NOTE(review): in the original snippet PATH_TO_TRAIN's assignment was commented
# out while still being read by pd.read_csv below, which raises NameError at
# runtime. The assignment is restored here (path taken from the commented line);
# confirm 'train_fewrows.csv' is the intended training file.
PATH_TO_TRAIN = 'D:\\Mala GD projects\\Session based RNN recommendation\\GRU4Rec-master\\GD\\train_fewrows.csv'
PATH_TO_TEST = 'D:\\Mala GD projects\\Session based RNN recommendation\\GRU4Rec-master\\GD\\sessions_test_trimmed.csv'

if __name__ == '__main__':
    # These exports are comma-separated (unlike the tab-separated RSC15 files);
    # ItemId is forced to int64.
    data = pd.read_csv(PATH_TO_TRAIN, sep=',', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep=',', dtype={'ItemId': np.int64})

    # Reference RSC15 numbers from "Recurrent Neural Networks with Top-k Gains
    # for Session-based Recommendations" (http://arxiv.org/abs/1706.03847).

    # BPR-max, no embedding (R@20 = 0.7197, M@20 = 0.3157)
    gru = gru4rec.GRU4Rec(loss='bpr-max',
                          final_act='elu-0.5',
                          hidden_act='tanh',
                          layers=[100],
                          adapt='adagrad',
                          n_epochs=10,
                          batch_size=32,
                          dropout_p_embed=0,
                          dropout_p_hidden=0,
                          learning_rate=0.2,
                          momentum=0.3,
                          n_sample=2048,
                          sample_alpha=0,
                          bpreg=1,
                          constrained_embedding=False)
    gru.fit(data)
    res = evaluation.evaluate_gpu(gru, valid)
    print(f'Recall@20: {res[0]}')
    print(f'MRR@20: {res[1]}')

    # BPR-max, constrained embedding (R@20 = 0.7261, M@20 = 0.3124)
    gru = gru4rec.GRU4Rec(loss='bpr-max',
                          final_act='elu-0.5',
                          hidden_act='tanh',
                          layers=[100],
                          adapt='adagrad',
                          n_epochs=10,
                          batch_size=32,
                          dropout_p_embed=0,
                          dropout_p_hidden=0,
                          learning_rate=0.2,
                          momentum=0.1,
                          n_sample=2048,
                          sample_alpha=0,
                          bpreg=0.5,
                          constrained_embedding=True)
    gru.fit(data)
    res = evaluation.evaluate_gpu(gru, valid)
    print(f'Recall@20: {res[0]}')
    print(f'MRR@20: {res[1]}')

    # Cross-entropy (R@20 = 0.7180, M@20 = 0.3087)
    # NOTE(review): the snippet is cut off mid-call in the source dump — the
    # remaining GRU4Rec keyword arguments and the fit/evaluate/print lines for
    # this configuration are missing.
    gru = gru4rec.GRU4Rec(loss='cross-entropy', final_act='softmax', hidden_act='tanh', layers=[100], adapt='adagrad',
                          n_epochs=10, batch_size=32, dropout_p_embed=0, dropout_p_hidden=0.3, learning_rate=0.1,
# ===== Exemplo n.º 6 (example no. 6 — scraped snippet separator; score: 0) =====
    # NOTE(review): fragment starts mid-scope — `args`, `model`, `test_loader`,
    # `train_dataset` and `test_dataset` are defined before this excerpt begins.
    # Dispatch the evaluation phase selected on the command line.
    if args.classify:
        print("Starting evaluation phase...")
        classification(test_loader,
                       model,
                       cmc_rank=args.rank,
                       n_classes=args.train_max - 1)
    else:
        if args.restart:
            print("Restarting evaluation from dump...")
        else:
            print("Starting evaluation phase...")
        # One of three evaluation backends, chosen by --eval; the chained `if`s
        # (rather than elif) mirror the original — only one can match anyway.
        if args.eval == "gpu":
            evaluate_gpu(train_dataset,
                         test_dataset,
                         model,
                         thresh=args.thresh,
                         cmc_rank=args.rank,
                         restart=args.restart)
        if args.eval == "cpu":
            evaluate(train_dataset,
                     test_dataset,
                     model,
                     thresh=args.thresh,
                     cmc_rank=args.rank,
                     restart=args.restart)
        # NOTE(review): the dump truncates here — the closing arguments of this
        # evaluate_vram_opt(...) call (e.g. restart=...) are missing.
        if args.eval == "vram-opt":
            evaluate_vram_opt(train_dataset,
                              test_dataset,
                              model,
                              thresh=args.thresh,
                              cmc_rank=args.rank,