Exemplo n.º 1
0
    def runGRU4Rec(self):
        """Grid-search GRU4Rec hyperparameters and report Recall@2 per config.

        Sweeps layer size, momentum and hidden dropout (the latter two given
        in tenths) over session data keyed by "Sid", ordered by "TimeStamp",
        with "QueryName" as the item identifier.
        """
        import gru4rec
        session_key = "Sid"  # alternative: "Aid"
        time_key = "TimeStamp"
        item_key = "QueryName"

        data = self.dataManager.loadData([time_key, item_key, session_key, "Aid", "Sid"], removeFirstK=5)

        # Chronological (non-random) split so test sessions follow training ones.
        train, test = self.dataManager.splitData(data, isRandom=False)

        print('Training GRU4Rec')

        batch_size = 200
        for layers in [100, 1000]:
            for loss_type in ['top1']:
                for momentum in [3, 4, 5]:       # tenths: 0.3, 0.4, 0.5
                    for dropOut in [3, 4, 5]:    # tenths: 0.3, 0.4, 0.5
                        try:
                            dropout_p = float(dropOut) / 10.0
                            momentum_p = float(momentum) / 10.0
                            print('Batch Size: ' + str(batch_size) + ' Dropout: ' + str(dropout_p) + ' Momentum: ' + str(momentum_p))
                            gru = gru4rec.GRU4Rec(layers=[layers], loss=loss_type, batch_size=batch_size,
                                                  dropout_p_hidden=dropout_p, learning_rate=0.05,
                                                  momentum=momentum_p, n_epochs=3,
                                                  hidden_act='tanh', final_act='tanh',
                                                  session_key=session_key, item_key=item_key, time_key=time_key)
                            gru.fit(train)

                            res = gru.evaluate_sessions_batch(test, cut_off=2, batch_size=batch_size,
                                                              session_key=session_key, item_key=item_key,
                                                              time_key=time_key)

                            print('Recall : {}'.format(res[0]))

                        except Exception as e:
                            # Report the failure cause for this configuration.
                            # The original bare `except:` silently hid the error
                            # and would also have trapped KeyboardInterrupt.
                            print("Unexpected error: {}".format(e))
Exemplo n.º 2
0
    def runGRU4RecForSpecificAid(self,numOfAids=10):
        """Train a per-Aid GRU4Rec model for the most session-active Aids.

        Ranks Aids by distinct session count (descending) and, for each of the
        ``numOfAids`` Aids starting from rank 2, trains a GRU4Rec model on that
        Aid's data alone and predicts over the first session of its test split.

        NOTE(review): this snippet looks truncated — ``correct`` is printed at
        the bottom but never assigned anywhere in the visible code (it would
        raise NameError); the scoring of ``preds`` that should produce it
        appears to be missing.
        """

        import gru4rec
        # Session-based setup: items are query names, events ordered by timestamp.
        session_key = "Sid" #"Aid" # Or Sid
        time_key = "TimeStamp"
        item_key = "QueryName"

        data = self.dataManager.loadData([time_key,item_key,"Aid","Sid"],removeFirstK=5,onlyFirstFile=True)

        # Distinct-session count per Aid, used as an activity ranking.
        result = data.groupby('Aid').apply(
             lambda group: (group.Sid.nunique())
         )

        result.sort_values(inplace=True,ascending=False)

        # Skip the two most active Aids and take the next `numOfAids`.
        for selectedAid in result.keys()[2:(numOfAids+2)]:
            # Decode the label-encoded Aid back to its original value for display.
            idx = self.dataManager.fields.index("Aid");
            ret = self.dataManager.mcle.all_encoders_[idx].inverse_transform(selectedAid);
            print(str(ret))
            aidData = data.loc[data['Aid'] == selectedAid]

            train, test = self.dataManager.splitData(aidData,isRandom=False)

            print('Training GRU4Rec')

            batch_size = 5
            momentum = 0.4
            dropOut = 0.5

            print('Batch Size: ' + str(batch_size) + ' Dropout: ' + str(float(dropOut)) + ' Momentum: ' + str(float(momentum)))
            gru = gru4rec.GRU4Rec(layers=[1000], loss='top1', batch_size=batch_size, dropout_p_hidden=float(dropOut), learning_rate=0.05, momentum=float(momentum)
                                    ,n_epochs=10,hidden_act = 'tanh', final_act='tanh'
                                    ,session_key=session_key, item_key=item_key, time_key=time_key)
            gru.fit(train)

            # Silence pandas SettingWithCopy warnings on the slice before the in-place sort.
            test.is_copy = False
            test.sort_values([time_key,session_key], inplace=True) # Sort by time_key first and then by session_key

            # Predict next items for every event of the chronologically first test session.
            specificSession = test[test[session_key] == test[session_key].values[0]]
            preds = gru.predict_next_batch(specificSession[session_key], specificSession[item_key], None, len(specificSession))

            # NOTE(review): `correct` is undefined at this point — see docstring.
            print('Correct: {}'.format(correct))
            if(len(test)-batch_size-1 > 0):
                print('Accuracy: {}'.format(float(correct)/float(len(test)-batch_size-1)))
Exemplo n.º 3
0
    def learnFromExperiencedGRU(self):
        """Train GRU4Rec on "experienced" users and compare recall on novices.

        Splits Aids into those with at most 3 distinct sessions ("below") and
        the rest ("above"); trains on the above-threshold data and reports
        Recall@2 both on an above-threshold test split and on an equally-sized
        slice of the below-threshold data.
        """
        import gru4rec
        session_key = "Sid"  # alternative: "Aid"
        time_key = "TimeStamp"
        item_key = "QueryName"

        data = self.dataManager.loadData([time_key, item_key, session_key, "Aid", "Sid"], removeFirstK=5)

        # True for Aids with <= 3 distinct sessions ("inexperienced" users).
        result = data.groupby('Aid').apply(
            lambda group: (group.Sid.nunique() <= 3)
        )
        resultBelow = result[result == True]
        resultAbove = result[result == False]

        dataBelow = data[data.Aid.isin(resultBelow.index)]
        dataAbove = data[data.Aid.isin(resultAbove.index)]

        # Train/test split comes from experienced users only; novices are test-only.
        train, test = self.dataManager.splitData(dataAbove, isRandom=False)
        testBelow = dataBelow[:len(test)]

        print('Training GRU4Rec')

        batch_size = 200
        for layers in [1000]:
            for loss_type in ['top1']:
                for momentum in [3, 4, 5]:   # tenths: 0.3, 0.4, 0.5
                    for dropOut in [3]:      # tenths: 0.3
                        try:
                            dropout_p = float(dropOut) / 10.0
                            momentum_p = float(momentum) / 10.0
                            print('Batch Size: ' + str(batch_size) + ' Dropout: ' + str(dropout_p) + ' Momentum: ' + str(momentum_p))
                            # Use the loop's `layers` value instead of the hard-coded
                            # [1000] the original passed (identical today, but keeps
                            # the sweep honest if the layer list is ever extended).
                            gru = gru4rec.GRU4Rec(layers=[layers], loss=loss_type, batch_size=batch_size,
                                                  dropout_p_hidden=dropout_p, learning_rate=0.05,
                                                  momentum=momentum_p, n_epochs=3,
                                                  hidden_act='tanh', final_act='tanh',
                                                  session_key=session_key, item_key=item_key, time_key=time_key)
                            gru.fit(train)

                            res = gru.evaluate_sessions_batch(test, cut_off=2, batch_size=batch_size,
                                                              session_key=session_key, item_key=item_key, time_key=time_key)
                            print('Above Test Recall : {}'.format(res[0]))

                            res = gru.evaluate_sessions_batch(testBelow, cut_off=2, batch_size=batch_size,
                                                              session_key=session_key, item_key=item_key, time_key=time_key)
                            print('Below Recall : {}'.format(res[0]))
                        except Exception as e:
                            # Surface the cause; the original bare `except:` hid it.
                            print("Unexpected error: {}".format(e))
Exemplo n.º 4
0
    # NOTE(review): fragment — `data`, `PATH_TO_TEST`, `pd`, `np`, `time`,
    # `gru4rec` and `evaluation` are defined outside the visible snippet.
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    print(data.head())
    print(valid.head())

    # OUTDATED!!!
    # Reproducing results from the original paper "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)
    print('Training GRU4Rec with 100 hidden units')
    start = time.time()

    # TOP1-loss configuration; explicit column names for the RSC15 schema.
    gru = gru4rec.GRU4Rec(loss='top1',
                          final_act='tanh',
                          hidden_act='tanh',
                          layers=[100],
                          batch_size=50,
                          dropout_p_hidden=0.5,
                          learning_rate=0.01,
                          momentum=0.0,
                          time_sort=False,
                          session_key='SessionId',
                          item_key='ItemId',
                          time_key='Timestamp')
    gru.fit(data)
    # GPU evaluation; res[0] is Recall@20 and res[1] is MRR@20 (per the prints below).
    res = evaluation.evaluate_gpu(gru,
                                  valid,
                                  session_key='SessionId',
                                  item_key='ItemId',
                                  time_key='Timestamp')
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))
    print('-------------------------------------------------------')
    end = time.time()
# RSC15 train/test dumps produced by the preprocessing step.
PATH_TO_TRAIN = '../data/rsc15_train_full.txt'
PATH_TO_TEST = '../data/rsc15_test.txt'

if __name__ == '__main__':
    # Force ItemId to int64 so item identifiers stay exact integers.
    dtypes = {'ItemId': np.int64}
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype=dtypes)
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype=dtypes)

    # State-of-the-art results on RSC15 from "Recurrent Neural Networks with
    # Top-k Gains for Session-based Recommendations" (http://arxiv.org/abs/1706.03847)
    # BPR-max, no embedding (R@20 = 0.7197, M@20 = 0.3157)
    gru = gru4rec.GRU4Rec(
        layers=[100],
        loss='bpr-max',
        bpreg=0.5,
        hidden_act='tanh',
        final_act='elu-0.5',
        adapt='adagrad',
        learning_rate=0.2,
        momentum=0.1,
        n_epochs=10,
        batch_size=32,
        dropout_p_embed=0,
        dropout_p_hidden=0,
        n_sample=2048,
        sample_alpha=0,
        constrained_embedding=True,
    )
    gru.fit(data)

    # GPU evaluation also dumps per-session predictions to CSV.
    res = evaluation.evaluate_gpu(
        gru, valid, output_path='predictions_GRU4REC.csv', mode='standard')
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))
Exemplo n.º 6
0
    # NOTE(review): fragment — `data`, `PATH_TO_TEST`, `pd`, `np`, `FLAGS`,
    # `logger` and `gru4rec` are all defined outside the visible snippet.
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    #Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)

    # Accepted values for `adapt` (optimizer): None, 'adagrad', 'rmsprop', 'adam', 'adadelta'
    # Log the hyperparameter configuration before training.
    logger.info(
        'rnn_size:%3d  batch_size:%s  hidden_act:%s  dropout_p_hidden:%s  final_act:%s  optimizer:%s  learning_rate:%s',
        FLAGS.rnn_size, FLAGS.batch_size, FLAGS.hidden_act,
        FLAGS.dropout_p_hidden, FLAGS.final_act, FLAGS.optimizer,
        FLAGS.learning_rate)
    # Every hyperparameter is taken from the command-line FLAGS object.
    gru = gru4rec.GRU4Rec(loss=FLAGS.loss,
                          final_act=FLAGS.final_act,
                          hidden_act=FLAGS.hidden_act,
                          layers=[FLAGS.rnn_size],
                          batch_size=FLAGS.batch_size,
                          dropout_p_hidden=FLAGS.dropout_p_hidden,
                          learning_rate=FLAGS.learning_rate,
                          momentum=0.0,
                          time_sort=False,
                          n_epochs=FLAGS.n_epochs,
                          adapt=FLAGS.optimizer)
    # `valid` is passed to fit(); presumably used for validation during
    # training — confirm against this gru4rec fork's fit() signature.
    gru.fit(data, valid)

    #Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)

    #print('Training GRU4Rec with 100 hidden units')

    #gru = gru4rec.GRU4Rec(loss='bpr-max-0.5', final_act='linear', hidden_act='tanh', layers=[100], batch_size=32, dropout_p_hidden=0.0, learning_rate=0.2, momentum=0.5, n_sample=2048, sample_alpha=0, time_sort=True)
    #gru.fit(data)
    #
    #res = evaluation.evaluate_sessions_batch(gru, valid, None)
Exemplo n.º 7
0
# NOTE(review): fragment — `PATH_TO_TRAIN`, `pd`, `np`, `gru4rec` (and the
# commented-out `baselines`) are defined outside the visible snippet, and the
# snippet ends mid-setup (the loop that fills `mrr_gru` is not in view).
PATH_TO_TEST = '../report_labels_test_openoffice'

if __name__ == '__main__':
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t')
    valid = pd.read_csv(PATH_TO_TEST, sep='\t')

    #Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)

    #print('Training GRU4Rec with 100 hidden units')
    # Evaluation cut-offs (top-k list lengths); presumably consumed further down,
    # outside this view.
    top = np.array([1, 5, 10, 15, 20])
    #top=11
    gru = gru4rec.GRU4Rec(n_epochs=10,
                          loss='top1',
                          final_act='tanh',
                          hidden_act='relu',
                          layers=[512],
                          batch_size=32,
                          dropout_p_hidden=0.5,
                          learning_rate=0.01,
                          momentum=0.0,
                          time_sort=False)
    gru.fit(data)
    #cf=baselines.ItemKNN()
    #cf.fit(data)
    #mf=baselines.BPR()
    #mf.fit(data)
    #acc_gru=[]
    #acc_cf=[]
    #acc_mf=[]
    # Accumulator for MRR scores of the GRU model.
    mrr_gru = []
    #mrr_cf=[]
    #mrr_mf=[]
Exemplo n.º 8
0
# NOTE(review): fragment — `LAYERS`, `PATH_TO_TRAIN`, `PATH_TO_TEST`, `pd`,
# `np`, `gru4rec` and `evaluation` are defined outside the visible snippet,
# and the commented-out block at the end is cut off mid-call.
EPOCHS = 1
BATCH_SIZE = 50

if __name__ == '__main__':
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    print('Training GRU4Rec with ' + str(LAYERS) + ' hidden units and ' +
          str(EPOCHS) + ' epochs')

    #Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)
    gru = gru4rec.GRU4Rec(loss='top1',
                          final_act='tanh',
                          hidden_act='tanh',
                          layers=[LAYERS],
                          batch_size=BATCH_SIZE,
                          dropout_p_hidden=0.5,
                          learning_rate=0.01,
                          momentum=0.0,
                          n_epochs=EPOCHS,
                          time_sort=False)
    gru.fit(data)

    # res[0] is Recall@20 and res[1] is MRR@20 (per the prints below).
    res = evaluation.evaluate_sessions_batch(gru, valid, None)
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))

    #Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)
    # gru = gru4rec.GRU4Rec(loss='bpr-max-0.5',
    #     final_act='linear',
    #     hidden_act='tanh',
    #     layers=[LAYERS],
Exemplo n.º 9
0
"""

import sys
sys.path.append('../..')

import numpy as np
import pandas as pd
import gru4rec
import evaluation

# Placeholder paths to the preprocessed RSC15 dumps; point these at your copies.
PATH_TO_TRAIN = '/path/to/rsc15_train_full.txt'
PATH_TO_TEST = '/path/to/rsc15_test.txt'

if __name__ == '__main__':
    # ItemId is read as int64 so item identifiers never degrade to float.
    dtypes = {'ItemId': np.int64}
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype=dtypes)
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype=dtypes)

    print('Training GRU4Rec with 100 hidden units')

    # TOP1-loss baseline configuration.
    gru = gru4rec.GRU4Rec(
        layers=[100], loss='top1', batch_size=50,
        dropout_p_hidden=0.5, learning_rate=0.01, momentum=0.0)
    gru.fit(data)

    res = evaluation.evaluate_sessions_batch(gru, valid, None)
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))
Exemplo n.º 10
0
# NOTE(review): fragment — `LAYERS`, `PATH_TO_TRAIN`, `PATH_TO_TEST`, `pd`,
# `np`, `gru4rec` and `evaluation` are defined outside the visible snippet.
EPOCHS = 1
BATCH_SIZE = 50

if __name__ == '__main__':
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    print('Training GRU4Rec with ' + str(LAYERS) + ' hidden units and ' +
          str(EPOCHS) + ' epochs')

    #Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)
    gru = gru4rec.GRU4Rec(loss='top1',
                          final_act='tanh',
                          hidden_act='tanh',
                          layers=[LAYERS],
                          batch_size=BATCH_SIZE,
                          dropout_p_hidden=0.5,
                          learning_rate=0.01,
                          momentum=0.0,
                          n_epochs=EPOCHS,
                          time_sort=False)
    gru.fit(data)

    # res[0] is Recall@20 and res[1] is MRR@20 (per the prints below).
    res = evaluation.evaluate_sessions_batch(gru, valid, None)
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))

    #Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)
    # NOTE(review): the call below is truncated in this snippet (unclosed
    # parenthesis) — the remaining arguments lie outside the visible view.
    gru = gru4rec.GRU4Rec(loss='bpr-max-0.5',
                          final_act='linear',
                          hidden_act='tanh',
                          layers=[LAYERS],
Exemplo n.º 11
0
# Reddit session dataset dumps (tab-separated, int64 ItemId column).
PATH_TO_TRAIN = 'reddit_train_full.txt'
PATH_TO_TEST = 'reddit_test.txt'

if __name__ == '__main__':
    # NOTE(review): `pd`, `np`, `tqdm` and `gru4rec` are imported outside this
    # view; `valid` is loaded but never used in the visible code, so the
    # evaluation presumably happens past the end of the snippet.
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64})
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64})

    #Reproducing results from "Session-based Recommendations with Recurrent Neural Networks" on RSC15 (http://arxiv.org/abs/1511.06939)
    # Exhaustive grid search: hidden sizes 10..99 crossed with batch sizes 1..49
    # (90 * 49 models in total — extremely expensive).
    for x in tqdm(range(10, 100)):
        print('Training GRU4Rec with ' + str(x) + ' hidden units')
        for y in range(1, 50):
            gru = gru4rec.GRU4Rec(loss='top1',
                                  final_act='tanh',
                                  hidden_act='tanh',
                                  layers=[x],
                                  batch_size=y,
                                  dropout_p_hidden=0.5,
                                  learning_rate=0.01,
                                  momentum=0.0,
                                  time_sort=False)
            gru.fit(data)
            # Echo the configuration that was just trained.
            print('---------------------')
            print()
            print('Training params')
            print('Layers: ' + str(x))
            print('Batch size: ' + str(y))
            print('Final act: tanh')
            print('Hidden act: tanh')
            print('Learning rate: 0.01')
            print('dropout p: 0.5')
            print('Time sort: False')
Exemplo n.º 12
0
    # NOTE(review): Python 2 fragment — the bare `print start_time, end_time`
    # below is Python 2 statement syntax, and two lines use a literal tab for
    # indentation. `data`, `valid`, `length`, `ItemEmbedding`, `time`, `sys`,
    # `gru4rec` and `evaluation` all come from outside the visible snippet.
    '''    
    print('Training GRU4Rec with 100 hidden units')    
    
    gru = gru4rec.GRU4Rec(loss='top1', final_act='tanh', hidden_act='tanh', layers=[100], batch_size=50, dropout_p_hidden=0.5, learning_rate=0.01, momentum=0.0, time_sort=False)
    gru.fit(data)
    
    res = evaluation.evaluate_sessions_batch(gru, valid, None)
    print('Recall@20: {}'.format(res[0]))
    print('MRR@20: {}'.format(res[1]))
    
    '''
    #Reproducing results from "Recurrent Neural Networks with Top-k Gains for Session-based Recommendations" on RSC15 (http://arxiv.org/abs/1706.03847)
    
    print('Training GRU4Rec with 100 hidden units')
    start_time = time.time()
    # BPR loss with an item embedding of size `length`; training order randomized.
    gru = gru4rec.GRU4Rec(loss='bpr', final_act='linear', hidden_act='tanh', layers=[256], batch_size=200, embedding = length, dropout_p_hidden=0.2, n_sample=10, learning_rate=0.001, momentum=0.1, sample_alpha=0, time_sort=True, n_epochs=10, train_random_order=True)
    gru.fit(data, ItemEmbedding)
    # Optional CLI override for the item-embedding output file name.
    ItemFile = 'item_embedding'
    if len(sys.argv)>2:
	ItemFile = sys.argv[2]
    gru.save_ItemEmbedding(data, ItemFile)#'item.embedding')

    # Optional CLI override for the user-embedding output file name.
    UserFile = 'user.embedding'
    if len(sys.argv)>3:
	UserFile = sys.argv[3]
    evaluation.evaluate_sessions_batch(gru, valid, None, SaveUserFile = UserFile)#'user.embedding')
    end_time = time.time()
    # NOTE(review): this prints start minus end — a negative duration;
    # probably meant end_time - start_time.
    print start_time, end_time
    print (start_time - end_time)
    #print('Recall@20: {}'.format(res[0]))
    #print('MRR@20: {}'.format(res[1]))
Exemplo n.º 13
0
# Dataset paths are intentionally blank placeholders; fill in before running.
PATH_TO_TRAIN = ''
PATH_TO_TEST = ''

if __name__ == '__main__':
    # Read ItemId as int64 so item identifiers stay exact integers.
    dtypes = {'ItemId': np.int64}
    data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype=dtypes)
    valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype=dtypes)

    ##cross-entropy
    # Sampled softmax cross-entropy configuration (2048 negative samples).
    gru = gru4rec.GRU4Rec(
        layers=[100],
        loss='cross-entropy',
        final_act='softmax',
        hidden_act='tanh',
        adapt='adagrad',
        learning_rate=0.1,
        momentum=0.7,
        n_epochs=10,
        batch_size=32,
        dropout_p_embed=0,
        dropout_p_hidden=0.3,
        n_sample=2048,
        sample_alpha=0,
        bpreg=0,
        constrained_embedding=False,
    )
    gru.fit(data)

    # Rank-based metrics at list cut-off 5.
    res = evaluation.evaluate_sessions_batch(gru, valid, cut_off=5)
    print('cross-entropy-sample0')
    print('HR@5: {}'.format(res[0]))
    print('MRR@5: {}'.format(res[1]))
    print('MAP@5: {}'.format(res[2]))
    print('NDCG@5: {}'.format(res[3]))
    print('PRECISION@5: {}'.format(res[4]))