示例#1
0
def test_item_knn():
    """Check ItemKnn predictions, rankings and error metrics against expectations.

    Builds a ``pyreclab.ItemKnn`` model from 'dataset/u1.base', trains it with
    ``k=100`` and Pearson similarity, and compares a single prediction, a
    top-5 ranking and the MAE/RMSE reported by ``test`` with the module-level
    expected values and tolerances.
    """
    # Both the training and the test file are read with the same column layout.
    column_layout = dict(dlmchar=b'\t',
                         header=False,
                         usercol=0,
                         itemcol=1,
                         ratingcol=2)

    recommender = pyreclab.ItemKnn(dataset='dataset/u1.base', **column_layout)
    recommender.train(k=100, similarity='pearson')

    # Single (user, item) prediction.
    predicted_rating = recommender.predict('457', '443')
    prediction_error = abs(predicted_rating - expected_prediction)
    assert prediction_error < prediction_epsilon

    # Top-5 ranking for the same user, with includeRated=False.
    top_items = recommender.recommend('457', 5, includeRated=False)
    assert top_items == expected_ranking

    # Only the error metrics are checked; the prediction list is discarded.
    _, mae, rmse = recommender.test(input_file='dataset/u1.test',
                                    output_file='predictions.csv',
                                    **column_layout)

    mae_error = abs(mae - expected_mae)
    assert mae_error < mae_epsilon
    rmse_error = abs(rmse - expected_rmse)
    assert rmse_error < rmse_epsilon
示例#2
0
def main(k=100):
    """Train an item-based KNN recommender and report timings and errors.

    Loads 'dataset/u1.base' into a ``pyreclab.ItemKnn`` model, trains it with
    Pearson similarity, prints one sample prediction and a top-5 ranking for
    user '457', then runs the prediction test and the recommendation test
    against 'dataset/u1.test', printing the elapsed time of each stage.

    Args:
        k: Value forwarded to ``ItemKnn.train`` as its ``k`` argument.

    Returns:
        The tuple ``(mae, rmse)`` taken from the result of ``ItemKnn.test``.
    """
    # time.clock() was removed in Python 3.8, so calling it raises
    # AttributeError on current interpreters. Prefer perf_counter() and fall
    # back to clock() only where perf_counter() does not exist.
    # NOTE: perf_counter() measures wall-clock time, whereas clock() measured
    # CPU time on Unix, so absolute figures may differ from older runs.
    timer = getattr(time, 'perf_counter', None) or time.clock

    ibknn = pyreclab.ItemKnn(dataset='dataset/u1.base',
                             dlmchar=b'\t',
                             header=False,
                             usercol=0,
                             itemcol=1,
                             ratingcol=2)

    print('-> training model')
    start = timer()
    ibknn.train(k=k, similarity='pearson')
    end = timer()
    print('training time: ' + str(end - start))

    print('-> individual test')
    pred = ibknn.predict('457', '443')
    print('user 457, item 443, prediction ' + str(pred))

    ranking = ibknn.recommend('457', 5, includeRated=False)
    print('recommendation for user 457: ' + str(ranking))

    print('-> prediction test')
    start = timer()
    # The first element of the result (the prediction list) is not used here.
    _, mae, rmse = ibknn.test(input_file='dataset/u1.test',
                              dlmchar=b'\t',
                              header=False,
                              usercol=0,
                              itemcol=1,
                              ratingcol=2,
                              output_file='predictions.csv')
    end = timer()
    print('prediction time: ' + str(end - start))

    print('MAE: ' + str(mae))
    print('RMSE: ' + str(rmse))

    print('-> recommendation test')
    start = timer()
    # The return value is not needed; the call is kept so it is still timed
    # and still receives output_file='ranking.json'.
    ibknn.testrec(input_file='dataset/u1.test',
                  dlmchar=b'\t',
                  header=False,
                  usercol=0,
                  itemcol=1,
                  ratingcol=2,
                  topn=10,
                  output_file='ranking.json',
                  includeRated=False)
    end = timer()
    print('recommendation time: ' + str(end - start))

    return mae, rmse
示例#3
0
import time
import pyreclab

if __name__ == '__main__':

    ibknn = pyreclab.ItemKnn(dataset='dataset/u1.base',
                             dlmchar=b'\t',
                             header=False,
                             usercol=0,
                             itemcol=1,
                             ratingcol=2)

    print('-> training model')
    start = time.clock()
    ibknn.train(k=100, similarity='pearson')
    end = time.clock()
    print('training time: ' + str(end - start))

    print('-> individual test')
    pred = ibknn.predict('457', '443')
    print('user 457, item 443, prediction ' + str(pred))

    ranking = ibknn.recommend('457', 5, includeRated=False)
    print('recommendation for user 457: ' + str(ranking))

    print('-> prediction test')
    start = time.clock()
    predlist, mae, rmse = ibknn.test(input_file='dataset/u1.test',
                                     dlmchar=b'\t',
                                     header=False,
                                     usercol=0,
prediction_filename = data_url + 'predictionsItemKnn'  #5.csv'
ordenada_filename = data_url + 'ordenadasItemKnn'  #5.csv'

print 'ItemKnn'
for i in [1, 5]:  #range(1, data_chunks + 1):
    f_t = training_filename + str(i) + ".txt"
    f_p = probe_filename + str(i) + ".txt"
    f_pred = prediction_filename + str(i) + ".csv"
    f_ord = ordenada_filename + str(i) + ".csv"

    print "Corriendo experimento ", i, "..."
    print 'Entrenando...'
    obj = pyreclab.ItemKnn(dataset=f_t,
                           dlmchar=b'\t',
                           header=True,
                           usercol=0,
                           itemcol=1,
                           ratingcol=2)
    obj.train(10, 'pearson')
    print 'Prediciendo...'
    #prediction = obj.predict( "630685", "1")
    #ranking = obj.recommend( "630685", 10, True)
    #print prediction
    #print ranking

    predictionList, mae, rmse = obj.test(input_file=f_p,
                                         dlmchar=b'\t',
                                         header=False,
                                         usercol=0,
                                         itemcol=1,
                                         ratingcol=2,