class TestRanking(Test):
    def __init__(self):
        super(TestRanking, self).__init__()
        # Rank-based metrics: KendallTau, SpearmanRho, MeanReciprocalRank, ReciprocalRank
        self.kendall = KendallTau()
        self.kendall.load(self.GT_RANKING, self.TEST_RANKING)
        self.spearman = SpearmanRho()
        self.spearman.load(self.GT_RANKING, self.TEST_RANKING)
        self.mrr = MeanReciprocalRank()
        for elem in self.TEST_DECISION:
            self.mrr.load(self.GT_DECISION, elem)

    # TEST_CORR Spearman
    def test_RANK_Spearman_compute_all(self):
        assert_equal(self.spearman.compute(), 0.5) #0.55 ?

    #def test_RANK_Spearman_compute_tied_ranks():
    #    assert_equal(spearman.compute(tied_ranks=True), 0.5) #In fact, it uses Pearsonr corr. of the ranks

    def test_RANK_Spearman_compute_floats(self):
        spearman = SpearmanRho(self.DATA_PRED)
        assert_equal(spearman.compute(), 0.947368) #0.95 ?

    #def test_RANK_Spearman_compute_floats_tied_ranks():
    #    spearman = SpearmanRho(self.DATA_PRED)
    #    assert_equal(spearman.compute(tied_ranks=True), 0.930024) #In fact, it uses Pearsonr corr. of the ranks

    def test_RANK_Spearman_load_test(self):
        spearman = SpearmanRho()
        spearman.load_test(self.TEST_DATA)
        assert_equal(len(spearman.get_test()), len(self.TEST_DATA))

    def test_RANK_Spearman_load_ground_truth(self):
        spearman = SpearmanRho()
        spearman.load_ground_truth(self.GT_DATA)
        assert_equal(len(spearman.get_ground_truth()), len(self.GT_DATA))

    def test_RANK_Spearman_add_entry(self):
        self.spearman.add(('guitar', 4), ('guitar', 4)) #add tag 'guitar' at rank-4
        assert_equal(len(self.spearman.get_test()), len(self.TEST_RANKING) + 1)
        assert_equal(len(self.spearman.get_ground_truth()), len(self.GT_RANKING) + 1)
        assert_equal(self.spearman.compute(), 0.763158) #0.775 ?

    def test_RANK_Spearman_different_list_sizes(self):
        TEST_DATA = ['classical', 'invented', 'baroque']
        GT_DATA = ['classical', 'instrumental', 'piano', 'baroque']
        spearman = SpearmanRho()
        spearman.load_ground_truth(GT_DATA)
        spearman.load_test(TEST_DATA)
        assert_raises(ValueError, spearman.compute) #Raise: GT & TEST lists have different sizes

    # TEST_CORR Kendall
    def test_RANK_Kendall_compute_all(self):
        assert_equal(self.kendall.compute(), 0.4)

    def test_RANK_Kendall_compute_floats(self):
        kendall = KendallTau(self.DATA_PRED)
        assert_equal(kendall.compute(), 0.888889)

    def test_RANK_Kendall_load_test(self):
        kendall = KendallTau()
        kendall.load_test(self.TEST_DATA)
        assert_equal(len(kendall.get_test()), len(self.TEST_DATA))

    def test_RANK_Kendall_load_ground_truth(self):
        kendall = KendallTau()
        kendall.load_ground_truth(self.GT_DATA)
        assert_equal(len(kendall.get_ground_truth()), len(self.GT_DATA))

    def test_RANK_Kendall_add_entry(self):
        self.kendall.add(('guitar', 4.0), ('guitar', 4.0)) #add tag 'guitar'
        assert_equal(len(self.kendall.get_test()), len(self.TEST_RANKING) + 1)
        assert_equal(len(self.kendall.get_ground_truth()), len(self.GT_RANKING) + 1)
        assert_equal(self.kendall.compute(), 0.666667)

    def test_RANK_Kendall_diff_elems(self):
        TEST_DECISION = ['class', 'invented', 'baro', 'instru']
        GT_DECISION = ['classical', 'instrumental', 'piano', 'baroque']
        kendall = KendallTau()
        kendall.load_ground_truth(GT_DECISION)
        kendall.load_test(TEST_DECISION)
        assert_raises(ValueError, kendall.compute) #Different elements

    # TEST_RANK ReciprocalRank
    def test_RANK_ReciprocalRank_compute(self):
        rr = ReciprocalRank()
        QUERY = 'instrumental'
        assert_equal(rr.compute(self.GT_DECISION, QUERY), 0.5)

    def test_RANK_ReciprocalRank_add_entry(self):
        rr = ReciprocalRank()
        QUERY = 'invented'
        rr.load(self.GT_DECISION, QUERY)
        assert_equal(rr.compute(), 0.0)

    # TEST_RANK MeanReciprocalRank
    # Internally, MeanReciprocalRank uses a list of ReciprocalRank results
    def test_RANK_MeanReciprocalRank_compute_all(self):
        assert_equal(self.mrr.compute(), 0.4375)

    def test_RANK_MeanReciprocalRank_compute_one(self):
        mrr = MeanReciprocalRank()
        QUERY = 'instrumental'
        assert_equal(mrr.compute(self.GT_DECISION, QUERY), 0.5)

    def test_RANK_MeanReciprocalRank_load(self):
        mrr = MeanReciprocalRank()
        assert_raises(ValueError, mrr.load, self.GT_DECISION, self.TEST_RANKING)

    def test_RANK_MeanReciprocalRank_load_test(self):
        mrr = MeanReciprocalRank()
        assert_raises(NotImplementedError, mrr.load_test, self.TEST_RANKING)

    def test_RANK_MeanReciprocalRank_load_ground_truth(self):
        mrr = MeanReciprocalRank()
        assert_raises(NotImplementedError, mrr.load_ground_truth, self.GT_RANKING)

    def test_RANK_MeanReciprocalRank_add_entry(self):
        mrr = MeanReciprocalRank()
        QUERY = 'invented'
        mrr.load(self.GT_DECISION, QUERY)
        assert_equal(mrr.compute(), 0.0)

    # AP tests
    def test_RANK_AveragePrecision(self):
        GT_DECISION = [1, 2, 4]
        TEST_DECISION = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        avgp = AveragePrecision()
        avgp.load(GT_DECISION, TEST_DECISION)
        assert_equal(round(avgp.compute(), 4), 0.9167)

        GT_DECISION = [1, 4, 8]
        avgp = AveragePrecision()
        avgp.load(GT_DECISION, TEST_DECISION)
        assert_equal(round(avgp.compute(), 4), 0.625)

        GT_DECISION = [3, 5, 9, 25, 39, 44, 56, 71, 89, 123]
        TEST_DECISION = [123, 84, 56, 6, 8, 9, 511, 129, 187, 25, 38, 48, 250, 113, 3]
        avgp = AveragePrecision()
        avgp.load(GT_DECISION, TEST_DECISION)
        assert_equal(avgp.compute(), 0.58)

    # mAP tests
    def test_RANK_MeanAveragePrecision(self):
        mavgp = MeanAveragePrecision()
        GT_DECISION = [1, 2, 4]
        TEST_DECISION = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        mavgp.load(GT_DECISION, TEST_DECISION)

        GT_DECISION = [1, 4, 8]
        mavgp.load(GT_DECISION, TEST_DECISION)

        GT_DECISION = [3, 5, 9, 25, 39, 44, 56, 71, 89, 123]
        TEST_DECISION = [123, 84, 56, 6, 8, 9, 511, 129, 187, 25, 38, 48, 250, 113, 3]
        mavgp.load(GT_DECISION, TEST_DECISION)
        assert_equal(mavgp.compute(), 0.707222)
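# Reference sketch (an illustrative assumption, not part of the library under test): the AP
# expectations above are consistent with sampling precision at each hit in the ranked test
# list and averaging over the number of hits found, e.g. GT=[1, 2, 4] vs TEST=[1..10] gives
# (1/1 + 2/2 + 3/4) / 3 = 0.916667, and the mAP expectation 0.707222 is the mean of the
# three AP values (0.916667, 0.625, 0.58). The helper name below is hypothetical.
def _reference_average_precision(gt, ranked):
    hits, precisions = 0, []
    for rank, item in enumerate(ranked, start=1):   # ranks are 1-based
        if item in gt:                              # a relevant item was retrieved
            hits += 1
            precisions.append(float(hits) / rank)   # precision at this hit
    return sum(precisions) / len(precisions) if precisions else 0.0

assert round(_reference_average_precision([1, 2, 4], range(1, 11)), 4) == 0.9167
assert round(_reference_average_precision([1, 4, 8], range(1, 11)), 4) == 0.625
assert round(_reference_average_precision(
    [3, 5, 9, 25, 39, 44, 56, 71, 89, 123],
    [123, 84, 56, 6, 8, 9, 511, 129, 187, 25, 38, 48, 250, 113, 3]), 4) == 0.58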
MAX_RATING = 5.0

ITEMID = 1
USERID = 1

print svd.predict(ITEMID, USERID, MIN_RATING, MAX_RATING) # predicted rating value
print svd.get_matrix().value(ITEMID, USERID) # real rating value

print ''
print 'GENERATING RECOMMENDATION'
print svd.recommend(USERID, n=5, only_unknowns=True, is_row=False)

# Evaluation using prediction-based and rank-based metrics
rmse = RMSE()
mae = MAE()
spearman = SpearmanRho()
kendall = KendallTau()
#decision = PrecisionRecallF1()

for rating, item_id, user_id in test.get():
    try:
        pred_rating = svd.predict(item_id, user_id)
        rmse.add(rating, pred_rating)
        mae.add(rating, pred_rating)
        spearman.add(rating, pred_rating)
        kendall.add(rating, pred_rating)
    except KeyError:
        continue

print ''
print 'EVALUATION RESULT'
print 'RMSE=%s' % rmse.compute()
print 'MAE=%s' % mae.compute()
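
# The loop above also accumulates rank correlations on the predicted ratings; a possible
# continuation (a sketch, not part of the original script) reports them next to RMSE/MAE:
print 'Spearman rho=%s' % spearman.compute()
print 'Kendall tau=%s' % kendall.compute()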