def test_list_app_ids(self):
    """Check the size of the reference list built from two rankings.

    The two rankings only have the id 'a' in common; the test expects the
    combined reference list to contain 7 entries.
    """
    first_ranking, second_ranking = 'acbd', 'aefg'
    expected_count = 7

    merged_ids = convert_ranking.list_app_ids(first_ranking,
                                              second_ranking,
                                              verbose=True)

    self.assertEqual(len(merged_ids), expected_count)
def test_convert_ranking_to_vector_of_scores(self):
    """Check the length and the value bounds of the score vector.

    The score vector must have one entry per ranked item, with every score
    lying between 0 and the number of ranked items (both bounds included).
    """
    ordered_ids = ["a", "c", "b", "d"]
    # Reference ids are built from this ranking alone (second ranking empty).
    reference_ids = convert_ranking.list_app_ids(ordered_ids, [])

    score_vector = convert_ranking.convert_ranking_to_vector_of_scores(
        ordered_ids,
        reference_ids,
    )

    self.assertEqual(len(score_vector), len(ordered_ids))
    self.assertGreaterEqual(min(score_vector), 0)
    self.assertLessEqual(max(score_vector), len(ordered_ids))
def test_convert_ranking_for_scipy(self):
    """Check the length and the value bounds of the rank vector.

    The rank vector must have one entry per ranked item, with every rank
    lying between 1 and the number of ranked items plus one (both bounds
    included).
    """
    ordered_ids = ["a", "c", "b", "d"]
    # Reference ids are built from this ranking alone (second ranking empty).
    reference_ids = convert_ranking.list_app_ids(ordered_ids, [])

    rank_vector = convert_ranking.convert_ranking_for_scipy(
        ordered_ids,
        reference_ids,
        reverse_order=False,
    )

    self.assertEqual(len(rank_vector), len(ordered_ids))
    self.assertGreaterEqual(min(rank_vector), 1)
    self.assertLessEqual(max(rank_vector), len(ordered_ids) + 1)
def compute_tau(ranking_A, ranking_B):
    """Compute Kendall's tau between two rankings and print the result.

    Both rankings are converted to rank vectors over a shared list of app
    ids, which are then passed to ``stats.kendalltau``.

    Returns:
        A ``(tau, p_value)`` tuple as produced by ``stats.kendalltau``.
    """
    shared_ids = list_app_ids(ranking_A, ranking_B)

    # Arrays of ranks, one per input ranking, aligned on the shared ids.
    # NB: we would get the same tau with arrays of scores.
    ranks_a, ranks_b = [
        convert_ranking_to_vector_of_ranks(ranking, app_ids=shared_ids)
        for ranking in (ranking_A, ranking_B)
    ]

    tau, p_value = stats.kendalltau(ranks_a, ranks_b)

    tau_line = 'Kendall rank-order correlation coefficient: {:.4f}'.format(tau)
    p_value_line = 'p-value to test for non-correlation: {:.4f}'.format(p_value)
    print(tau_line)
    print(p_value_line)

    return (tau, p_value)
def compute_rho(ranking_A, ranking_B):
    """Compute Spearman's rho between two rankings and print the result.

    Both rankings are converted to rank vectors over a shared list of app
    ids, which are then passed to ``stats.spearmanr``.

    Returns:
        A ``(rho, p_value)`` tuple as produced by ``stats.spearmanr``.
    """
    # NOTE(review): unlike the sibling correlation helpers, this one asks
    # list_app_ids for verbose output -- confirm whether that is intended.
    shared_ids = list_app_ids(ranking_A, ranking_B, verbose=True)

    # Arrays of ranks, aligned on the shared ids.
    # NB: we would get the same rho with arrays of scores.
    ranks_a = convert_ranking_to_vector_of_ranks(ranking_A, app_ids=shared_ids)
    ranks_b = convert_ranking_to_vector_of_ranks(ranking_B, app_ids=shared_ids)

    rho, p_value = stats.spearmanr(ranks_a, ranks_b)

    report_lines = (
        'Spearman rank-order correlation coefficient: {:.4f}'.format(rho),
        'p-value to test for non-correlation: {:.4f}'.format(p_value),
    )
    for line in report_lines:
        print(line)

    return (rho, p_value)
def compute_weighted_tau(ranking_A, ranking_B):
    """Compute the weighted Kendall tau between two rankings and print it.

    Both rankings are converted to score vectors (not rank vectors) over a
    shared list of app ids, which are then passed to ``stats.weightedtau``.

    Returns:
        A ``(weighted_tau, p_value)`` tuple as produced by
        ``stats.weightedtau``.
    """
    shared_ids = list_app_ids(ranking_A, ranking_B)

    # Arrays of scores, aligned on the shared ids.
    #
    # NB: it is important NOT to feed arrays of ranks for the weighted tau!
    #
    # > Note that if you are computing the weighted tau on arrays of ranks,
    # > rather than of scores (i.e., a larger value implies a lower rank) you
    # > must negate the ranks, so that elements of higher rank are associated
    # > with a larger value.
    #
    # Reference: http://scipy.github.io/devdocs/generated/scipy.stats.weightedtau.html#scipy.stats.weightedtau
    scores_a = convert_ranking_to_vector_of_scores(ranking_A, app_ids=shared_ids)
    scores_b = convert_ranking_to_vector_of_scores(ranking_B, app_ids=shared_ids)

    # NOTE(review): the scipy documentation appears to state that the p-value
    # returned by weightedtau is currently NaN -- confirm before relying on it.
    weighted_tau, p_value = stats.weightedtau(scores_a, scores_b)

    tau_line = 'Weighted Kendall rank-order correlation coefficient: {:.4f}'.format(
        weighted_tau)
    p_value_line = 'p-value to test for non-correlation: {:.4f}'.format(p_value)
    print(tau_line)
    print(p_value_line)

    return (weighted_tau, p_value)