def test_get_ER(self):
    """Check utils.get_ER on an empty answer list and an empty query list."""
    suggestions = {
        1: [("foo", 0.3), ("bar", 0.7)],
        2: [("baz", 0.8), ("bah", 0.2)],
    }
    # Query 1 deliberately has no human answers at all.
    human_answers = {1: [], 2: ["all", "work", "baz"]}

    # The expected 1/6 is consistent with query 1 contributing 0 and
    # query 2 contributing 1/3, averaged over both queries.
    er_with_empty_answers = utils.get_ER([1, 2], suggestions, human_answers)
    self.assertAlmostEqual(er_with_empty_answers, 1.0 / 6)

    # With no queries at all the result is expected to be 0.
    er_without_queries = utils.get_ER([], suggestions, human_answers)
    self.assertAlmostEqual(er_without_queries, 0)
def calc_stats(test_label, results_file, human_suggestions_file, stats_file):
    """Compute EP, ER and EF1 for a run and append them to stats_file.

    Arguments:
    - `test_label`: label of the run; passed to both loaders and written
      into the stats line.
    - `results_file`: passed to get_output_from_file, which yields the
      query list and the suggestion dict.
    - `human_suggestions_file`: passed to get_human_suggestions, which
      yields the human suggestion dict.
    - `stats_file`: path opened in append mode; one tab-separated line is
      added per call.
    """
    query_list, suggestion_dict = get_output_from_file(test_label,
                                                       results_file)
    human_suggestion_dict = get_human_suggestions(test_label,
                                                  human_suggestions_file)
    # NOTE(review): this instance is never used below. It is kept because
    # constructing a SpellChecker may have side effects -- confirm and
    # remove if it does not.
    dummy_spell_checker = spell_checker.SpellChecker()

    args = [query_list, suggestion_dict, human_suggestion_dict]
    EP = utils.get_EP(*args)
    ER = utils.get_ER(*args)
    EF1 = utils.get_HM(EP, ER)
    stats = [EP, ER, EF1]

    # Adjacent literals are used instead of a backslash continuation inside
    # the string, which leaked source indentation into the output line.
    stats_str = ('Timestamp: {0}\tLabel: {1}\tEP: {3}\tER: {4}\t'
                 'EF1: {5}\tNum queries: {2}\n').format(
                     str(datetime.now()), test_label, len(query_list), *stats)
    # Single-argument form prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print('stats_str ' + stats_str)

    # The context manager closes the file even if the write raises.
    with open(stats_file, 'a') as f:
        f.write(stats_str)
def get_EF1_measure(self, human_suggestion_dict):
    """Return the EF1 score of this object's suggestions.

    EF1 is obtained by passing the results of utils.get_EP and
    utils.get_ER to utils.get_HM.

    Arguments:
    - `human_suggestion_dict`: dict of query -> list of human-annotated
      suggestions
    """
    metric_args = (self.query_list, self.suggestion_dict,
                   human_suggestion_dict)
    ep_value = utils.get_EP(*metric_args)
    er_value = utils.get_ER(*metric_args)
    return utils.get_HM(ep_value, er_value)
def get_all_stats(self, human_suggestion_dict, query_list = None,
                  suggestion_dict = None):
    """Return [EP, ER, EF1] for performance as judged by human_suggestion_dict.

    Arguments:
    - `human_suggestion_dict`: human-annotated suggestions, passed as the
      third argument to utils.get_EP and utils.get_ER.
    - `query_list`: queries to evaluate; defaults to self.query_list when
      None.
    - `suggestion_dict`: suggestions to evaluate; defaults to
      self.suggestion_dict when None.
    """
    if query_list is None:
        query_list = self.query_list
    if suggestion_dict is None:
        suggestion_dict = self.suggestion_dict

    # Use the resolved arguments so that caller-supplied overrides are
    # honoured by all three metrics rather than silently ignored.
    args = [query_list, suggestion_dict, human_suggestion_dict]
    EP = utils.get_EP(*args)
    ER = utils.get_ER(*args)
    # EF1 is derived from the EP and ER just computed, so it always
    # describes the same inputs as the other two values.
    return [EP, ER, utils.get_HM(EP, ER)]
def test_get_ER(self):
    """Check utils.get_ER when every query has human answers."""
    suggestions = {
        1: [("foo", 0.3), ("bar", 0.7)],
        2: [("baz", 0.8), ("bah", 0.2)],
    }
    human_answers = {1: ["bar", "jack"], 2: ["all", "work", "baz"]}

    # (query_list, expected ER) pairs, checked in order.
    cases = [
        ([1, 2], 5.0 / 12),
        ([1], 1.0 / 2),
        ([2], 1.0 / 3),
    ]
    for queries, expected in cases:
        actual = utils.get_ER(queries, suggestions, human_answers)
        self.assertAlmostEqual(actual, expected)