def test_build_class_graph(self):
    """Check the ancestors lookup built from a small hand-made hierarchy.

    Two chains (classA3 -> classA2 -> classA1 and classB2 -> classB1) are
    both attached to "Thing"; after building the class graph and the
    ancestors lookup, each class must list exactly its transitive parents.
    """
    annotator = Annotator(endpoint=commons.ENDPOINT)
    entity_classes = {
        "entityA": ["classA1", "classA2", "classA3"],
        "entityB": ["classB1", "classB2"],
    }

    # Register every class mentioned by any entity.
    for class_uris in entity_classes.values():
        for class_uri in class_uris:
            annotator.tgraph.add_class(class_uri)

    # Wire the two chains first, then attach both roots to "Thing".
    for child, parent in (("classA3", "classA2"),
                          ("classA2", "classA1"),
                          ("classB2", "classB1")):
        annotator.tgraph.add_parent(child, parent)
    annotator.tgraph.add_class("Thing")
    for child in ("classB1", "classA1"):
        annotator.tgraph.add_parent(child, "Thing")

    annotator.build_class_graph(entity_classes)
    annotator.build_ancestors_lookup()
    annotator.remove_unwanted_parent_classes_for_cell(entity_classes)

    expected_ancestors = {
        "classA3": ["classA1", "classA2", "Thing"],
        "classA2": ["classA1", "Thing"],
        "classB2": ["classB1", "Thing"],
        "Thing": [],
    }
    for class_uri, ancestors in expected_ancestors.items():
        self.assertCountEqual(annotator.ancestors[class_uri], ancestors)
def test_coverage_no_annotation(self):
    """Check that coverage can be recomputed after the annotator is reset.

    Coverage is first computed on two annotated cells; the annotator is
    then cleared, given two cells without any entity, and coverage is
    computed again. Both cells must still be present afterwards.
    """
    annotator = Annotator()
    annotator.cell_ent_class = {
        "CellAB": {
            "entityA": ["classA1", "classA2", "classA3"],
            "entityB": ["classB1", "classB2"],
        },
        "CellX": {
            "EntityX": ["classX", "classA3"],
            "EntityY": [],
        },
    }

    for class_uri in ("Thing", "classA1", "classA2", "classA3",
                      "classB1", "classB2", "classX"):
        annotator.tgraph.add_class(class_uri)
    for child, parent in (("classA3", "classA2"),
                          ("classA2", "classA1"),
                          ("classB2", "classB1"),
                          ("classB1", "Thing"),
                          ("classA1", "Thing"),
                          ("classX", "Thing")):
        annotator.tgraph.add_parent(child, parent)

    # Replace each cell's entity -> classes mapping with the filtered one.
    for cell, ent_classes in annotator.cell_ent_class.items():
        annotator.build_ancestors_lookup()
        filtered = annotator.remove_unwanted_parent_classes_for_cell(
            ent_classes)
        annotator.cell_ent_class[cell] = filtered
    annotator.compute_coverage()

    # Second pass: same cells, but without any entity annotation.
    annotator.clear_for_reuse()
    annotator.cell_ent_class = {"CellAB": {}, "CellX": {}}
    annotator.compute_coverage()
    self.assertEqual(len(annotator.cell_ent_class), 2)
def __init__(self, log_fname=None, title_case=False):
    """Create the annotator and reset the per-run bookkeeping.

    Args:
        log_fname: Optional path of a CSV log. When given, the file is
            (re)created and the header ``fname,colid,fs,alpha,k`` is
            written to it.
        title_case: Forwarded unchanged to ``Annotator``.
    """
    self.annotator = Annotator(endpoint=ENDPOINT,
                               title_case=title_case,
                               class_prefs=[
                                   "http://dbpedia.org/ontology/",
                                   "http://www.w3.org/2002/07/owl#Thing"
                               ])
    self.not_founds = []
    self.total_processed = 0
    self.col_id = None
    self.fpath = None
    self.k = dict()
    self.log_fname = log_fname
    if log_fname:
        # The context manager closes the handle even if the write fails.
        with open(log_fname, "w") as f:
            f.write(",".join(["fname", "colid", "fs", "alpha", "k"]) + "\n")
    # One result dictionary per fs id (1 to 5).
    for i in range(1, 6):
        self.k[i] = dict()
def test_remove_unwanted_parent(self):
    """Check that classes which are ancestors of another listed class are dropped.

    The ancestors lookup is set by hand: DDD -> DD -> D and BB -> B.
    """
    annotator = Annotator(endpoint=commons.ENDPOINT)
    annotator.ancestors = {
        "DDD": {"DD": True, "D": True},
        "DD": {"D": True},
        "D": {},
        "BB": {"B": True},
        "B": {},
    }

    # (classes given for the entity, classes expected to be kept)
    cases = (
        (["D", "DD"], ["DD"]),
        (["D", "DDD"], ["DDD"]),
        (["D", "DDD"], ["DDD"]),
        (["D", "B", "DDD"], ["DDD", "B"]),
        (["D", "B", "DDD", "BB"], ["DDD", "BB"]),
    )
    for given, kept in cases:
        wanted_classes = annotator.remove_unwanted_parent_classes_for_entity(
            given)
        self.assertCountEqual(wanted_classes, kept)
def test_coverage(self):
    """Check ancestors, Ic, Lc, m and fc after compute_coverage.

    Uses two cells and a three-branch hierarchy under "Thing"; the
    expected numbers are hard-coded per class.
    """
    annotator = Annotator()
    annotator.cell_ent_class = {
        "CellAB": {
            "entityA": ["classA1", "classA2", "classA3"],
            "entityB": ["classB1", "classB2"],
        },
        "CellX": {
            "EntityX": ["classX", "classA3"],
            "EntityY": [],
        },
    }

    for class_uri in ("Thing", "classA1", "classA2", "classA3",
                      "classB1", "classB2", "classX"):
        annotator.tgraph.add_class(class_uri)
    for child, parent in (("classA3", "classA2"),
                          ("classA2", "classA1"),
                          ("classB2", "classB1"),
                          ("classB1", "Thing"),
                          ("classA1", "Thing"),
                          ("classX", "Thing")):
        annotator.tgraph.add_parent(child, parent)

    # Replace each cell's entity -> classes mapping with the filtered one.
    for cell, ent_classes in annotator.cell_ent_class.items():
        annotator.build_ancestors_lookup()
        filtered = annotator.remove_unwanted_parent_classes_for_cell(
            ent_classes)
        annotator.cell_ent_class[cell] = filtered
    annotator.compute_coverage()

    expected_ancestors = {
        "classA3": ["classA1", "classA2", "Thing"],
        "classA2": ["classA1", "Thing"],
        "classB2": ["classB1", "Thing"],
        "classX": ["Thing"],
        "Thing": [],
    }
    for class_uri, ancestors in expected_ancestors.items():
        self.assertCountEqual(annotator.ancestors[class_uri], ancestors)

    nodes = annotator.tgraph.nodes

    # Ic
    expected_ic = {
        "classX": 0.25,
        "classA3": 0.75,
        "classB2": 0.5,
        "classA1": 0,
    }
    for class_uri, value in expected_ic.items():
        self.assertAlmostEqual(nodes[class_uri].Ic, value)

    # Lc
    expected_lc = {
        "classX": 0.25,
        "classA3": 0.75,
        "classA2": 0.75,
        "classA1": 0.75,
        "classB2": 0.5,
        "classB1": 0.5,
    }
    for class_uri, value in expected_lc.items():
        self.assertAlmostEqual(nodes[class_uri].Lc, value)
    # The root's Lc must equal the sum over the three leaf classes.
    leaves_lc = (nodes["classX"].Lc + nodes["classA3"].Lc +
                 nodes["classB2"].Lc)
    self.assertAlmostEqual(nodes["Thing"].Lc, leaves_lc)

    # m
    self.assertEqual(annotator.tgraph.m, 2)

    # fc
    expected_fc = {
        "classX": 0.25 / 2,
        "classA3": 0.75 / 2,
        "classA2": 0.75 / 2,
        "classA1": 0.75 / 2,
        "classB2": 0.5 / 2,
        "classB1": 0.5 / 2,
        "Thing": 1.5 / 2,
    }
    for class_uri, value in expected_fc.items():
        self.assertAlmostEqual(nodes[class_uri].fc, value)
def test_f(self):
    """Check compute_f / get_top_k on hand-set scores, then on an empty annotator.

    After compute_specificity, every node's fs[1..5] and fc are overwritten
    with explicit values so that the ranking produced by compute_f(0.5)
    depends only on the numbers written in this test. The annotator is
    then reset with cells that have no entities, for which m must be 0 and
    get_top_k must return nothing.
    """
    annotator = Annotator()
    annotator.cell_ent_class = {
        "CellAB": {
            "entityA": ["classA1", "classA2", "classA3"],
            "entityB": ["classB1", "classB2"],
        },
        "CellX": {
            "EntityX": ["classX", "classA3"],
            "EntityY": [],
        },
    }

    for class_uri in ("Thing", "classA1", "classA2", "classA3",
                      "classB1", "classB2", "classX"):
        annotator.tgraph.add_class(class_uri)
    for child, parent in (("classA3", "classA2"),
                          ("classA2", "classA1"),
                          ("classB2", "classB1"),
                          ("classB1", "Thing"),
                          ("classA1", "Thing"),
                          ("classX", "Thing")):
        annotator.tgraph.add_parent(child, parent)

    # Replace each cell's entity -> classes mapping with the filtered one.
    for cell, ent_classes in annotator.cell_ent_class.items():
        annotator.build_ancestors_lookup()
        filtered = annotator.remove_unwanted_parent_classes_for_cell(
            ent_classes)
        annotator.cell_ent_class[cell] = filtered

    # Each parent's count is its child's count plus some extra instances.
    counts = annotator.classes_counts
    counts["classB2"] = 35
    counts["classB1"] = counts["classB2"] + 17
    counts["classA3"] = 30
    counts["classA2"] = counts["classA3"] + 10
    counts["classA1"] = counts["classA2"] + 10
    counts["classX"] = 10
    counts["Thing"] = (counts["classX"] + counts["classA1"] +
                       counts["classB1"])
    annotator.compute_specificity()

    non_root = ("classX", "classA3", "classA2", "classA1", "classB2",
                "classB1")

    # fs 3 -- explicit values of the form "-Ls + 1" for each node.
    # classA2 previously lacked the "+ 1" that every other node has.
    fs3_values = {
        "classX": -10.0 / 112 + 1,
        "classA3": -50.0 / 112 * 40.0 / 50 * 30.0 / 40 + 1,
        "classA2": -50.0 / 112 * 40.0 / 50 + 1,
        "classA1": -50.0 / 112 + 1,
        "classB2": -35.0 / 52 * 52.0 / 112 + 1,
        "classB1": -52.0 / 112 + 1,
        "Thing": 0,
    }
    for class_uri, value in fs3_values.items():
        annotator.tgraph.nodes[class_uri].fs[3] = value

    # fs 1, 2, 4 and 5 are derived from each node's Ls; the root gets 0.
    fs_formulas = (
        (1, lambda ls: math.sqrt(1 - ls * ls)),
        (2, lambda ls: -1 * ls * ls + 1),
        (4, lambda ls: 1 - math.sqrt(ls)),
        (5, lambda ls: (1 - math.sqrt(ls))**2),
    )
    for fsid, formula in fs_formulas:
        for class_uri in non_root:
            node = annotator.tgraph.nodes[class_uri]
            node.fs[fsid] = formula(node.Ls)
        annotator.tgraph.nodes["Thing"].fs[fsid] = 0

    fc_values = {
        "classX": 0.25 / 2,
        "classA3": 0.75 / 2,
        "classA2": 0.75 / 2,
        "classA1": 0.75 / 2,
        "classB2": 0.5 / 2,
        "classB1": 0.5 / 2,
        "Thing": 1.5 / 2,
    }
    for class_uri, value in fc_values.items():
        annotator.tgraph.nodes[class_uri].fc = value

    annotator.compute_f(0.5)
    self.assertCountEqual(annotator.get_top_k(k=1, fsid=3), ["classA3"])

    # Second pass: same cells, but without any entity annotation.
    annotator.clear_for_reuse()
    annotator.cell_ent_class = {"CellAB": {}, "CellX": {}}
    annotator.compute_coverage()
    annotator.compute_specificity()
    annotator.compute_f(0.5)
    self.assertEqual(annotator.tgraph.m, 0)
    self.assertEqual(len(annotator.get_top_k(k=1, fsid=3)), 0)
def test_specificity(self):
    """Check Is, Ls and fs[1..5] computed from hand-set class counts.

    The counts are chosen so that Thing=112, classA1=50, classA2=40,
    classA3=30, classB1=52, classB2=35 and classX=10.
    """
    annotator = Annotator()
    annotator.cell_ent_class = {
        "CellAB": {
            "entityA": ["classA1", "classA2", "classA3"],
            "entityB": ["classB1", "classB2"],
        },
        "CellX": {
            "EntityX": ["classX", "classA3"],
            "EntityY": [],
        },
    }

    for class_uri in ("Thing", "classA1", "classA2", "classA3",
                      "classB1", "classB2", "classX"):
        annotator.tgraph.add_class(class_uri)
    for child, parent in (("classA3", "classA2"),
                          ("classA2", "classA1"),
                          ("classB2", "classB1"),
                          ("classB1", "Thing"),
                          ("classA1", "Thing"),
                          ("classX", "Thing")):
        annotator.tgraph.add_parent(child, parent)

    # Replace each cell's entity -> classes mapping with the filtered one.
    for cell, ent_classes in annotator.cell_ent_class.items():
        annotator.build_ancestors_lookup()
        filtered = annotator.remove_unwanted_parent_classes_for_cell(
            ent_classes)
        annotator.cell_ent_class[cell] = filtered

    # Each parent's count is its child's count plus some extra instances.
    counts = annotator.classes_counts
    counts["classB2"] = 35
    counts["classB1"] = counts["classB2"] + 17
    counts["classA3"] = 30
    counts["classA2"] = counts["classA3"] + 10
    counts["classA1"] = counts["classA2"] + 10
    counts["classX"] = 10
    counts["Thing"] = (counts["classX"] + counts["classA1"] +
                       counts["classB1"])

    annotator.compute_Is()
    annotator.compute_Ls()
    annotator.compute_fs()

    nodes = annotator.tgraph.nodes

    # Is
    expected_is = {
        "classX": 10.0 / 112,
        "classA1": 50.0 / 112,
        "classA3": 30.0 / 40,
        "classB2": 35.0 / 52,
    }
    for class_uri, value in expected_is.items():
        self.assertAlmostEqual(nodes[class_uri].Is, value)

    # Ls
    expected_ls = {
        "classX": 10.0 / 112,
        "classA3": 50.0 / 112 * 40.0 / 50 * 30.0 / 40,
        "classA1": 50.0 / 112,
        "classB2": 35.0 / 52 * 52.0 / 112,
        "classB1": 52.0 / 112,
    }
    for class_uri, value in expected_ls.items():
        self.assertAlmostEqual(nodes[class_uri].Ls, value)

    # fs 3
    expected_fs3 = {
        "classX": -10.0 / 112 + 1,
        "classA3": -50.0 / 112 * 40.0 / 50 * 30.0 / 40 + 1,
        "classA2": -50.0 / 112 * 40.0 / 50 + 1,
        "classA1": -50.0 / 112 + 1,
        "classB2": -35.0 / 52 * 52.0 / 112 + 1,
        "classB1": -52.0 / 112 + 1,
        "Thing": 0,
    }
    for class_uri, value in expected_fs3.items():
        self.assertAlmostEqual(nodes[class_uri].fs[3], value)

    # fs 1, 2, 4 and 5 are checked against formulas over each node's Ls;
    # the root is expected to score 0 for every one of them.
    non_root = ("classX", "classA3", "classA2", "classA1", "classB2",
                "classB1")
    fs_formulas = (
        (1, lambda ls: math.sqrt(1 - ls * ls)),
        (2, lambda ls: -1 * ls * ls + 1),
        (4, lambda ls: 1 - math.sqrt(ls)),
        (5, lambda ls: (1 - math.sqrt(ls))**2),
    )
    for fsid, formula in fs_formulas:
        for class_uri in non_root:
            Ls = nodes[class_uri].Ls
            self.assertAlmostEqual(nodes[class_uri].fs[fsid], formula(Ls))
        self.assertAlmostEqual(nodes["Thing"].fs[fsid], 0)
class ExperimentBase:
    """Annotate table columns and record the rank of the correct class.

    For every fs id from 1 to 5, ``self.k[fsid]`` maps
    ``fpath + str(col_id)`` to the best (smallest) rank at which one of the
    correct candidates appeared. Files for which the annotator returned no
    candidates at all are collected in ``self.not_founds``. When a log file
    name is given, one CSV line per result is appended to it.
    """

    def __init__(self, log_fname=None, title_case=False):
        """Create the annotator and reset the per-run bookkeeping.

        Args:
            log_fname: Optional path of a CSV log. When given, the file is
                (re)created and the header ``fname,colid,fs,alpha,k`` is
                written to it.
            title_case: Forwarded unchanged to ``Annotator``.
        """
        self.annotator = Annotator(endpoint=ENDPOINT,
                                   title_case=title_case,
                                   class_prefs=[
                                       "http://dbpedia.org/ontology/",
                                       "http://www.w3.org/2002/07/owl#Thing"
                                   ])
        self.not_founds = []
        self.total_processed = 0
        self.col_id = None
        self.fpath = None
        self.k = dict()
        self.log_fname = log_fname
        if log_fname:
            # The context manager closes the handle even if the write fails.
            with open(log_fname, "w") as f:
                f.write(",".join(["fname", "colid", "fs", "alpha", "k"]) +
                        "\n")
        for i in range(1, 6):
            self.k[i] = dict()

    def append_line(self, line):
        """Append ``line`` plus a newline to the log file, if one is set."""
        if self.log_fname:
            with open(self.log_fname, "a") as f:
                f.write(line + "\n")

    def _log_result(self, fsid, alpha, k):
        """Append one ``fname,colid,fs,alpha,k`` line for the current column."""
        self.append_line(",".join([
            self.fpath.split("/")[-1],
            str(self.col_id),
            str(fsid),
            str(alpha),
            str(k)
        ]))

    def clear(self):
        """Forget the current file/column and all recorded results."""
        self.not_founds = []
        self.fpath = None
        self.col_id = None
        self.k = dict()
        for i in range(1, 6):
            self.k[i] = dict()

    def annotate_single(self, fpath, col_id):
        """Reset the annotator and annotate column ``col_id`` of ``fpath``."""
        self.annotator.clear_for_reuse()
        self.fpath = fpath
        self.col_id = col_id
        self.annotator.annotate_table(file_dir=fpath, subject_col_id=col_id)

    def validate_with_opt_alpha(self, correct_candidates, alpha_inc=0.001):
        """Run ``validate_with_opt_alpha_fsid`` for every fs id from 1 to 5."""
        print("\n")
        print("fpath: %s - col id: %d" % (self.fpath, self.col_id))
        for i in range(1, 6):
            self.validate_with_opt_alpha_fsid(correct_candidates,
                                              fsid=i,
                                              alpha_inc=alpha_inc)

    def validate_with_opt_alpha_fsid(self,
                                     correct_candidates,
                                     fsid,
                                     alpha_inc=0.001):
        """Sweep alpha and record the best rank of a correct candidate.

        Alpha starts at ``alpha_inc`` and grows by ``alpha_inc`` while the
        previous value is below 1. For each alpha the annotator scores are
        recomputed and the ranked candidates are inspected.

        Args:
            correct_candidates: Container of acceptable class names.
            fsid: Which fs function to rank with (passed to ``get_top_k``).
            alpha_inc: Step used for the alpha sweep.

        Outcomes:
            * No candidates at all: ``self.fpath`` is added to
              ``self.not_founds`` (once) and nothing is recorded in
              ``self.k``.
            * A correct candidate at rank 1: that result is logged and
              recorded immediately and the sweep stops.
            * Otherwise the smallest rank seen over the whole sweep is
              recorded, or the sentinel 999 when no correct candidate ever
              appeared.
        """
        alpha = 0.0
        kmin = None
        max_alpha = None
        key = self.fpath + str(self.col_id)
        while alpha < 1:
            alpha += alpha_inc
            self.annotator.compute_f(alpha)
            candidates = self.annotator.get_top_k(fsid=fsid)
            if len(candidates) == 0:
                if self.fpath not in self.not_founds:
                    self.not_founds.append(self.fpath)
                return
            for idx, c in enumerate(candidates):
                k = idx + 1
                if c in correct_candidates:
                    if kmin is None or k < kmin:
                        kmin = k
                        max_alpha = alpha
                    if k == 1:
                        # Rank 1 cannot be improved; stop the sweep here.
                        self._log_result(fsid, max_alpha, kmin)
                        print("candidates (%d): %s" %
                              (fsid, str(candidates[:3])))
                        self.k[fsid][key] = k
                        return
        # NOTE: a column with candidates but no correct one is kept in the
        # results with rank 999 rather than being added to not_founds.
        if kmin is None:
            print("Special case: " + self.fpath)
            kmin = 999
        self.k[fsid][key] = kmin
        self._log_result(fsid, max_alpha, kmin)
        print("max alpha: %s (fs%d)" % (str(max_alpha), fsid))

    def get_scores(self, k):
        """Print scores for every fs id and store the processed total.

        The total is ``len(self.not_founds) + len(self.k[1])``.
        """
        for i in range(1, 6):
            self.get_scores_fsid(k, i)
        total_processed = len(self.not_founds) + len(self.k[1])
        print("total processed: %d\n\n" % total_processed)
        self.total_processed = total_processed

    def get_scores_fsid(self, k, fsid):
        """Compute, print and return precision, recall and F1 for ``fsid``.

        A recorded column counts as correct when its rank is at most ``k``
        and as incorrect otherwise; ``self.not_founds`` entries count as
        misses for recall only. Any ratio with a zero denominator is 0.

        Returns:
            Tuple ``(precision, recall, f1)``.
        """
        corr = 0.0
        incorr = 0.0
        notf = len(self.not_founds)
        for f in self.k[fsid]:
            if self.k[fsid][f] <= k:
                corr += 1
            else:
                incorr += 1
        if (corr + incorr) > 0:
            prec = corr / (corr + incorr)
        else:
            prec = 0
        if (corr + notf) > 0:
            rec = corr / (corr + notf)
        else:
            rec = 0
        if (prec + rec) > 0:
            f1 = 2.0 * prec * rec / (prec + rec)
        else:
            f1 = 0
        print("fsid: %d" % fsid)
        print("precision: %.2f" % prec)
        print("recall: %.2f" % rec)
        print("F1: %.2f" % f1)
        return prec, rec, f1