示例#1
0
 def test_build_class_graph(self):
     """Build a small two-branch hierarchy and verify the ancestors lookup.

     The hierarchy is classA3 -> classA2 -> classA1 -> Thing and
     classB2 -> classB1 -> Thing. After the class graph and the ancestors
     lookup are built, every class must list exactly its transitive
     parents, and the root "Thing" must have none.
     """
     annotator = Annotator(endpoint=commons.ENDPOINT)
     entity_classes = {
         "entityA": ["classA1", "classA2", "classA3"],
         "entityB": ["classB1", "classB2"]
     }
     # Register every class that appears for any entity.
     for class_uris in entity_classes.values():
         for class_uri in class_uris:
             annotator.tgraph.add_class(class_uri)

     # Links inside each branch first; "Thing" is registered afterwards,
     # in the same order as the calls were originally issued.
     inner_links = (
         ("classA3", "classA2"),
         ("classA2", "classA1"),
         ("classB2", "classB1"),
     )
     for child, parent in inner_links:
         annotator.tgraph.add_parent(child, parent)
     annotator.tgraph.add_class("Thing")
     for branch_top in ("classB1", "classA1"):
         annotator.tgraph.add_parent(branch_top, "Thing")

     annotator.build_class_graph(entity_classes)
     annotator.build_ancestors_lookup()
     annotator.remove_unwanted_parent_classes_for_cell(entity_classes)

     expected_ancestors = (
         ("classA3", ["classA1", "classA2", "Thing"]),
         ("classA2", ["classA1", "Thing"]),
         ("classB2", ["classB1", "Thing"]),
         ("Thing", []),
     )
     for class_uri, ancestors in expected_ancestors:
         self.assertCountEqual(annotator.ancestors[class_uri], ancestors)
示例#2
0
    def test_coverage_no_annotation(self):
        """Coverage must still run when the cells carry no entities at all.

        The annotator is first exercised with a populated table, then
        cleared for reuse and fed two cells with empty entity mappings.
        ``compute_coverage`` has to complete and leave both cells in place.
        """
        annotator = Annotator()

        annotator.cell_ent_class = {
            "CellAB": {
                "entityA": ["classA1", "classA2", "classA3"],
                "entityB": ["classB1", "classB2"]
            },
            "CellX": {
                "EntityX": ["classX", "classA3"],
                "EntityY": [],
            }
        }

        all_classes = ("Thing", "classA1", "classA2", "classA3", "classB1",
                       "classB2", "classX")
        for class_uri in all_classes:
            annotator.tgraph.add_class(class_uri)

        child_parent_links = (
            ("classA3", "classA2"),
            ("classA2", "classA1"),
            ("classB2", "classB1"),
            ("classB1", "Thing"),
            ("classA1", "Thing"),
            ("classX", "Thing"),
        )
        for child, parent in child_parent_links:
            annotator.tgraph.add_parent(child, parent)

        # Replace each cell's mapping with the filtered version returned by
        # the annotator (keys are only overwritten, never added or removed).
        for cell, ent_classes in annotator.cell_ent_class.items():
            annotator.build_ancestors_lookup()
            filtered = annotator.remove_unwanted_parent_classes_for_cell(
                ent_classes)
            annotator.cell_ent_class[cell] = filtered

        annotator.compute_coverage()
        annotator.clear_for_reuse()

        # Second round: same cells, but without any entity annotations.
        annotator.cell_ent_class = {"CellAB": {}, "CellX": {}}
        annotator.compute_coverage()
        self.assertEqual(len(annotator.cell_ent_class), 2)
示例#3
0
 def __init__(self, log_fname=None, title_case=False):
     """Create the annotator and the per-experiment bookkeeping.

     Args:
         log_fname: optional path of a CSV log file. When given, the file
             is (re)created and the header row
             ``fname,colid,fs,alpha,k`` is written to it.
         title_case: forwarded unchanged to ``Annotator``.
     """
     self.annotator = Annotator(endpoint=ENDPOINT,
                                title_case=title_case,
                                class_prefs=[
                                    "http://dbpedia.org/ontology/",
                                    "http://www.w3.org/2002/07/owl#Thing"
                                ])
     self.not_founds = []
     self.total_processed = 0
     self.col_id = None
     self.fpath = None
     # One result dictionary per fs id (1..5).
     self.k = {fsid: dict() for fsid in range(1, 6)}
     self.log_fname = log_fname
     if log_fname:
         # Context manager so the handle is closed even if write() raises.
         with open(log_fname, "w") as f:
             f.write(",".join(["fname", "colid", "fs", "alpha", "k"]) + "\n")
示例#4
0
 def test_remove_unwanted_parent(self):
     """Only the most specific class of each branch should be kept.

     The ancestors lookup is injected by hand: DDD -> DD -> D and BB -> B.
     For every input list, any class that is an ancestor of another class
     in the same list is expected to be dropped.
     """
     annotator = Annotator(endpoint=commons.ENDPOINT)
     annotator.ancestors = {
         "DDD": {"DD": True, "D": True},
         "DD": {"D": True},
         "D": dict(),
         "BB": {"B": True},
         "B": dict(),
     }
     # (classes given for the entity, classes expected to survive).
     # The ["D", "DDD"] case is listed twice on purpose: the call is
     # repeated to mirror the original sequence of invocations.
     cases = (
         (["D", "DD"], ["DD"]),
         (["D", "DDD"], ["DDD"]),
         (["D", "DDD"], ["DDD"]),
         (["D", "B", "DDD"], ["DDD", "B"]),
         (["D", "B", "DDD", "BB"], ["DDD", "BB"]),
     )
     for given, expected in cases:
         kept = annotator.remove_unwanted_parent_classes_for_entity(given)
         self.assertCountEqual(kept, expected)
示例#5
0
    def test_coverage(self):
        """Verify ancestors and the coverage values Ic, Lc, m and fc.

        Two cells are annotated against a hierarchy with three branches
        under "Thing" (A1/A2/A3, B1/B2 and X). After ``compute_coverage``
        the per-node coverage attributes must match the hand-computed
        values below.
        """
        annotator = Annotator()
        annotator.cell_ent_class = {
            "CellAB": {
                "entityA": ["classA1", "classA2", "classA3"],
                "entityB": ["classB1", "classB2"]
            },
            "CellX": {
                "EntityX": ["classX", "classA3"],
                "EntityY": [],
            }
        }

        all_classes = ("Thing", "classA1", "classA2", "classA3", "classB1",
                       "classB2", "classX")
        for class_uri in all_classes:
            annotator.tgraph.add_class(class_uri)

        child_parent_links = (
            ("classA3", "classA2"),
            ("classA2", "classA1"),
            ("classB2", "classB1"),
            ("classB1", "Thing"),
            ("classA1", "Thing"),
            ("classX", "Thing"),
        )
        for child, parent in child_parent_links:
            annotator.tgraph.add_parent(child, parent)

        for cell, ent_classes in annotator.cell_ent_class.items():
            annotator.build_ancestors_lookup()
            filtered = annotator.remove_unwanted_parent_classes_for_cell(
                ent_classes)
            annotator.cell_ent_class[cell] = filtered
        annotator.compute_coverage()

        def node(class_uri):
            # Looked up lazily on every use, exactly like the direct
            # annotator.tgraph.nodes[...] accesses it replaces.
            return annotator.tgraph.nodes[class_uri]

        expected_ancestors = (
            ("classA3", ["classA1", "classA2", "Thing"]),
            ("classA2", ["classA1", "Thing"]),
            ("classB2", ["classB1", "Thing"]),
            ("classX", ["Thing"]),
            ("Thing", []),
        )
        for class_uri, ancestors in expected_ancestors:
            self.assertCountEqual(annotator.ancestors[class_uri], ancestors)

        # Ic
        expected_ic = (
            ("classX", 0.25),
            ("classA3", 0.75),
            ("classB2", 0.5),
            ("classA1", 0),
        )
        for class_uri, ic in expected_ic:
            self.assertAlmostEqual(node(class_uri).Ic, ic)

        # Lc
        expected_lc = (
            ("classX", 0.25),
            ("classA3", 0.75),
            ("classA2", 0.75),
            ("classA1", 0.75),
            ("classB2", 0.5),
            ("classB1", 0.5),
        )
        for class_uri, lc in expected_lc:
            self.assertAlmostEqual(node(class_uri).Lc, lc)
        # The root accumulates the Lc of the deepest class of each branch.
        self.assertAlmostEqual(
            node("Thing").Lc,
            node("classX").Lc + node("classA3").Lc + node("classB2").Lc)

        # m
        self.assertEqual(annotator.tgraph.m, 2)

        # fc
        expected_fc = (
            ("classX", 0.25 / 2),
            ("classA3", 0.75 / 2),
            ("classA2", 0.75 / 2),
            ("classA1", 0.75 / 2),
            ("classB2", 0.5 / 2),
            ("classB1", 0.5 / 2),
            ("Thing", 1.5 / 2),
        )
        for class_uri, fc in expected_fc:
            self.assertAlmostEqual(node(class_uri).fc, fc)
示例#6
0
    def test_f(self):
        """Check the top-ranked class after combining coverage and specificity.

        Specificity (fs) and coverage (fc) scores are pinned to
        hand-computed values on every node, then ``compute_f(0.5)`` is run
        and the best candidate for fs id 3 must be "classA3". Afterwards
        the annotator is cleared and re-run on cells without entities,
        which must yield ``m == 0`` and no candidates.
        """
        annotator = Annotator()
        annotator.cell_ent_class = {
            "CellAB": {
                "entityA": ["classA1", "classA2", "classA3"],
                "entityB": ["classB1", "classB2"]
            },
            "CellX": {
                "EntityX": ["classX", "classA3"],
                "EntityY": [],
            }
        }

        all_classes = ("Thing", "classA1", "classA2", "classA3", "classB1",
                       "classB2", "classX")
        for class_uri in all_classes:
            annotator.tgraph.add_class(class_uri)

        child_parent_links = (
            ("classA3", "classA2"),
            ("classA2", "classA1"),
            ("classB2", "classB1"),
            ("classB1", "Thing"),
            ("classA1", "Thing"),
            ("classX", "Thing"),
        )
        for child, parent in child_parent_links:
            annotator.tgraph.add_parent(child, parent)

        for cell in annotator.cell_ent_class:
            d = annotator.cell_ent_class[cell]
            annotator.build_ancestors_lookup()
            new_d = annotator.remove_unwanted_parent_classes_for_cell(d)
            annotator.cell_ent_class[cell] = new_d

        # Instance counts: each parent holds its child's count plus extras,
        # and "Thing" is the sum of the three branch tops (10 + 50 + 52).
        counts = annotator.classes_counts
        counts["classB2"] = 35
        counts["classB1"] = counts["classB2"] + 17
        counts["classA3"] = 30
        counts["classA2"] = counts["classA3"] + 10
        counts["classA1"] = counts["classA2"] + 10
        counts["classX"] = 10
        counts["Thing"] = (counts["classX"] + counts["classA1"] +
                           counts["classB1"])

        annotator.compute_specificity()

        def node(class_uri):
            # Resolved on every use so a rebuilt graph is never cached.
            return annotator.tgraph.nodes[class_uri]

        non_root_classes = ("classX", "classA3", "classA2", "classA1",
                            "classB2", "classB1")

        # fs 3: 1 - Ls, written out from the counts above.
        node("classX").fs[3] = -10.0 / 112 + 1
        node("classA3").fs[3] = -50.0 / 112 * 40.0 / 50 * 30.0 / 40 + 1
        # The "+ 1" was missing for classA2 only, which disagreed with
        # every sibling line and with the value test_specificity expects.
        node("classA2").fs[3] = -50.0 / 112 * 40.0 / 50 + 1
        node("classA1").fs[3] = -50.0 / 112 + 1
        node("classB2").fs[3] = -35.0 / 52 * 52.0 / 112 + 1
        node("classB1").fs[3] = -52.0 / 112 + 1
        node("Thing").fs[3] = 0

        # fs 1, 2, 4 and 5 are each a function of the node's own Ls;
        # the root is pinned to 0 for every variant.
        fs_from_ls = (
            (1, lambda ls: math.sqrt(1 - ls * ls)),
            (2, lambda ls: -1 * ls * ls + 1),
            (4, lambda ls: 1 - math.sqrt(ls)),
            (5, lambda ls: (1 - math.sqrt(ls))**2),
        )
        for fsid, formula in fs_from_ls:
            for class_uri in non_root_classes:
                node(class_uri).fs[fsid] = formula(node(class_uri).Ls)
            node("Thing").fs[fsid] = 0

        coverage_scores = (
            ("classX", 0.25 / 2),
            ("classA3", 0.75 / 2),
            ("classA2", 0.75 / 2),
            ("classA1", 0.75 / 2),
            ("classB2", 0.5 / 2),
            ("classB1", 0.5 / 2),
            ("Thing", 1.5 / 2),
        )
        for class_uri, fc in coverage_scores:
            node(class_uri).fc = fc

        annotator.compute_f(0.5)
        self.assertCountEqual(annotator.get_top_k(k=1, fsid=3), ["classA3"])

        # Reuse the annotator on a table without any entity annotations.
        annotator.clear_for_reuse()
        annotator.cell_ent_class = {"CellAB": {}, "CellX": {}}
        annotator.compute_coverage()
        annotator.compute_specificity()
        annotator.compute_f(0.5)
        self.assertEqual(annotator.tgraph.m, 0)
        self.assertEqual(len(annotator.get_top_k(k=1, fsid=3)), 0)
示例#7
0
    def test_specificity(self):
        """Verify Is, Ls and the five fs variants against hand-computed values.

        Instance counts are assigned so that each parent holds its child's
        count plus some extras ("Thing" totals 112). Is, Ls and fs are then
        computed and compared with the expected ratios; fs 1, 2, 4 and 5
        are checked as functions of each node's own Ls.
        """
        annotator = Annotator()
        annotator.cell_ent_class = {
            "CellAB": {
                "entityA": ["classA1", "classA2", "classA3"],
                "entityB": ["classB1", "classB2"]
            },
            "CellX": {
                "EntityX": ["classX", "classA3"],
                "EntityY": [],
            }
        }
        all_classes = ("Thing", "classA1", "classA2", "classA3", "classB1",
                       "classB2", "classX")
        for class_uri in all_classes:
            annotator.tgraph.add_class(class_uri)

        child_parent_links = (
            ("classA3", "classA2"),
            ("classA2", "classA1"),
            ("classB2", "classB1"),
            ("classB1", "Thing"),
            ("classA1", "Thing"),
            ("classX", "Thing"),
        )
        for child, parent in child_parent_links:
            annotator.tgraph.add_parent(child, parent)

        for cell, ent_classes in annotator.cell_ent_class.items():
            annotator.build_ancestors_lookup()
            filtered = annotator.remove_unwanted_parent_classes_for_cell(
                ent_classes)
            annotator.cell_ent_class[cell] = filtered

        counts = annotator.classes_counts
        counts["classB2"] = 35
        counts["classB1"] = counts["classB2"] + 17

        counts["classA3"] = 30
        counts["classA2"] = counts["classA3"] + 10
        counts["classA1"] = counts["classA2"] + 10

        counts["classX"] = 10

        counts["Thing"] = (counts["classX"] + counts["classA1"] +
                           counts["classB1"])

        annotator.compute_Is()
        annotator.compute_Ls()
        annotator.compute_fs()

        def node(class_uri):
            # Looked up on every use, like the direct accesses it replaces.
            return annotator.tgraph.nodes[class_uri]

        # Is
        expected_is = (
            ("classX", 10.0 / 112),
            ("classA1", 50.0 / 112),
            ("classA3", 30.0 / 40),
            ("classB2", 35.0 / 52),
        )
        for class_uri, value in expected_is:
            self.assertAlmostEqual(node(class_uri).Is, value)

        # Ls
        expected_ls = (
            ("classX", 10.0 / 112),
            ("classA3", 50.0 / 112 * 40.0 / 50 * 30.0 / 40),
            ("classA1", 50.0 / 112),
            ("classB2", 35.0 / 52 * 52.0 / 112),
            ("classB1", 52.0 / 112),
        )
        for class_uri, value in expected_ls:
            self.assertAlmostEqual(node(class_uri).Ls, value)

        # fs 3
        expected_fs3 = (
            ("classX", -10.0 / 112 + 1),
            ("classA3", -50.0 / 112 * 40.0 / 50 * 30.0 / 40 + 1),
            ("classA2", -50.0 / 112 * 40.0 / 50 + 1),
            ("classA1", -50.0 / 112 + 1),
            ("classB2", -35.0 / 52 * 52.0 / 112 + 1),
            ("classB1", -52.0 / 112 + 1),
            ("Thing", 0),
        )
        for class_uri, value in expected_fs3:
            self.assertAlmostEqual(node(class_uri).fs[3], value)

        # fs 1, 2, 4 and 5: each is a function of the node's own Ls, and
        # the root "Thing" is expected to score 0 for every variant.
        non_root_classes = ("classX", "classA3", "classA2", "classA1",
                            "classB2", "classB1")
        fs_from_ls = (
            (1, lambda ls: math.sqrt(1 - ls * ls)),
            (2, lambda ls: -1 * ls * ls + 1),
            (4, lambda ls: 1 - math.sqrt(ls)),
            (5, lambda ls: (1 - math.sqrt(ls))**2),
        )
        for fsid, formula in fs_from_ls:
            for class_uri in non_root_classes:
                ls = node(class_uri).Ls
                self.assertAlmostEqual(node(class_uri).fs[fsid], formula(ls))
            self.assertAlmostEqual(node("Thing").fs[fsid], 0)
示例#8
0
class ExperimentBase:
    """Run the annotator over table columns and score its top-k candidates.

    Attributes:
        annotator: the ``Annotator`` used for every table.
        not_founds: file paths for which no candidate was returned.
        total_processed: count set by ``get_scores``.
        fpath / col_id: file and column currently being processed.
        k: ``{fsid: {fpath + str(col_id): rank}}`` for fs ids 1..5, where
            rank is the best 1-based position of a correct candidate
            (999 when none was found for any alpha).
        log_fname: optional CSV log path; ``None`` disables logging.
    """

    def __init__(self, log_fname=None, title_case=False):
        """Create the annotator and, if requested, start a fresh CSV log.

        Args:
            log_fname: optional path of the CSV log. When given, the file
                is (re)created with the header ``fname,colid,fs,alpha,k``.
            title_case: forwarded unchanged to ``Annotator``.
        """
        self.annotator = Annotator(endpoint=ENDPOINT,
                                   title_case=title_case,
                                   class_prefs=[
                                       "http://dbpedia.org/ontology/",
                                       "http://www.w3.org/2002/07/owl#Thing"
                                   ])
        self.not_founds = []
        self.total_processed = 0
        self.col_id = None
        self.fpath = None
        self.k = self._empty_ranks()
        self.log_fname = log_fname
        if log_fname:
            # Context manager so the handle is closed even if write() raises.
            with open(log_fname, "w") as f:
                f.write(",".join(["fname", "colid", "fs", "alpha", "k"]) +
                        "\n")

    @staticmethod
    def _empty_ranks():
        """Return a fresh ``{fsid: {}}`` mapping for fs ids 1..5."""
        return {fsid: dict() for fsid in range(1, 6)}

    def _log_result(self, fsid, alpha, k):
        """Append one ``fname,colid,fs,alpha,k`` row for the current column."""
        self.append_line(",".join([
            self.fpath.split("/")[-1],
            str(self.col_id),
            str(fsid),
            str(alpha),
            str(k)
        ]))

    def append_line(self, line):
        """Append ``line`` plus a newline to the log; no-op without a log."""
        if self.log_fname:
            with open(self.log_fname, "a") as f:
                f.write(line + "\n")

    def clear(self):
        """Reset the per-run results (not-found list, ranks, current file)."""
        self.not_founds = []
        self.fpath = None
        self.col_id = None
        self.k = self._empty_ranks()

    def annotate_single(self, fpath, col_id):
        """Annotate column ``col_id`` of the table at ``fpath``.

        The annotator is cleared for reuse first, and the file and column
        are remembered for the validation step that follows.
        """
        self.annotator.clear_for_reuse()
        self.fpath = fpath
        self.col_id = col_id
        self.annotator.annotate_table(file_dir=fpath, subject_col_id=col_id)

    def validate_with_opt_alpha(self, correct_candidates, alpha_inc=0.001):
        """Run ``validate_with_opt_alpha_fsid`` for every fs id (1..5)."""
        print("\n")
        print("fpath: %s - col id: %d" % (self.fpath, self.col_id))
        for i in range(1, 6):
            self.validate_with_opt_alpha_fsid(correct_candidates,
                                              fsid=i,
                                              alpha_inc=alpha_inc)

    def validate_with_opt_alpha_fsid(self,
                                     correct_candidates,
                                     fsid,
                                     alpha_inc=0.001):
        """Sweep alpha and record the best rank of a correct candidate.

        For alpha = alpha_inc, 2 * alpha_inc, ... the scores are recomputed
        and the candidate list for ``fsid`` is fetched. The sweep stops as
        soon as a correct candidate is ranked first. Otherwise the best
        rank seen, and the first alpha that produced it, are kept.

        Args:
            correct_candidates: container of acceptable class identifiers.
            fsid: which fs variant to rank by.
            alpha_inc: step of the alpha sweep.

        Side effects:
            * no candidates at all: ``self.fpath`` is added to
              ``self.not_founds`` (once) and nothing else is recorded;
            * otherwise the rank is stored in ``self.k[fsid]`` and a row is
              appended to the log; rank 999 marks "never found".
        """
        alpha = 0.0
        kmin = None
        max_alpha = None
        # NOTE(review): alpha is accumulated in floating point, so the last
        # value tried may land marginally past 1 rather than exactly on it.
        while alpha < 1:
            alpha += alpha_inc
            self.annotator.compute_f(alpha)
            candidates = self.annotator.get_top_k(fsid=fsid)

            if len(candidates) == 0:
                if self.fpath not in self.not_founds:
                    self.not_founds.append(self.fpath)
                return
            for idx, c in enumerate(candidates):
                k = idx + 1
                if c not in correct_candidates:
                    continue
                if kmin is None or k < kmin:
                    kmin = k
                    max_alpha = alpha
                if k == 1:
                    # Cannot improve on rank 1: record it and stop early.
                    self._log_result(fsid, max_alpha, kmin)
                    print("candidates (%d): %s" %
                          (fsid, str(candidates[:3])))
                    self.k[fsid][self.fpath + str(self.col_id)] = k
                    return

        # Candidates were returned but none was ever correct: keep the
        # column in the results with a sentinel rank instead of dropping it.
        if kmin is None:
            print("Special case: " + self.fpath)
            kmin = 999

        self.k[fsid][self.fpath + str(self.col_id)] = kmin
        self._log_result(fsid, max_alpha, kmin)
        print("max alpha: %s (fs%d)" % (str(max_alpha), fsid))

    def get_scores(self, k):
        """Print the scores of every fs id at cutoff ``k``.

        Also stores ``total_processed``: the number of not-found files plus
        the number of columns recorded for fs id 1.
        """
        for i in range(1, 6):
            self.get_scores_fsid(k, i)
        total_processed = len(self.not_founds) + len(self.k[1])
        print("total processed: %d\n\n" % total_processed)
        self.total_processed = total_processed

    def get_scores_fsid(self, k, fsid):
        """Compute and print precision, recall and F1 for one fs id.

        A recorded column is correct when its rank is ``<= k``. Precision
        is correct / recorded; recall is correct / (correct + not found).
        Any ratio with a zero denominator is reported as 0.

        Returns:
            ``(precision, recall, f1)``.
        """
        ranks = self.k[fsid].values()
        corr = float(sum(1 for rank in ranks if rank <= k))
        incorr = float(len(ranks)) - corr
        notf = len(self.not_founds)

        prec = corr / (corr + incorr) if (corr + incorr) > 0 else 0
        rec = corr / (corr + notf) if (corr + notf) > 0 else 0
        f1 = 2.0 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0

        print("fsid: %d" % fsid)
        print("precision: %.2f" % prec)
        print("recall: %.2f" % rec)
        print("F1: %.2f" % f1)
        return prec, rec, f1