Пример #1
0
 def input_edge(self, edge):
     """Make a two-element edge headed by 'the' a coreference of its
     second element.

     The edge qualifies when it is not an atom, has exactly two
     elements, its first element is an atom whose root is 'the', and
     ``has_proper_concept`` accepts its second element. On a match the
     edge and its second element are passed to ``make_corefs`` and
     ``self.corefs`` is incremented.
     """
     # only non-atomic edges with exactly two elements can match
     if edge.is_atom() or len(edge) != 2:
         return

     head = edge[0]
     if not head.is_atom() or head.root() != 'the':
         return

     target = edge[1]
     if has_proper_concept(target):
         make_corefs(self.hg, edge, target)
         self.corefs += 1
Пример #2
0
    def test_connect_coref_sets(self):
        """Build two separate coreference sets, then link one member of
        each and check that they become a single set."""
        hg = self.hg
        concepts = self.concepts

        # the first three concepts form the "paris" set and the next
        # three the "berlin" set
        paris = [concepts[i] for i in range(3)]
        berlin = [concepts[i] for i in range(3, 6)]

        # chain the members of each set together
        for group in (paris, berlin):
            make_corefs(hg, group[0], group[1])
            make_corefs(hg, group[1], group[2])

        # each set must be internally consistent
        for group in (paris, berlin):
            anchor = group[0]
            for other in group[1:]:
                self.assertTrue(are_corefs(hg, anchor, other))
            for other in group[1:]:
                self.assertEqual(coref_id(hg, anchor), coref_id(hg, other))
            for member in group:
                self.assertIsNotNone(coref_id(hg, member))
            self.assertEqual(coref_set(hg, anchor), set(group))

        # pairs that straddle the two sets
        cross_pairs = ((concepts[0], concepts[4]),
                       (concepts[1], concepts[5]))

        # before connecting, the sets are unrelated
        for left, right in cross_pairs:
            self.assertFalse(are_corefs(hg, left, right))
        for left, right in cross_pairs:
            self.assertNotEqual(coref_id(hg, left), coref_id(hg, right))

        # connect both
        make_corefs(hg, concepts[0], concepts[5])

        # after connecting, every concept belongs to the same set
        for left, right in cross_pairs:
            self.assertTrue(are_corefs(hg, left, right))
        for left, right in cross_pairs:
            self.assertEqual(coref_id(hg, left), coref_id(hg, right))
        everything = set(concepts)
        for concept in concepts:
            self.assertEqual(coref_set(hg, concept), everything)
Пример #3
0
    def test_make_2_corefs(self):
        """Link two concepts and check that a third stays outside the
        resulting coreference set."""
        hg = self.hg
        first = self.concepts[0]
        second = self.concepts[1]
        outsider = self.concepts[2]

        make_corefs(hg, first, second)

        # the linked pair are corefs; the untouched concept is not
        self.assertTrue(are_corefs(hg, first, second))
        self.assertFalse(are_corefs(hg, first, outsider))

        # the linked pair share a non-null id; the outsider has none
        self.assertEqual(coref_id(hg, first), coref_id(hg, second))
        self.assertIsNotNone(coref_id(hg, first))
        self.assertIsNone(coref_id(hg, outsider))

        self.assertEqual(coref_set(hg, first), {first, second})
Пример #4
0
    def test_main_coref(self):
        """Check ``main_coref`` before and after three concepts are
        chained into one coreference set."""
        hg = self.hg
        trio = [self.concepts[i] for i in range(3)]

        # before any linking, each concept is its own main coref
        for concept in trio:
            self.assertEqual(main_coref(hg, concept), concept)

        make_corefs(hg, trio[0], trio[1])
        make_corefs(hg, trio[1], trio[2])

        # after linking, the middle concept is expected to be the main
        # coref for all three (the selection rule lives in main_coref,
        # which is not visible here)
        expected = trio[1]
        for concept in trio:
            self.assertEqual(main_coref(hg, concept), expected)
Пример #5
0
    def input_edge(self, edge):
        """Make a concept edge a coreference of its dominant subtype.

        Only edges whose type string starts with 'c' are considered.
        Among the edges yielded by ``subtypes(self.hg, edge)``, the one
        holding the largest share of the summed degree is the candidate.
        It is linked to ``edge`` through ``make_corefs`` (and
        ``self.corefs`` is incremented) only when all the degree-based
        thresholds checked at the end of this method are met.

        Ratios whose denominator is zero are treated as 0. rather than
        raising ZeroDivisionError, so such edges are simply not linked.

        Args:
            edge: the edge to examine.
        """
        if edge.type()[0] != 'c':
            return

        subs = tuple(subtypes(self.hg, edge))

        # check if the concept should be assigned to a synonym set
        if len(subs) == 0:
            return

        # find set with the highest degree and normalize set
        # degrees by total degree
        sub_degs = [self.hg.degree(sub) for sub in subs]
        total_deg = sum(sub_degs)
        total_deg = 1 if total_deg == 0 else total_deg
        sub_ratios = [sub_deg / total_deg for sub_deg in sub_degs]
        max_ratio = 0.
        # stays at -1 when no subtype has a positive ratio; max_ratio is
        # then 0 and the threshold check below rejects the edge anyway
        best_pos = -1
        for pos, ratio in enumerate(sub_ratios):
            if ratio > max_ratio:
                max_ratio = ratio
                best_pos = pos

        # compute some degree-related metrics
        sdd = self.hg.deep_degree(subs[best_pos])
        _, rdd = self.hg.root_degrees(edge)
        sub_to_root_dd = 0. if rdd == 0 else float(sdd) / float(rdd)
        d = self.hg.degree(edge)
        dd = self.hg.deep_degree(edge)
        # guard the denominator the same way as for rdd above
        r = 0. if dd == 0 else float(d) / float(dd)
        ld, ldd = self.hg.lemma_degrees(edge)
        lr = 0. if ldd == 0 else float(ld) / float(ldd)

        # use metric to decide
        # NOTE(review): the numeric thresholds look empirical; their
        # rationale is not visible here
        if (rdd > 5 and max_ratio >= .7 and r >= .05 and
                lr >= .05 and sub_to_root_dd >= .1 and
                (not edge.is_atom() or len(edge.root()) > 2)):
            make_corefs(self.hg, edge, subs[best_pos])
            self.corefs += 1
Пример #6
0
 def input_edge(self, edge):
     """Make an edge a coreference of its ``unidecode_edge`` form.

     The link is made (and ``self.corefs`` incremented) only when the
     converted edge differs from the original and already exists in
     the hypergraph.
     """
     converted = unidecode_edge(edge)
     # nothing to link when the conversion leaves the edge unchanged
     if converted != edge:
         if self.hg.exists(converted):
             make_corefs(self.hg, edge, converted)
             self.corefs += 1