示例#1
0
 def setUp(self):
     """Create the DMRS fixtures, each converted to a span_pred_key-sorted DMRS."""

     def as_span_sorted(dmrs):
         # A new conversion target is requested for every fixture, exactly
         # as a direct convert_to(...) call on the example would do.
         return dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

     self.the_cat = as_span_sorted(examples_dmrs.the_cat())
     # Checks if the matching code converts to SortDictDmrs with span_pred_key
     self.the_cat_chases_the_dog = as_span_sorted(
         examples_dmrs.the_cat_chases_the_dog())
     self.the_dog_chases_the_cat = as_span_sorted(
         examples_dmrs.the_dog_chases_the_cat())
示例#2
0
 def setUp(self):
     """Create the DMRS fixtures, each converted to a span_pred_key-sorted DMRS."""

     def as_span_sorted(dmrs):
         # A new conversion target is requested for every fixture, exactly
         # as a direct convert_to(...) call on the example would do.
         return dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

     self.the_cat = as_span_sorted(examples_dmrs.the_cat())
     # Checks if the matching code converts to SortDictDmrs with span_pred_key
     self.the_cat_chases_the_dog = as_span_sorted(
         examples_dmrs.the_cat_chases_the_dog())
     self.the_dog_chases_the_cat = as_span_sorted(
         examples_dmrs.the_dog_chases_the_cat())
     self.the_mouse = as_span_sorted(examples_dmrs.the_mouse())
     self.dog_cat = as_span_sorted(examples_dmrs.dog_cat())
示例#3
0
def get_matching_nodeids(small_dmrs, large_dmrs, all_surface=False, large_excluded=None):
    """ Finds matching pairs of nodeids between small_dmrs and large_dmrs.

        Quantifier nodes and 'compound'/'compound_name' nodes are left out of the initial
        node matching; add_quantifier_matches and add_compound_matches are then run over
        the resulting matches, and only the longest matches are kept.

        :param small_dmrs: A DMRS object used as a match query.
        :param large_dmrs: A DMRS object to be searched for a match.
        :param all_surface: If true, include all nodes from the aligned surface region.
                            If false, find only the nodes with equivalents in small_dmrs.
        :param large_excluded: The nodeids from the large DMRS to be ignored during matching.

        :return: A list of lists of matched nodeid pairs (small_dmrs nodeid, large_dmrs nodeid).
                 A list of lists, in case more than one best match is found. With all_surface,
                 the extra surface nodes are appended as (None, large_dmrs nodeid) pairs.
    """
    # Convert DMRSs to SortDictDmrs with span_pred_key node key if needed.
    if not isinstance(small_dmrs, SortDictDmrs) or (small_dmrs.node_key != span_pred_key):
        small_dmrs = small_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))
    if not isinstance(large_dmrs, SortDictDmrs) or (large_dmrs.node_key != span_pred_key):
        large_dmrs = large_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

    # Filter quantifiers.
    small_no_qs = [n for n in small_dmrs.nodes if not small_dmrs.is_quantifier(n.nodeid)]
    large_no_qs = [n for n in large_dmrs.nodes if not large_dmrs.is_quantifier(n.nodeid)]
    # Filter compound_name and compound predicates.
    filtered_pred = ['compound', 'compound_name']
    filtered_small = [n for n in small_no_qs if str(n.pred) not in filtered_pred]
    filtered_large = [n for n in large_no_qs if str(n.pred) not in filtered_pred]

    longest_matches = match_nodes(filtered_small, filtered_large,
                                  excluded=large_excluded)  # list of lists of nodeid pairs
    # The return values of both helpers are unused, so they are expected to
    # update longest_matches in place.
    add_quantifier_matches(small_dmrs, large_dmrs, longest_matches)
    add_compound_matches(small_dmrs, large_dmrs, longest_matches, filtered_pred)

    # Keep only the matches of maximal length (none at all if nothing matched).
    max_len = max(map(len, longest_matches), default=0)
    longest_matches = [m for m in longest_matches if len(m) == max_len]

    # Returned in reverse span_pred_key order.
    all_matched_nodeids = []
    for match in longest_matches:
        if all_surface:
            # The large nodeids are only needed for the surface lookup, so they
            # are collected here rather than for every match. Reversing the
            # match gives them in span_pred_key order.
            matched_large_nodeids = [pair[1] for pair in reversed(match)]
            extra_overlap_nodeids = find_extra_surface_nodeids(matched_large_nodeids,
                                                               large_dmrs)
            match.extend([(None, nodeid) for nodeid in extra_overlap_nodeids])
        all_matched_nodeids.append(match)

    return all_matched_nodeids
示例#4
0
    def test_get_matching_nodeids(self):
        """Check get_matching_nodeids on exact, inexact, absent and unsorted-input cases."""
        find_matches = aligned_matching.get_matching_nodeids
        cat = self.the_cat
        dog_chases_cat = self.the_dog_chases_the_cat

        # Exact fit: "the cat" matched onto "the dog chases the cat".
        exact = find_matches(cat, dog_chases_cat)
        self.assertEqual(len(exact), 2)
        self.assertCountEqual(exact[0], [(2, 5), (1, 1)])
        self.assertCountEqual(exact[1], [(2, 5), (1, 4)])

        # Same query with all_surface=True.
        exact_surface = find_matches(cat, dog_chases_cat, all_surface=True)
        self.assertListEqual(exact[1], exact_surface[1])
        # The first match additionally picks up the extra surface nodes.
        self.assertCountEqual(exact_surface[0],
                              [(2, 5), (1, 1), (None, 2), (None, 3), (None, 4)])

        # Inexact fit: "the dog chases the cat" matched onto "the cat chases the dog".
        cat_chases_dog = self.the_cat_chases_the_dog
        inexact = find_matches(dog_chases_cat, cat_chases_dog)
        self.assertEqual(len(inexact), 1)
        self.assertCountEqual(inexact[0], [(4, 4), (3, 3), (1, 1)])
        inexact_surface = find_matches(dog_chases_cat, cat_chases_dog, all_surface=True)
        self.assertEqual(len(inexact_surface), 1)
        self.assertCountEqual(inexact_surface[0], [(4, 4), (3, 3), (1, 1), (None, 2)])

        # No match found.
        mouse = examples_dmrs.the_mouse().convert_to(
            abstractSortDictDmrs(node_key=span_pred_key))
        dog_and_cat = examples_dmrs.dog_cat().convert_to(
            abstractSortDictDmrs(node_key=span_pred_key))
        no_match = find_matches(mouse, dog_and_cat)
        self.assertListEqual(no_match, [])

        # A ListDmrs with the noun node added before its quantifier should give
        # the same result as 'the cat'.
        mixed_cat = ListDmrs(surface='the cat')
        cat_node = Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7,
                        sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
        mixed_cat.add_node(cat_node)
        the_node = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
        mixed_cat.add_node(the_node)
        rstr_link = Link(start=1, end=2, rargname='RSTR', post='H')
        mixed_cat.add_link(rstr_link)
        from_mixed = find_matches(mixed_cat, dog_chases_cat)
        self.assertListEqual(from_mixed, exact)
示例#5
0
def get_matching_nodeids(small_dmrs, large_dmrs, all_surface=False, large_excluded=None):
    """ Finds matching pairs of nodeids between small_dmrs and large_dmrs. Starts by matching all
        nodes but quantifiers, then matches quantifiers for nouns with matches. Only the
        longest matches are kept.

        :param small_dmrs: A DMRS object used as a match query.
        :param large_dmrs: A DMRS object to be searched for a match.
        :param all_surface: If true, include all nodes from the aligned surface region.
                            If false, find only the nodes with equivalents in small_dmrs.
        :param large_excluded: The nodeids from the large DMRS to be ignored during matching.

        :return: A list of lists of matched nodeid pairs (small_dmrs nodeid, large_dmrs nodeid).
                 A list of lists, in case more than one best match is found. With all_surface,
                 the extra surface nodes are appended as (None, large_dmrs nodeid) pairs.
    """
    # Convert DMRSs to SortDictDmrs with span_pred_key node key if needed.
    if not isinstance(small_dmrs, SortDictDmrs) or (small_dmrs.node_key != span_pred_key):
        small_dmrs = small_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))
    if not isinstance(large_dmrs, SortDictDmrs) or (large_dmrs.node_key != span_pred_key):
        large_dmrs = large_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

    # Filter quantifiers.
    small_no_qs = [n for n in small_dmrs.nodes if not small_dmrs.is_quantifier(n.nodeid)]
    large_no_qs = [n for n in large_dmrs.nodes if not large_dmrs.is_quantifier(n.nodeid)]

    longest_matches = match_nodes(small_no_qs, large_no_qs,
                                  excluded=large_excluded)  # list of lists of nodeid pairs
    # The return value is unused, so the helper is expected to update
    # longest_matches in place.
    add_quantifier_matches(small_dmrs, large_dmrs, longest_matches)

    # Keep only the matches of maximal length (none at all if nothing matched).
    max_len = max(map(len, longest_matches), default=0)
    longest_matches = [m for m in longest_matches if len(m) == max_len]

    # Returned in reverse span_pred_key order.
    all_matched_nodeids = []
    for match in longest_matches:
        if all_surface:
            # The large nodeids are only needed for the surface lookup, so they
            # are collected here rather than for every match. Reversing the
            # match gives them in span_pred_key order.
            matched_large_nodeids = [pair[1] for pair in reversed(match)]
            extra_overlap_nodeids = find_extra_surface_nodeids(matched_large_nodeids,
                                                               large_dmrs)
            match.extend([(None, nodeid) for nodeid in extra_overlap_nodeids])
        all_matched_nodeids.append(match)

    return all_matched_nodeids