示例#1
0
 def test_Node_isgpred_realpred_node(self):
     """A node built from 'the_q' is a gpred node, one from '_cat_n' a realpred node."""
     grammar_node = Node(pred='the_q')
     real_node = Node(pred='_cat_n')
     # Each node reports its own kind ...
     self.assertTrue(grammar_node.is_gpred_node)
     self.assertTrue(real_node.is_realpred_node)
     # ... and denies the other one.
     self.assertFalse(grammar_node.is_realpred_node)
     self.assertFalse(real_node.is_gpred_node)
示例#2
0
 def test_Node_str(self):
     """str() of an empty node is "None"; a populated node renders pred, carg and sortinfo."""
     self.assertEqual(str(Node()), "None")

     features = {'cvarsort': 'i', 'pers': '3', 'num': 'sg', 'ind': '+'}
     dog = Node(nodeid=2, pred='_dog_n_1', sortinfo=features, carg='Pat')
     self.assertEqual(str(dog), '_dog_n_1(Pat) x[pers=3, num=sg, ind=+]')
示例#3
0
def the_dog_chases_the_cat_and_the_mouse():
    """Return a DictDmrs fixture with nine nodes and eight links.

    Node 3 (the 'chase' verb) is both index and top. Node 7 (the 'and'
    conjunction) points at nodes 5 and 9 through L-INDEX / R-INDEX links.
    """

    def singular():
        # Sortinfo shared by the three singular nouns (dog, cat, mouse).
        return InstanceSortinfo(pers='3', num='sg', ind='+')

    node_list = [
        Node(nodeid=1, pred=RealPred('the', 'q')),
        Node(nodeid=2, pred=RealPred('dog', 'n', '1'), sortinfo=singular()),
        Node(nodeid=3,
             pred=RealPred('chase', 'v', '1'),
             sortinfo=EventSortinfo(sf='prop',
                                    tense='pres',
                                    mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q')),
        Node(nodeid=5, pred=RealPred('cat', 'n', '1'), sortinfo=singular()),
        Node(nodeid=6, pred=GPred('udef_q')),
        Node(nodeid=7,
             pred=RealPred('and', 'c'),
             sortinfo=InstanceSortinfo(pers='3', num='pl')),
        Node(nodeid=8, pred=RealPred('the', 'q')),
        Node(nodeid=9, pred=RealPred('mouse', 'n', '1'), sortinfo=singular()),
    ]

    # (start, end, rargname, post) for every link, in the original order.
    link_specs = (
        (1, 2, 'RSTR', 'H'),
        (3, 2, 'ARG1', 'NEQ'),
        (3, 7, 'ARG2', 'NEQ'),
        (4, 5, 'RSTR', 'H'),
        (6, 7, 'RSTR', 'H'),
        (7, 5, 'L-INDEX', 'NEQ'),
        (7, 9, 'R-INDEX', 'NEQ'),
        (8, 9, 'RSTR', 'H'),
    )
    link_list = [
        Link(start=source, end=target, rargname=label, post=suffix)
        for source, target, label, suffix in link_specs
    ]

    return DictDmrs(nodes=node_list, links=link_list, index=3, top=3)
示例#4
0
def the_mouse():
    """Return a DictDmrs for the surface string 'the mouse': two nodes joined by an RSTR/H link."""
    graph = DictDmrs(surface='the mouse')

    determiner = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    graph.add_node(determiner)

    mouse = Node(nodeid=2,
                 pred=RealPred('mouse', 'n', '1'),
                 cfrom=4,
                 cto=9,
                 sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    graph.add_node(mouse)

    # The quantifier (node 1) restricts the noun (node 2).
    graph.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
    return graph
示例#5
0
def dog_cat():
    """Return a link-less DictDmrs for the surface string 'dog cat' with two noun nodes.

    Neither node is given an explicit nodeid.
    """
    graph = DictDmrs(surface='dog cat')

    dog = Node(pred=RealPred('dog', 'n', '1'),
               cfrom=0,
               cto=3,
               sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    graph.add_node(dog)

    cat = Node(pred=RealPred('cat', 'n', '1'),
               cfrom=4,
               cto=7,
               sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    graph.add_node(cat)

    return graph
示例#6
0
    def test_Node_init(self):
        """Exercise Node construction: plain fields, span and carg validation, pred and sortinfo coercion."""
        fields = {
            'nodeid': 13,
            'pred': 'the_q',
            'surface': 'cat',
            'base': 'x',
            'cfrom': 23,
            'cto': 27,
            'carg': 'Kim',
        }
        built = Node(**fields)

        # Plain attributes are stored as given.
        self.assertEqual(built.nodeid, 13)
        self.assertEqual(built.surface, 'cat')
        self.assertEqual(built.base, 'x')

        # Character span: stored as given, rejected when cfrom lies after cto.
        self.assertEqual(built.cfrom, 23)
        self.assertEqual(built.cto, 27)
        with self.assertRaises(PydmrsValueError):
            Node(cfrom=22, cto=7)

        # Constant argument: surrounding double quotes are stripped,
        # a lone double quote is an error.
        self.assertEqual(built.carg, 'Kim')
        quoted = Node(carg='"Kim"')
        self.assertEqual(quoted.carg, 'Kim')
        with self.assertRaises(PydmrsValueError):
            Node(carg='"Kim')

        # Predicate: a string is converted, a Pred object is kept.
        self.assertEqual(built.pred, GPred('the_q'))
        from_object = Node(pred=GPred('the_q'))
        self.assertEqual(from_object.pred, GPred('the_q'))

        # Sortinfo: None is allowed ...
        self.assertEqual(Node().sortinfo, None)
        # ... and a dict, a Sortinfo instance and a list of pairs all
        # yield the same InstanceSortinfo ...
        accepted_forms = (
            {'cvarsort': 'i', 'pers': '3'},
            InstanceSortinfo(pers='3'),
            [('cvarsort', 'i'), ('pers', '3')],
        )
        for given in accepted_forms:
            self.assertEqual(Node(sortinfo=given).sortinfo,
                             InstanceSortinfo(pers='3'))
        # ... but a string is not accepted.
        with self.assertRaises(PydmrsTypeError):
            Node(sortinfo="x[pers=3, num=sg, ind=+]")
示例#7
0
 def test_get_matched_subgraph(self):
     """The subgraph of small_dmrs picked out by the first best match of cat_dmrs is nodes 4 and 5 with their RSTR link."""
     best_matches = general_matching.find_best_matches(
         self.cat_dmrs, self.small_dmrs)
     first_match = best_matches[0]
     found = general_matching.get_matched_subgraph(self.small_dmrs,
                                                   first_match)

     wanted_nodes = [
         Node(nodeid=4, pred=RealPred('the', 'q')),
         Node(nodeid=5,
              pred=RealPred('cat', 'n', '1'),
              sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
     ]
     wanted_links = [Link(start=4, end=5, rargname='RSTR', post='H')]
     wanted = DictDmrs(nodes=wanted_nodes, links=wanted_links)

     self.assertListEqual(found.nodes, wanted.nodes)
     self.assertListEqual(found.links, wanted.links)
示例#8
0
    def test_get_matching_nodeids(self):
        """Check aligned node-id matching for exact, all-surface, inexact, missing and reordered queries."""
        find_ids = aligned_matching.get_matching_nodeids

        # "the cat" fits "the dog chases the cat" in exactly one place.
        exact = find_ids(self.the_cat, self.the_dog_chases_the_cat)
        self.assertEqual(len(exact), 1)
        self.assertCountEqual(exact[0], [(2, 5), (1, 4)])

        # Asking for all surface nodes leaves that first match unchanged ...
        exact_surface = find_ids(self.the_cat,
                                 self.the_dog_chases_the_cat,
                                 all_surface=True)
        self.assertListEqual(exact[0], exact_surface[0])

        # ... whereas "dog cat" also reports the nodes lying between dog and
        # cat, paired with None.
        gapped = find_ids(self.dog_cat,
                          self.the_dog_chases_the_cat,
                          all_surface=True)
        self.assertCountEqual(gapped[0],
                              [(2, 5), (1, 2), (None, 3), (None, 4)])

        # Inexact fit: "the dog chases the cat" onto "the cat chases the dog".
        # Either "the dog" or "the cat" matches; 'chases' is in neither result
        # because it is not part of the longest match.
        swapped = find_ids(self.the_dog_chases_the_cat,
                           self.the_cat_chases_the_dog)
        self.assertEqual(len(swapped), 2)
        self.assertCountEqual(swapped,
                              [[(5, 2), (4, 1)], [(2, 5), (1, 4)]])

        # Nothing in "dog cat" matches "the mouse".
        nothing = find_ids(self.the_mouse, self.dog_cat)
        self.assertListEqual(nothing, [])

        # A ListDmrs for "the cat" whose nodes are added noun-first must give
        # the same result as the original "the cat" query.
        shuffled_cat = ListDmrs(surface='the cat')
        noun_node = Node(nodeid=2,
                         pred=RealPred('cat', 'n', '1'),
                         cfrom=4,
                         cto=7,
                         sortinfo=InstanceSortinfo(pers='3',
                                                   num='sg',
                                                   ind='+'))
        shuffled_cat.add_node(noun_node)
        determiner_node = Node(nodeid=1,
                               pred=RealPred('the', 'q'),
                               cfrom=0,
                               cto=3)
        shuffled_cat.add_node(determiner_node)
        shuffled_cat.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))

        shuffled = find_ids(shuffled_cat, self.the_dog_chases_the_cat)
        self.assertListEqual(shuffled, exact)
示例#9
0
def the_cat_chases_the_dog():
    """Return a DictDmrs for 'the cat chases the dog': five nodes with character spans, four links.

    Node 3 (the 'chase' verb) is both index and top.
    """

    def singular():
        # Sortinfo shared by both singular nouns.
        return InstanceSortinfo(pers='3', num='sg', ind='+')

    node_list = [
        Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3),
        Node(nodeid=2,
             pred=RealPred('cat', 'n', '1'),
             cfrom=4,
             cto=7,
             sortinfo=singular()),
        Node(nodeid=3,
             pred=RealPred('chase', 'v', '1'),
             cfrom=8,
             cto=14,
             sortinfo=EventSortinfo(sf='prop',
                                    tense='pres',
                                    mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q'), cfrom=15, cto=18),
        Node(nodeid=5,
             pred=RealPred('dog', 'n', '1'),
             cfrom=19,
             cto=22,
             sortinfo=singular()),
    ]

    # (start, end, rargname, post) for every link, in the original order.
    link_specs = (
        (1, 2, 'RSTR', 'H'),
        (3, 2, 'ARG1', 'NEQ'),
        (3, 5, 'ARG2', 'NEQ'),
        (4, 5, 'RSTR', 'H'),
    )
    link_list = [
        Link(start=source, end=target, rargname=label, post=suffix)
        for source, target, label, suffix in link_specs
    ]

    return DictDmrs(surface='the cat chases the dog',
                    nodes=node_list,
                    links=link_list,
                    index=3,
                    top=3)
示例#10
0
def loads_xml(bytestring,
              encoding=None,
              cls=ListDmrs,
              convert_legacy_prontype=True,
              **kwargs):
    """
    Load one DMRS graph from its "<dmrs>...</dmrs>" XML serialisation.
    ("<dmrslist>...</dmrslist>" is not handled yet.)

    :param bytestring: The XML as a bytestring; pass a str together with
        `encoding` to load from a string instead
    :param encoding: If given, `bytestring` is encoded with it before parsing
    :param cls: Graph class to instantiate (ListDmrs by default)
    :param convert_legacy_prontype: Forwarded to Node.from_xml
    :param kwargs: Forwarded to the `cls` constructor
    :return: The populated graph, an instance of `cls`
    :raises PydmrsValueError: If a child element is neither 'node' nor 'link'
    """
    if encoding:
        bytestring = bytestring.encode(encoding)
    xml = ET.XML(bytestring)

    dmrs = cls(**kwargs)

    dmrs.cfrom = int(xml.get('cfrom')) if 'cfrom' in xml.attrib else None
    dmrs.cto = int(xml.get('cto')) if 'cto' in xml.attrib else None
    dmrs.surface = xml.get('surface')
    dmrs.ident = int(xml.get('ident')) if 'ident' in xml.attrib else None
    # The top node may be given as a graph-level attribute or as a link
    # starting at node 0 (handled in the loop below). Previously the
    # attribute form was silently ignored.
    top_id = int(xml.get('top')) if 'top' in xml.attrib else None
    index_id = int(xml.get('index')) if 'index' in xml.attrib else None

    for elem in xml:
        if elem.tag == 'node':
            node = Node.from_xml(elem, convert_legacy_prontype)
            dmrs.add_node(node)

        elif elem.tag == 'link':
            link = Link.from_xml(elem)
            if link.start == 0:
                # A link from node 0 marks the top node rather than a real
                # edge; it takes precedence over a graph-level 'top'
                # attribute should both be present.
                top_id = link.end
            else:
                dmrs.add_link(link)
        else:
            raise PydmrsValueError(elem.tag)

    # top/index are resolved only after all nodes have been added, so the
    # ids can be looked up in the graph.
    if top_id:
        dmrs.top = dmrs[top_id]
    if index_id:
        dmrs.index = dmrs[index_id]

    return dmrs
示例#11
0
def loads_xml(bytestring, encoding=None, cls=ListDmrs, convert_legacy_prontype=True, **kwargs):
    """
    Load one DMRS graph from its "<dmrs>...</dmrs>" XML serialisation.
    ("<dmrslist>...</dmrslist>" is not handled yet.)
    Expects a bytestring; to load from a string instead, specify encoding.
    Produces a ListDmrs by default; for a different type, specify cls.
    Raises PydmrsValueError for a child element that is neither 'node' nor 'link'.
    """
    if encoding:
        bytestring = bytestring.encode(encoding)
    root = ET.XML(bytestring)
    attributes = root.attrib

    def optional_int(name):
        # Integer value of a root attribute, or None when it is absent.
        return int(root.get(name)) if name in attributes else None

    graph = cls(**kwargs)

    graph.cfrom = optional_int('cfrom')
    graph.cto = optional_int('cto')
    graph.surface = root.get('surface')
    graph.ident = optional_int('ident')
    # The top node can come from a graph attribute or from a link (below).
    top_id = optional_int('top')
    index_id = optional_int('index')

    for child in root:
        if child.tag == 'node':
            graph.add_node(Node.from_xml(child, convert_legacy_prontype))
            continue
        if child.tag != 'link':
            raise PydmrsValueError(child.tag)

        link = Link.from_xml(child)
        if link.start == 0:
            # A link from node 0 names the top node; it replaces any
            # graph-level top attribute, on the assumption that the two
            # never occur together.
            top_id = link.end
        else:
            graph.add_link(link)

    if top_id:
        graph.top = graph[top_id]
    if index_id:
        graph.index = graph[index_id]

    return graph
示例#12
0
    def test_Node_eq(self):
        """Node equality depends on pred, sortinfo and carg, and on nothing else."""
        # Unspecified nodes are always equal.
        node1 = Node()
        node2 = Node()
        self.assertEqual(node1, node2)

        sortinfo1 = {'cvarsort': 'e', 'tense': 'past'}
        sortinfo2 = {'cvarsort': 'e', 'tense': 'pres'}

        # Two nodes are equal if they have the same pred, sortinfo and carg,
        # even if all the other elements are different
        node1 = Node(nodeid=23,
                     pred='the_q',
                     sortinfo=sortinfo1,
                     cfrom=2,
                     cto=22,
                     carg='Kim',
                     surface='cat',
                     base='x')
        node2 = Node(nodeid=25,
                     pred='the_q',
                     sortinfo=sortinfo1,
                     cfrom=15,
                     carg='Kim',
                     surface='mad',
                     base='w')
        self.assertEqual(node1, node2)

        # Each case below changes exactly one of the three compared fields
        # relative to node1, so a failure pinpoints the field at fault.

        # Different carg
        node2 = Node(pred='the_q', sortinfo=sortinfo1, carg='Jane')
        self.assertNotEqual(node1, node2)

        # Different pred
        node2 = Node(pred='_smile_v', sortinfo=sortinfo1, carg='Kim')
        self.assertNotEqual(node1, node2)

        # Different sortinfo. The pred must stay 'the_q' (it used to be
        # '_the_q'), otherwise the pred mismatch alone makes the nodes
        # unequal and the sortinfo comparison is never really tested.
        node2 = Node(pred='the_q', sortinfo=sortinfo2, carg='Kim')
        self.assertNotEqual(node1, node2)
示例#13
0
 def test_Node_span(self):
     """Node.span is the (cfrom, cto) pair the node was built with."""
     start, end = 2, 15
     spanned = Node(cfrom=start, cto=end)
     self.assertEqual(spanned.span, (start, end))
示例#14
0
 def test_Node_underspecification(self):
     """Check is_more_specific / is_less_specific for pred, carg and sortinfo underspecification."""

     # Thin wrappers so each assertion reads as "more(left, right)".
     def more(left, right):
         return left.is_more_specific(right)

     def less(left, right):
         return left.is_less_specific(right)

     # Factories return a fresh node on every call, so no instance is
     # shared between assertions.
     def any_pred():
         return Node(pred=Pred())

     def abc_pred():
         return Node(pred=GPred(name='abc'))

     def any_carg():
         return Node(carg='?')

     def abc_carg():
         return Node(carg='abc')

     def any_sort():
         return Node(sortinfo=Sortinfo())

     def event_sort():
         return Node(sortinfo=EventSortinfo(sf='abc'))

     # Comparing against a non-node is a type error.
     with self.assertRaises(TypeError):
         more(Node(pred='_the_q'), 4)

     # complete underspecification
     self.assertFalse(more(Node(), Node()))
     self.assertFalse(less(Node(), Node()))

     # pred underspecification
     self.assertFalse(more(any_pred(), Node()))
     self.assertTrue(less(any_pred(), Node()))
     self.assertTrue(more(Node(), any_pred()))
     self.assertFalse(less(Node(), any_pred()))
     self.assertFalse(more(any_pred(), any_pred()))
     self.assertFalse(less(any_pred(), any_pred()))
     self.assertFalse(more(any_pred(), abc_pred()))
     self.assertTrue(less(any_pred(), abc_pred()))
     self.assertTrue(more(abc_pred(), any_pred()))
     self.assertFalse(less(abc_pred(), any_pred()))

     # carg underspecification
     self.assertFalse(more(any_carg(), Node()))
     self.assertTrue(less(any_carg(), Node()))
     self.assertTrue(more(Node(), any_carg()))
     self.assertFalse(less(Node(), any_carg()))
     self.assertFalse(more(any_carg(), any_carg()))
     self.assertFalse(less(any_carg(), any_carg()))
     self.assertFalse(more(any_carg(), abc_carg()))
     self.assertTrue(less(any_carg(), abc_carg()))
     self.assertTrue(more(abc_carg(), any_carg()))
     self.assertFalse(less(abc_carg(), any_carg()))

     # sortinfo underspecification
     self.assertFalse(more(any_sort(), Node()))
     self.assertTrue(less(any_sort(), Node()))
     self.assertTrue(more(Node(), any_sort()))
     self.assertFalse(less(Node(), any_sort()))
     self.assertFalse(more(any_sort(), any_sort()))
     self.assertFalse(less(any_sort(), any_sort()))
     self.assertFalse(more(any_sort(), event_sort()))
     self.assertTrue(less(any_sort(), event_sort()))
     self.assertTrue(more(event_sort(), any_sort()))
     self.assertFalse(less(event_sort(), any_sort()))

     # mixed specification: nodes underspecified in different fields are
     # neither more nor less specific than each other
     self.assertFalse(more(any_pred(), any_carg()))
     self.assertFalse(less(any_pred(), any_carg()))
     self.assertFalse(more(any_pred(), any_sort()))
     self.assertFalse(less(any_pred(), any_sort()))
     self.assertFalse(more(any_carg(), any_sort()))
     self.assertFalse(less(any_carg(), any_sort()))
示例#15
0
def predsort():
    """Return a DictDmrs holding a single node with an underspecified predicate and sortinfo."""
    graph = DictDmrs()
    wildcard = Node(pred=Pred(), sortinfo=Sortinfo())
    graph.add_node(wildcard)
    return graph
示例#16
0
def noun():
    """Return a DictDmrs holding a single underspecified noun node with underspecified sortinfo."""
    graph = DictDmrs()
    some_noun = Node(pred=RealPred('?', 'n', 'unknown'), sortinfo=Sortinfo())
    graph.add_node(some_noun)
    return graph
示例#17
0
def _parse_node(string, nodeid, queries, equalities, anchors, sortinfo_classes,
                sortinfo_shortforms):
    """
    Parse the textual description of a single node and build the node object.

    The string is read as an optional reference-id prefix terminated by ':',
    then either the keyword 'node' or a predicate, optionally followed by a
    parenthesised carg and a space-separated sortinfo.

    :param string: The node description to parse
    :param nodeid: The id given to the created node
    :param queries: Passed through to the value/pred/sortinfo sub-parsers
    :param equalities: Passed through to the value/pred/sortinfo sub-parsers
    :param anchors: Mapping updated in place: every reference id of this node
        is mapped to the created node
    :param sortinfo_classes: Passed through to _parse_sortinfo
    :param sortinfo_shortforms: Passed through to _parse_sortinfo
    :return: Tuple (node, ref_ids, ref_name); ref_ids is None or a list of
        reference id strings
    """
    # m: end of the predicate part, i.e. the first '(' (start of a carg) or,
    # failing that, the first space (start of a sortinfo); -1 if neither.
    # NOTE(review): a '(' at the very start of the string (the "(ids):" prefix
    # that the OptionalNode branch below expects) is found here too, giving
    # m == 0 and an empty colon search range - confirm whether that branch
    # can actually be reached.
    m = string.find('(')
    if m < 0:
        m = string.find(' ')
    # l: position of the ':' that ends the reference-id prefix; it is only
    # looked for before m.
    if m < 0:
        l = string.find(':')
    else:
        l = string.find(':', 0, m)
    if l < 0:
        # No prefix: the description starts at the beginning of the string.
        ref_ids = None
        l = 0
    else:
        ref_ids = string[:l]
        # Step over the ':' and any spaces following it.
        l += 1
        while string[l] == ' ':
            l += 1
    # Keyword 'node', either alone or directly followed by one of the
    # special_values characters: a node underspecified in pred, carg and
    # sortinfo.
    if string[l:l + 4] == 'node' and (len(string) - l == 4
                                      or string[l + 4] in special_values):
        value = _parse_value(string[l + 4:], None, queries, equalities,
                             (lambda matching, dmrs: dmrs[matching[nodeid]]))
        assert not value
        pred = Pred()
        carg = '?'
        sortinfo = Sortinfo()
        ref_name = 'node'
    elif m < 0:
        # Neither '(' nor ' ' present: the rest of the string is the predicate.
        pred, ref_name = _parse_pred(string[l:], nodeid, queries, equalities)
        carg = None
        sortinfo = None
    else:
        pred, ref_name = _parse_pred(string[l:m], nodeid, queries, equalities)
        if string[m] == '(':
            # Carg in parentheses; double quotes enclosing it are dropped.
            r = string.index(')', m)
            if string[m + 1] == '"' and string[r - 1] == '"':
                carg = string[m + 2:r - 1]
            else:
                carg = string[m + 1:r]
            assert '"' not in carg
            carg = _parse_value(
                carg, '?', queries, equalities,
                (lambda matching, dmrs: dmrs[matching[nodeid]].carg))
            # Continue right after the closing parenthesis.
            m = r + 1
        else:
            carg = None
        if m < len(string) and string[m] == ' ':
            # Skip the separating spaces; what remains is the sortinfo.
            while string[m] == ' ':
                m += 1
            sortinfo = _parse_sortinfo(string[m:], nodeid, queries, equalities,
                                       sortinfo_classes, sortinfo_shortforms)
        else:
            sortinfo = None
    if not ref_ids:
        # No (or an empty) prefix: plain node, nothing registered in anchors.
        ref_ids = None
        node = Node(nodeid, pred, sortinfo=sortinfo, carg=carg)
    else:
        # The brackets around the comma-separated ids choose the node class:
        # [..] AnchorNode, (..) OptionalNode, {..} SubgraphNode, none -> Node.
        if ref_ids[0] == '[' and ref_ids[-1] == ']':
            ref_ids = ref_ids[1:-1].split(',')
            node = AnchorNode(anchors=ref_ids,
                              nodeid=nodeid,
                              pred=pred,
                              sortinfo=sortinfo,
                              carg=carg)
        elif ref_ids[0] == '(' and ref_ids[-1] == ')':
            ref_ids = ref_ids[1:-1].split(',')
            node = OptionalNode(anchors=ref_ids,
                                nodeid=nodeid,
                                pred=pred,
                                sortinfo=sortinfo,
                                carg=carg)
        elif ref_ids[0] == '{' and ref_ids[-1] == '}':
            ref_ids = ref_ids[1:-1].split(',')
            node = SubgraphNode(anchors=ref_ids,
                                nodeid=nodeid,
                                pred=pred,
                                sortinfo=sortinfo,
                                carg=carg)
        else:
            ref_ids = ref_ids.split(',')
            node = Node(nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
        # Register the node under each of its reference ids.
        for ref_id in ref_ids:
            assert ref_id not in anchors, 'Reference ids have to be unique.'
            anchors[ref_id] = node
    return node, ref_ids, ref_name
示例#18
0
def the():
    """Return a DictDmrs holding a single 'the' quantifier node."""
    graph = DictDmrs()
    # No nodeid is passed; per the original author it is set automatically.
    determiner = Node(pred=RealPred('the', 'q'))
    graph.add_node(determiner)
    return graph