def test_Node_isgpred_realpred_node(self):
    """A 'the_q' node and a '_cat_n' node report opposite node kinds."""
    grammar_node = Node(pred='the_q')
    real_node = Node(pred='_cat_n')

    # Each node is recognised as its own kind ...
    self.assertTrue(grammar_node.is_gpred_node)
    self.assertTrue(real_node.is_realpred_node)
    # ... and is not reported as the other kind.
    self.assertFalse(grammar_node.is_realpred_node)
    self.assertFalse(real_node.is_gpred_node)
def test_Node_str(self):
    """str() of an empty node is 'None'; a full node shows pred, carg and sortinfo."""
    self.assertEqual(str(Node()), "None")

    features = dict(cvarsort='i', pers='3', num='sg', ind='+')
    pat = Node(nodeid=2, pred='_dog_n_1', sortinfo=features, carg='Pat')
    self.assertEqual(str(pat), '_dog_n_1(Pat) x[pers=3, num=sg, ind=+]')
def the_dog_chases_the_cat_and_the_mouse():
    """Build the DictDmrs fixture for 'the dog chases the cat and the mouse'.

    The graph has nine nodes (without character spans) and eight links;
    node 3 ('chase') is used as both index and top.
    """
    def singular_noun():
        # Every noun in this fixture carries the same third-person singular features.
        return InstanceSortinfo(pers='3', num='sg', ind='+')

    nodes = [
        Node(nodeid=1, pred=RealPred('the', 'q')),
        Node(nodeid=2, pred=RealPred('dog', 'n', '1'), sortinfo=singular_noun()),
        Node(nodeid=3, pred=RealPred('chase', 'v', '1'),
             sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q')),
        Node(nodeid=5, pred=RealPred('cat', 'n', '1'), sortinfo=singular_noun()),
        Node(nodeid=6, pred=GPred('udef_q')),
        Node(nodeid=7, pred=RealPred('and', 'c'),
             sortinfo=InstanceSortinfo(pers='3', num='pl')),
        Node(nodeid=8, pred=RealPred('the', 'q')),
        Node(nodeid=9, pred=RealPred('mouse', 'n', '1'), sortinfo=singular_noun()),
    ]

    # (start, end, rargname, post) for every link, in the original order.
    link_specs = (
        (1, 2, 'RSTR', 'H'),
        (3, 2, 'ARG1', 'NEQ'),
        (3, 7, 'ARG2', 'NEQ'),
        (4, 5, 'RSTR', 'H'),
        (6, 7, 'RSTR', 'H'),
        (7, 5, 'L-INDEX', 'NEQ'),
        (7, 9, 'R-INDEX', 'NEQ'),
        (8, 9, 'RSTR', 'H'),
    )
    links = [
        Link(start=source, end=target, rargname=label, post=post)
        for source, target, label, post in link_specs
    ]

    return DictDmrs(nodes=nodes, links=links, index=3, top=3)
def the_mouse():
    """Build the two-node DictDmrs fixture for the surface string 'the mouse'."""
    graph = DictDmrs(surface='the mouse')

    determiner = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    graph.add_node(determiner)

    mouse = Node(
        nodeid=2,
        pred=RealPred('mouse', 'n', '1'),
        cfrom=4,
        cto=9,
        sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'),
    )
    graph.add_node(mouse)

    # The determiner points at the noun.
    restriction = Link(start=1, end=2, rargname='RSTR', post='H')
    graph.add_link(restriction)
    return graph
def dog_cat():
    """Build a link-free DictDmrs fixture for 'dog cat'; no node ids are passed."""
    graph = DictDmrs(surface='dog cat')

    dog = Node(
        pred=RealPred('dog', 'n', '1'),
        cfrom=0,
        cto=3,
        sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'),
    )
    graph.add_node(dog)

    cat = Node(
        pred=RealPred('cat', 'n', '1'),
        cfrom=4,
        cto=7,
        sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'),
    )
    graph.add_node(cat)

    return graph
def test_Node_init(self):
    """Node() keeps its plain fields and converts carg, pred and sortinfo inputs."""
    kim = Node(nodeid=13, pred='the_q', surface='cat', base='x',
               cfrom=23, cto=27, carg='Kim')

    # Plain fields come back exactly as given.
    plain_fields = (
        ('nodeid', 13),
        ('surface', 'cat'),
        ('base', 'x'),
        ('cfrom', 23),
        ('cto', 27),
    )
    for attribute, stored in plain_fields:
        self.assertEqual(getattr(kim, attribute), stored)

    # A span whose start lies after its end is rejected.
    with self.assertRaises(PydmrsValueError):
        Node(cfrom=22, cto=7)

    self.assertEqual(kim.carg, 'Kim')
    # Surrounding double quotes are removed from carg.
    quoted = Node(carg='"Kim"')
    self.assertEqual(quoted.carg, 'Kim')
    # A single unmatched double quote is rejected.
    with self.assertRaises(PydmrsValueError):
        Node(carg='"Kim')

    # A pred given as a string ...
    self.assertEqual(kim.pred, GPred('the_q'))
    # ... or as a pred object.
    from_pred_object = Node(pred=GPred('the_q'))
    self.assertEqual(from_pred_object.pred, GPred('the_q'))

    # sortinfo may be left out,
    self.assertEqual(Node().sortinfo, None)
    # given as a dict,
    from_dict = Node(sortinfo={'cvarsort': 'i', 'pers': '3'})
    self.assertEqual(from_dict.sortinfo, InstanceSortinfo(pers='3'))
    # given as a Sortinfo object,
    from_object = Node(sortinfo=InstanceSortinfo(pers='3'))
    self.assertEqual(from_object.sortinfo, InstanceSortinfo(pers='3'))
    # or given as a list of pairs,
    from_pairs = Node(sortinfo=[('cvarsort', 'i'), ('pers', '3')])
    self.assertEqual(from_pairs.sortinfo, InstanceSortinfo(pers='3'))
    # but nothing else.
    with self.assertRaises(PydmrsTypeError):
        Node(sortinfo="x[pers=3, num=sg, ind=+]")
def test_get_matched_subgraph(self):
    """The subgraph for the best match of cat_dmrs in small_dmrs is 'the cat'."""
    best_matches = general_matching.find_best_matches(self.cat_dmrs,
                                                      self.small_dmrs)
    best = best_matches[0]
    found = general_matching.get_matched_subgraph(self.small_dmrs, best)

    the_node = Node(nodeid=4, pred=RealPred('the', 'q'))
    cat_node = Node(nodeid=5, pred=RealPred('cat', 'n', '1'),
                    sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    restriction = Link(start=4, end=5, rargname='RSTR', post='H')
    wanted = DictDmrs(nodes=[the_node, cat_node], links=[restriction])

    self.assertListEqual(found.nodes, wanted.nodes)
    self.assertListEqual(found.links, wanted.links)
def test_get_matching_nodeids(self):
    """Exercise aligned_matching.get_matching_nodeids on exact, inexact and failed matches."""
    find = aligned_matching.get_matching_nodeids
    dog_chases_cat = self.the_dog_chases_the_cat

    # "the cat" onto "the dog chases the cat": exact fit, a single match.
    exact = find(self.the_cat, dog_chases_cat)
    self.assertEqual(len(exact), 1)
    self.assertCountEqual(exact[0], [(2, 5), (1, 4)])

    # With all_surface=True the first match is the same as before.
    with_surface = find(self.the_cat, dog_chases_cat, all_surface=True)
    self.assertListEqual(exact[0], with_surface[0])

    # "dog cat": the surface nodes between dog and cat are reported with None.
    with_surface = find(self.dog_cat, dog_chases_cat, all_surface=True)
    self.assertCountEqual(with_surface[0],
                          [(2, 5), (1, 2), (None, 3), (None, 4)])

    # "the dog chases the cat" onto "the cat chases the dog": inexact fit.
    # Either "the dog" or "the cat" matches; 'chases' does not, because it
    # is not part of the longest match.
    inexact = find(dog_chases_cat, self.the_cat_chases_the_dog)
    self.assertEqual(len(inexact), 2)
    self.assertCountEqual(inexact, [[(5, 2), (4, 1)], [(2, 5), (1, 4)]])

    # Nothing in "dog cat" matches "the mouse".
    nothing = find(self.the_mouse, self.dog_cat)
    self.assertListEqual(nothing, [])

    # A ListDmrs whose nodes were added in reverse id order must behave
    # like 'the cat'.
    shuffled_cat = ListDmrs(surface='the cat')
    cat_node = Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7,
                    sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    shuffled_cat.add_node(cat_node)
    the_node = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    shuffled_cat.add_node(the_node)
    shuffled_cat.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))

    shuffled = find(shuffled_cat, dog_chases_cat)
    self.assertListEqual(shuffled, exact)
def the_cat_chases_the_dog():
    """Build the DictDmrs fixture for 'the cat chases the dog'.

    Five nodes with character spans and four links; node 3 ('chase') is used
    as both index and top.
    """
    def singular_noun():
        # Both nouns carry the same third-person singular features.
        return InstanceSortinfo(pers='3', num='sg', ind='+')

    nodes = [
        Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3),
        Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7,
             sortinfo=singular_noun()),
        Node(nodeid=3, pred=RealPred('chase', 'v', '1'), cfrom=8, cto=14,
             sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q'), cfrom=15, cto=18),
        Node(nodeid=5, pred=RealPred('dog', 'n', '1'), cfrom=19, cto=22,
             sortinfo=singular_noun()),
    ]

    # (start, end, rargname, post) for every link, in the original order.
    link_specs = (
        (1, 2, 'RSTR', 'H'),
        (3, 2, 'ARG1', 'NEQ'),
        (3, 5, 'ARG2', 'NEQ'),
        (4, 5, 'RSTR', 'H'),
    )
    links = [
        Link(start=source, end=target, rargname=label, post=post)
        for source, target, label, post in link_specs
    ]

    return DictDmrs(surface='the cat chases the dog',
                    nodes=nodes, links=links, index=3, top=3)
def loads_xml(bytestring, encoding=None, cls=ListDmrs, convert_legacy_prontype=True, **kwargs):
    """
    Parse a single "<dmrs>...</dmrs>" element into a DMRS graph.
    To be updated for "<dmrslist>...</dmrslist>"...

    Expects a bytestring; to load from a string instead, specify encoding
    (the input is then encoded with it before parsing).
    Produces a ListDmrs by default; for a different type, specify cls.

    :param bytestring: XML text of one <dmrs> element
    :param encoding: if given, bytestring.encode(encoding) is applied first
    :param cls: class instantiated as cls(**kwargs) to hold the graph
    :param convert_legacy_prontype: passed through to Node.from_xml
    :param kwargs: passed through to the cls constructor
    :return: the populated graph
    :raises PydmrsValueError: if a child element is neither <node> nor <link>
    """
    if encoding:
        bytestring = bytestring.encode(encoding)
    xml = ET.XML(bytestring)

    dmrs = cls(**kwargs)

    # Graph-level attributes; the integer ones are optional.
    dmrs.cfrom = int(xml.get('cfrom')) if 'cfrom' in xml.attrib else None
    dmrs.cto = int(xml.get('cto')) if 'cto' in xml.attrib else None
    dmrs.surface = xml.get('surface')
    dmrs.ident = int(xml.get('ident')) if 'ident' in xml.attrib else None
    # top may be set as a graph attribute or as a link starting at 0 (see below)
    top_id = int(xml.get('top')) if 'top' in xml.attrib else None
    index_id = int(xml.get('index')) if 'index' in xml.attrib else None

    for elem in xml:
        if elem.tag == 'node':
            node = Node.from_xml(elem, convert_legacy_prontype)
            dmrs.add_node(node)
        elif elem.tag == 'link':
            link = Link.from_xml(elem)
            if link.start == 0:
                # A link starting at 0 marks the top node and is not stored
                # as a regular link. It overrides a graph-level top
                # attribute; both are not expected in the same graph.
                top_id = link.end
            else:
                dmrs.add_link(link)
        else:
            raise PydmrsValueError(elem.tag)

    # Resolve ids to nodes only after every node has been added.
    # A falsy id (None or 0) leaves the corresponding attribute unset.
    if top_id:
        dmrs.top = dmrs[top_id]
    if index_id:
        dmrs.index = dmrs[index_id]

    return dmrs
def loads_xml(bytestring, encoding=None, cls=ListDmrs, convert_legacy_prontype=True, **kwargs):
    """
    Read one "<dmrs>...</dmrs>" element and return it as a graph object.
    "<dmrslist>...</dmrslist>" is not handled yet.

    The input is expected to be a bytestring; pass encoding to load from a
    string (it is encoded first). The result is a ListDmrs unless another
    class is given as cls; extra keyword arguments go to its constructor.
    """
    if encoding:
        bytestring = bytestring.encode(encoding)
    root = ET.XML(bytestring)

    def optional_int(attribute):
        # Integer value of a root attribute, or None when it is absent.
        return int(root.get(attribute)) if attribute in root.attrib else None

    graph = cls(**kwargs)
    graph.cfrom = optional_int('cfrom')
    graph.cto = optional_int('cto')
    graph.surface = root.get('surface')
    graph.ident = optional_int('ident')

    # The top node can be named by a graph attribute or by a link from 0.
    top_id = optional_int('top')
    index_id = optional_int('index')

    for child in root:
        tag = child.tag
        if tag == 'node':
            graph.add_node(Node.from_xml(child, convert_legacy_prontype))
            continue
        if tag != 'link':
            raise PydmrsValueError(tag)

        link = Link.from_xml(child)
        if link.start == 0:
            # Replaces any top id read from the graph attribute; a graph is
            # assumed not to carry both.
            top_id = link.end
        else:
            graph.add_link(link)

    # Ids are turned into nodes once the whole graph has been read.
    if top_id:
        graph.top = graph[top_id]
    if index_id:
        graph.index = graph[index_id]
    return graph
def test_Node_eq(self):
    """Node equality is decided by pred, sortinfo and carg, and nothing else.

    Each "different ..." case below changes exactly one of those three
    fields relative to node1, so that every field is checked in isolation.
    """
    # Unspecified nodes are always equal.
    node1 = Node()
    node2 = Node()
    self.assertEqual(node1, node2)

    sortinfo1 = {'cvarsort': 'e', 'tense': 'past'}
    sortinfo2 = {'cvarsort': 'e', 'tense': 'pres'}

    # Two nodes are equal if they have the same pred, sortinfo and carg,
    # even if all the other elements are different
    node1 = Node(nodeid=23, pred='the_q', sortinfo=sortinfo1,
                 cfrom=2, cto=22, carg='Kim', surface='cat', base='x')
    node2 = Node(nodeid=25, pred='the_q', sortinfo=sortinfo1,
                 cfrom=15, carg='Kim', surface='mad', base='w')
    self.assertEqual(node1, node2)

    # Different carg
    node2 = Node(pred='the_q', sortinfo=sortinfo1, carg='Jane')
    self.assertNotEqual(node1, node2)

    # Different pred
    node2 = Node(pred='_smile_v', sortinfo=sortinfo1, carg='Kim')
    self.assertNotEqual(node1, node2)

    # Different sortinfo. pred and carg are the same as node1's, so the
    # inequality can only come from the sortinfo comparison.
    node2 = Node(pred='the_q', sortinfo=sortinfo2, carg='Kim')
    self.assertNotEqual(node1, node2)
def test_Node_span(self):
    """Node.span is the (cfrom, cto) pair the node was built with."""
    start, end = 2, 15
    spanned = Node(cfrom=start, cto=end)
    self.assertEqual(spanned.span, (start, end))
def test_Node_underspecification(self):
    """Check is_more_specific / is_less_specific over pred, carg and sortinfo."""
    # Comparing against something that is not a node raises TypeError.
    with self.assertRaises(TypeError):
        Node(pred='_the_q').is_more_specific(4)

    # Factories, so that every single assertion works on freshly built nodes.
    def blank():
        return Node()

    def any_pred():
        return Node(pred=Pred())

    def abc_pred():
        return Node(pred=GPred(name='abc'))

    def any_carg():
        return Node(carg='?')

    def abc_carg():
        return Node(carg='abc')

    def any_sortinfo():
        return Node(sortinfo=Sortinfo())

    def event_sortinfo():
        return Node(sortinfo=EventSortinfo(sf='abc'))

    # (left, right, left.is_more_specific(right), left.is_less_specific(right))
    cases = (
        # complete underspecification
        (blank, blank, False, False),
        # pred underspecification
        (any_pred, blank, False, True),
        (blank, any_pred, True, False),
        (any_pred, any_pred, False, False),
        (any_pred, abc_pred, False, True),
        (abc_pred, any_pred, True, False),
        # carg underspecification
        (any_carg, blank, False, True),
        (blank, any_carg, True, False),
        (any_carg, any_carg, False, False),
        (any_carg, abc_carg, False, True),
        (abc_carg, any_carg, True, False),
        # sortinfo underspecification
        (any_sortinfo, blank, False, True),
        (blank, any_sortinfo, True, False),
        (any_sortinfo, any_sortinfo, False, False),
        (any_sortinfo, event_sortinfo, False, True),
        (event_sortinfo, any_sortinfo, True, False),
        # mixed specification
        (any_pred, any_carg, False, False),
        (any_pred, any_sortinfo, False, False),
        (any_carg, any_sortinfo, False, False),
    )

    for left, right, more, less in cases:
        expect_more = self.assertTrue if more else self.assertFalse
        expect_more(left().is_more_specific(right()))
        expect_less = self.assertTrue if less else self.assertFalse
        expect_less(left().is_less_specific(right()))
def predsort():
    """Return a DictDmrs with one node whose predicate and sortinfo are underspecified."""
    graph = DictDmrs()
    # Bare Pred() and Sortinfo() leave both fields underspecified.
    open_node = Node(pred=Pred(), sortinfo=Sortinfo())
    graph.add_node(open_node)
    return graph
def noun():
    """Return a DictDmrs with one underspecified noun node with underspecified sortinfo."""
    graph = DictDmrs()
    # '?' as lemma and 'unknown' as sense leave the noun itself open.
    open_noun = Node(pred=RealPred('?', 'n', 'unknown'), sortinfo=Sortinfo())
    graph.add_node(open_noun)
    return graph
def _parse_node(string, nodeid, queries, equalities, anchors, sortinfo_classes, sortinfo_shortforms):
    """Parse the textual specification of one node into a node object.

    The string is read left to right as: optional reference ids terminated
    by ':', then either the keyword 'node' or a predicate, then an optional
    carg in parentheses, then an optional sortinfo introduced by a space.

    :param string: the node specification text
    :param nodeid: id given to the created node; also captured by the
        lambdas handed to _parse_value
    :param queries: forwarded to _parse_value, _parse_pred and _parse_sortinfo
    :param equalities: forwarded to _parse_value, _parse_pred and _parse_sortinfo
    :param anchors: mapping that receives one entry per reference id,
        each pointing at the created node
    :param sortinfo_classes: forwarded to _parse_sortinfo
    :param sortinfo_shortforms: forwarded to _parse_sortinfo
    :return: tuple (node, ref_ids, ref_name); ref_ids is a list of reference
        id strings or None, ref_name is 'node' for the keyword form and
        otherwise the second value returned by _parse_pred
    :raises AssertionError: if _parse_value returns a truthy value for the
        'node' keyword form, if the carg still contains a double quote, or
        if a reference id is already present in anchors
    :raises ValueError: from str.index, if '(' is not followed by ')'
    """
    # m: position where the predicate ends ('(' opens a carg, otherwise the
    # first space opens the sortinfo); -1 if neither is present.
    m = string.find('(')
    if m < 0:
        m = string.find(' ')
    # l: position of the ':' that ends the reference ids; only looked for
    # before position m when m was found.
    if m < 0:
        l = string.find(':')
    else:
        l = string.find(':', 0, m)
    if l < 0:
        ref_ids = None
        l = 0
    else:
        ref_ids = string[:l]
        l += 1
        # Skip spaces after the ':'.
        # NOTE(review): looks like this raises IndexError if the string ends
        # right after ':' or with spaces only - confirm inputs never do.
        while string[l] == ' ':
            l += 1
    # From here on l is the start of the predicate (or of the 'node' keyword).
    if string[l:l + 4] == 'node' and (len(string) - l == 4 or string[l + 4] in special_values):
        # Keyword form: a fully underspecified node.
        # NOTE(review): special_values is defined outside this block;
        # presumably the characters that may follow a value - verify.
        value = _parse_value(string[l + 4:], None, queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]]))
        assert not value
        pred = Pred()
        carg = '?'
        sortinfo = Sortinfo()
        ref_name = 'node'
    elif m < 0:
        # Only a predicate: no carg and no sortinfo.
        pred, ref_name = _parse_pred(string[l:], nodeid, queries, equalities)
        carg = None
        sortinfo = None
    else:
        pred, ref_name = _parse_pred(string[l:m], nodeid, queries, equalities)
        if string[m] == '(':
            r = string.index(')', m)
            # A carg wrapped in double quotes is taken without them.
            if string[m + 1] == '"' and string[r - 1] == '"':
                carg = string[m + 2:r - 1]
            else:
                carg = string[m + 1:r]
            assert '"' not in carg
            carg = _parse_value(carg, '?', queries, equalities, (lambda matching, dmrs: dmrs[matching[nodeid]].carg))
            # Continue scanning right after the closing parenthesis.
            m = r + 1
        else:
            carg = None
        if m < len(string) and string[m] == ' ':
            # NOTE(review): looks like this raises IndexError if only spaces
            # follow - confirm inputs are stripped by the caller.
            while string[m] == ' ':
                m += 1
            sortinfo = _parse_sortinfo(string[m:], nodeid, queries, equalities, sortinfo_classes, sortinfo_shortforms)
        else:
            sortinfo = None
    if not ref_ids:
        # No reference ids (None or an empty prefix): a plain node that is
        # not registered in anchors.
        ref_ids = None
        node = Node(nodeid, pred, sortinfo=sortinfo, carg=carg)
    else:
        # The bracket type around the reference ids selects the node class.
        if ref_ids[0] == '[' and ref_ids[-1] == ']':
            ref_ids = ref_ids[1:-1].split(',')
            node = AnchorNode(anchors=ref_ids, nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
        elif ref_ids[0] == '(' and ref_ids[-1] == ')':
            ref_ids = ref_ids[1:-1].split(',')
            node = OptionalNode(anchors=ref_ids, nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
        elif ref_ids[0] == '{' and ref_ids[-1] == '}':
            ref_ids = ref_ids[1:-1].split(',')
            node = SubgraphNode(anchors=ref_ids, nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
        else:
            # Unbracketed ids: a plain node that is still registered below.
            ref_ids = ref_ids.split(',')
            node = Node(nodeid=nodeid, pred=pred, sortinfo=sortinfo, carg=carg)
        # Register the node under each of its reference ids.
        for ref_id in ref_ids:
            assert ref_id not in anchors, 'Reference ids have to be unique.'
            anchors[ref_id] = node
    return node, ref_ids, ref_name
def the():
    """Return a DictDmrs containing only a 'the' quantifier node."""
    graph = DictDmrs()
    # No nodeid is passed here; the node id is set automatically.
    determiner = Node(pred=RealPred('the', 'q'))
    graph.add_node(determiner)
    return graph