def test_from_dict(self):
    """Dmrs.from_dict should compare equal to a directly constructed Dmrs."""
    # an empty dictionary corresponds to an empty Dmrs
    assert Dmrs.from_dict({}) == Dmrs()

    # one node plus a link from nodeid 0
    node_spec = {'nodeid': 10, 'predicate': '_rain_v_1'}
    link_spec = {'from': 0, 'to': 10, 'rargname': None, 'post': 'H'}
    loaded = Dmrs.from_dict({'nodes': [node_spec], 'links': [link_spec]})

    built = Dmrs(
        nodes=[Node(10, sp('"_rain_v_1_rel"'))],
        links=[Link(0, 10, None, 'H')]
    )
    assert loaded == built
def _reify_xmrs(path, top_axis=None):
    """
    Build a Dmrs from a path structure.

    The traversal starts at *path* (linked from nodeid 0 via *top_axis*,
    or ':/H>' when *top_axis* is falsy) and follows each target's
    ``links`` mapping of axis -> next target. Every visited target
    contributes one Link; each distinct ``nodeid`` contributes one Node.

    Raises:
        XmrsPathError: if an axis is not recognized as '<'-initial,
            '>'-final, or exactly ':/EQ:'.
    """
    seen_nodes = {}
    links = []
    # worklist of (source nodeid, axis, target) entries; popped from the end
    agenda = [(0, top_axis or ':/H>', path)]
    while agenda:
        srcnid, axis, tgt = agenda.pop()
        if tgt is None:
            continue

        # add link to tgt
        # NOTE(review): an axis lacking exactly one '/' fails here with a
        # ValueError from the unpacking, before the invalid-axis check below.
        rargname, post = axis.strip(':<>').split('/')
        if axis.startswith('<'):
            # leftward axis: the target is the link's source
            link = Link(tgt.nodeid, srcnid, rargname or None, post)
        elif axis.endswith('>'):
            link = Link(srcnid, tgt.nodeid, rargname or None, post)
        elif axis == ':/EQ:':
            link = Link(srcnid, tgt.nodeid, None, 'EQ')
        else:
            raise XmrsPathError('Invalid axis: {}'.format(axis))
        links.append(link)

        # add node if necessary (note, currently does not update pred
        # or sortinfo if encountered twice)
        if tgt.nodeid not in seen_nodes:
            sortinfo = {'cvarsort': tgt.context.get('varsort') or 'u'}
            # '@'-prefixed context entries become sortinfo properties
            for key, value in tgt.context.items():
                if key.startswith('@'):
                    sortinfo[key.lstrip('@')] = value
            seen_nodes[tgt.nodeid] = Node(
                tgt.nodeid, tgt.pred, sortinfo=sortinfo
            )

        # add new agenda for tgt
        agenda.extend(
            (tgt.nodeid, next_axis, next_tgt)
            for next_axis, next_tgt in tgt.links.items()
        )
    return Dmrs(list(seen_nodes.values()), links)
def test_empty(self):
    """A Dmrs constructed without arguments has no nodes and no links."""
    empty = Dmrs()

    # Dmrs view
    assert len(nodes(empty)) == 0
    assert len(links(empty)) == 0

    # Xmrs members
    check_xmrs(empty, None, None, None, 0, 0, 0, 0)
def decode_dmrs(elem):
    """
    Build a Dmrs from *elem* by decoding its nodes and links.

    The lnk, surface, and identifier of the result are always None.
    """
    # dmrs { NODES LINKS }
    # The previous code called map() with only the decoder function and no
    # iterable, which raises TypeError on every call and never read *elem*.
    # NOTE(review): the node/link sources below follow the pattern used by
    # _deserialize_dmrs and assume *elem* offers an ElementTree-style
    # iter(tag) method -- confirm against the callers of this function.
    return Dmrs(nodes=list(map(decode_node, elem.iter('node'))),
                links=list(map(decode_link, elem.iter('link'))),
                lnk=None,
                surface=None,
                identifier=None)
def test_from_triples(self):
    """Dmrs.from_triples should match directly constructed Dmrs objects."""
    assert Dmrs.from_triples([]) == Dmrs()

    # single node with an explicit top triple
    from_triples = Dmrs.from_triples([
        (10, 'predicate', '_rain_v_1'),
        (0, 'top', 10)
    ])
    # by default nodeids get remapped from 10000
    expected = Dmrs(
        nodes=[Node(10000, sp('"_rain_v_1_rel"'))],
        links=[Link(0, 10000, None, 'H')]
    )
    assert from_triples == expected

    # the same expectation holds when the top triple is left out
    from_triples = Dmrs.from_triples([(10, 'predicate', '_rain_v_1')])
    assert from_triples == expected

    # three nodes; node 20 is top and links to both of the others
    triples = [
        (10, 'predicate', '_rain_v_1'),
        (20, 'predicate', '_or_c'),
        (30, 'predicate', '_snow_v_1'),
        (0, 'top', 20),
        (20, 'L-INDEX-NEQ', 10),
        (20, 'L-HNDL-HEQ', 10),
        (20, 'R-INDEX-NEQ', 30),
        (20, 'R-HNDL-HEQ', 30)
    ]
    from_triples = Dmrs.from_triples(triples)
    expected_nodes = [
        Node(10000, sp('"_rain_v_1_rel"')),
        Node(10001, sp('_or_c_rel')),
        Node(10002, sp('"_snow_v_1_rel"'))
    ]
    expected_links = [
        Link(0, 10001, None, 'H'),
        Link(10001, 10000, 'L-INDEX', 'NEQ'),
        Link(10001, 10000, 'L-HNDL', 'HEQ'),
        Link(10001, 10002, 'R-INDEX', 'NEQ'),
        Link(10001, 10002, 'R-HNDL', 'HEQ')
    ]
    expected = Dmrs(nodes=expected_nodes, links=expected_links)
    assert from_triples == expected
def test_from_dict(self):
    """Round-trip check: a dictionary payload versus the Dmrs constructor."""
    assert Dmrs.from_dict({}) == Dmrs()

    payload = {
        'nodes': [{'nodeid': 10, 'predicate': '_rain_v_1'}],
        'links': [{'from': 0, 'to': 10, 'rargname': None, 'post': 'H'}],
    }
    actual = Dmrs.from_dict(payload)

    rain_node = Node(10, sp('"_rain_v_1_rel"'))
    top_link = Link(0, 10, None, 'H')
    expected = Dmrs(nodes=[rain_node], links=[top_link])

    assert actual == expected
def dmrs(self):
    """
    Return the value stored under 'dmrs', deserialized when possible.

    A dictionary value (JSON-formatted DMRS data) is converted with
    Dmrs.from_dict; any other value, including a missing one (None),
    is returned unchanged.
    """
    payload = self.get('dmrs')
    # None is never a dict, so this one test also covers the missing case
    if isinstance(payload, dict):
        return Dmrs.from_dict(payload)
    return payload
def dmrs(self):
    """
    Fetch the 'dmrs' entry, turning dictionary data into a Dmrs.

    Returns a Dmrs built by Dmrs.from_dict when the entry is a
    dictionary; otherwise returns the entry as stored (None when it
    is absent).
    """
    value = self.get('dmrs')
    return Dmrs.from_dict(value) if isinstance(value, dict) else value
def test_to_triples(self):
    """Check Dmrs.to_triples output, with and without properties."""
    assert Dmrs().to_triples() == []

    # a node without sortinfo is reported with cvarsort 'u'
    bare = Dmrs(nodes=[Node(10, sp('"_rain_v_1_rel"'))])
    assert bare.to_triples() == [
        (10, 'predicate', '_rain_v_1'),
        (10, 'cvarsort', 'u')
    ]

    # explicit cvarsort plus a link from nodeid 0
    rain_node = Node(10, sp('"_rain_v_1_rel"'), {'cvarsort': 'e'})
    with_top = Dmrs(nodes=[rain_node], links=[Link(0, 10, None, 'H')])
    assert with_top.to_triples() == [
        (10, 'predicate', '_rain_v_1'),
        (10, 'cvarsort', 'e'),
        (0, 'top', 10)
    ]
    # properties=False leaves out the cvarsort triple
    assert with_top.to_triples(properties=False) == [
        (10, 'predicate', '_rain_v_1'),
        (0, 'top', 10)
    ]
def _deserialize_dmrs(elem):
    """
    Build a Dmrs from a <dmrs> XML element.

    Every descendant <node> and <link> is decoded with _decode_node and
    _decode_link; the lnk comes from _decode_lnk, and the surface and
    identifier come from the 'surface' and 'ident' attributes.
    """
    # <!ELEMENT dmrs (node|link)*>
    # <!ATTLIST dmrs
    #           cfrom CDATA #REQUIRED
    #           cto   CDATA #REQUIRED
    #           surface   CDATA #IMPLIED
    #           ident     CDATA #IMPLIED >
    root = elem.find('.')  # in case elem is an ElementTree rather than Element
    decoded_nodes = [_decode_node(node_el) for node_el in root.iter('node')]
    decoded_links = [_decode_link(link_el) for link_el in root.iter('link')]
    return Dmrs(
        nodes=decoded_nodes,
        links=decoded_links,
        lnk=_decode_lnk(root),
        surface=root.get('surface'),
        identifier=root.get('ident')
    )
def test_to_dict(self):
    """Check Dmrs.to_dict output, with and without properties."""
    assert Dmrs().to_dict() == {'nodes': [], 'links': []}

    # a node without sortinfo is serialized with the UNKNOWNSORT cvarsort
    bare = Dmrs(nodes=[Node(10, sp('"_rain_v_1_rel"'))])
    unknown_sort_node = {
        'nodeid': 10,
        'predicate': '_rain_v_1',
        'sortinfo': {'cvarsort': UNKNOWNSORT}
    }
    assert bare.to_dict() == {'nodes': [unknown_sort_node], 'links': []}

    # explicit cvarsort plus a link from nodeid 0
    with_top = Dmrs(
        nodes=[Node(10, sp('"_rain_v_1_rel"'), {'cvarsort': 'e'})],
        links=[Link(0, 10, None, 'H')]
    )
    top_link = {'from': 0, 'to': 10, 'rargname': None, 'post': 'H'}
    event_node = {
        'nodeid': 10,
        'predicate': '_rain_v_1',
        'sortinfo': {'cvarsort': 'e'}
    }
    assert with_top.to_dict() == {
        'nodes': [event_node],
        'links': [top_link]
    }
    # properties=False leaves out the sortinfo entry
    plain_node = {'nodeid': 10, 'predicate': '_rain_v_1'}
    assert with_top.to_dict(properties=False) == {
        'nodes': [plain_node],
        'links': [top_link]
    }
def test_single_node(self):
    """Check labels and intrinsic variables of one-node Dmrs objects."""
    rain_pred = '"_rain_v_1_rel"'

    # basic, one Node, no TOP
    graph = Dmrs(nodes=[Node(10, sp(rain_pred))])
    check_xmrs(graph, None, None, None, 1, 0, 0, 2)
    # variables don't need to be created predictably, but it's nice
    # to get the expected values for simple cases
    assert graph.label(10) == 'h1'
    assert graph.ep(10).iv == 'u2'

    # now with cvarsort
    graph = Dmrs(nodes=[Node(10, sp(rain_pred), {'cvarsort': 'e'})])
    check_xmrs(graph, None, None, None, 1, 0, 0, 2)
    assert graph.label(10) == 'h1'
    assert graph.ep(10).iv == 'e2'

    # now with TOP
    graph = Dmrs(
        nodes=[Node(10, sp(rain_pred), {'cvarsort': 'e'})],
        links=[Link(0, 10, None, 'H')]
    )
    check_xmrs(graph, 'h0', None, None, 1, 1, 0, 3)
    assert graph.label(10) == 'h1'
    assert graph.ep(10).iv == 'e2'
def test_to_dict(self):
    """Dmrs.to_dict should emit the expected 'nodes' and 'links' lists."""
    empty_dict = Dmrs().to_dict()
    assert empty_dict == {'nodes': [], 'links': []}

    # without sortinfo, the node's cvarsort is UNKNOWNSORT
    x = Dmrs(nodes=[Node(10, sp('"_rain_v_1_rel"'))])
    expected = {
        'nodes': [
            {
                'nodeid': 10,
                'predicate': '_rain_v_1',
                'sortinfo': {'cvarsort': UNKNOWNSORT},
            },
        ],
        'links': [],
    }
    assert x.to_dict() == expected

    # with a cvarsort and a link from nodeid 0
    x = Dmrs(nodes=[Node(10, sp('"_rain_v_1_rel"'), {'cvarsort': 'e'})],
             links=[Link(0, 10, None, 'H')])
    expected = {
        'nodes': [
            {
                'nodeid': 10,
                'predicate': '_rain_v_1',
                'sortinfo': {'cvarsort': 'e'},
            },
        ],
        'links': [
            {'from': 0, 'to': 10, 'rargname': None, 'post': 'H'},
        ],
    }
    assert x.to_dict() == expected

    # with properties disabled the node carries no 'sortinfo' key
    expected = {
        'nodes': [
            {'nodeid': 10, 'predicate': '_rain_v_1'},
        ],
        'links': [
            {'from': 0, 'to': 10, 'rargname': None, 'post': 'H'},
        ],
    }
    assert x.to_dict(properties=False) == expected
def parse_response(inp, ace_response, params):
    """
    Assemble the response dictionary for one parsed input.

    Args:
        inp: the input, echoed back under the 'input' key
        ace_response: mapping read for its 'NOTES' and 'RESULTS' entries;
            each result must provide 'MRS' and 'DERIV'
        params: mapping of requested output formats ('properties',
            'derivation', 'mrs', 'eds', 'dmrs')
    Returns:
        a dict with 'input', 'readings', and 'results', plus 'tcpu' and
        'pedges' when those values are not None
    """
    properties = params.get('properties') == 'json'
    tcpu, pedges = _get_parse_info(ace_response.get('NOTES', []))

    result_data = []
    for i, res in enumerate(ace_response.get('RESULTS', [])):
        mrs, udf = res['MRS'], res['DERIV']
        xmrs = simplemrs.loads_one(mrs)
        d = {'result-id': i}

        deriv_fmt = params.get('derivation')
        if deriv_fmt == 'udf':
            d['derivation'] = udf
        elif deriv_fmt == 'json':
            d['derivation'] = udf_to_dict(udf, params)

        mrs_fmt = params.get('mrs')
        if mrs_fmt == 'simple':
            d['mrs'] = mrs
        elif mrs_fmt == 'json':
            d['mrs'] = Mrs.to_dict(xmrs, properties=properties)
        elif mrs_fmt == 'latex':
            abort(501, "The 'latex' format for MRS is not yet implemented.")

        eds_fmt = params.get('eds')
        if eds_fmt == 'native':
            d['eds'] = eds.dumps(xmrs, single=True)
        elif eds_fmt == 'json':
            d['eds'] = eds.Eds.from_xmrs(xmrs).to_dict(properties=properties)
        elif eds_fmt == 'latex':
            abort(501, "The 'latex' format for EDS is not yet implemented.")

        dmrs_fmt = params.get('dmrs')
        if dmrs_fmt == 'json':
            d['dmrs'] = Dmrs.to_dict(xmrs, properties=properties)
        elif dmrs_fmt == 'latex':
            d['dmrs'] = latex.dmrs_tikz_dependency(xmrs)

        result_data.append(d)

    data = {
        'input': inp,
        'readings': len(ace_response.get('RESULTS', [])),
        'results': result_data
    }
    # optional statistics are only reported when available
    for key, value in (('tcpu', tcpu), ('pedges', pedges)):
        if value is not None:
            data[key] = value
    return data
def test_ParseResult():
    """
    Exercise the ParseResult accessors (mrs, dmrs, eds, derivation)
    against string and/or dictionary payloads.
    """
    # with nothing stored, every accessor gives None
    r = ParseResult()
    assert len(r) == 0
    assert r.mrs() is None
    assert r.dmrs() is None
    assert r.eds() is None
    assert r.derivation() is None

    # --- MRS: string and dictionary payloads ---
    mrs_s = '[ TOP: h0 RELS: < ["_rain_v_1_rel" LBL: h1 ARG0: e2 ] > HCONS: < h0 qeq h1 > ]'
    rain_relation = {
        'predicate': '_rain_v_1',
        'label': 'h1',
        'arguments': {'ARG0': 'e2'}
    }
    top_constraint = {'relation': 'qeq', 'high': 'h0', 'low': 'h1'}
    mrs_d = {
        'top': 'h0',
        'relations': [rain_relation],
        'constraints': [top_constraint]
    }
    mrs = simplemrs.loads_one(mrs_s)

    r = ParseResult(mrs=mrs_s)
    assert len(r) == 1
    assert r['mrs'] == mrs_s
    assert r.mrs() == mrs

    r = ParseResult(mrs=mrs_d)
    assert len(r) == 1
    assert r['mrs'] == mrs_d
    assert r.mrs() == mrs

    r = ParseResult(mrs=mrs_d)
    assert len(r) == 1
    assert r['mrs'] == mrs_d
    assert r.mrs() == mrs

    # r = ParseResult(mrs='nonsense')
    # assert r['mrs'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.mrs()

    # --- DMRS: dictionary payload ---
    rain_node = {
        'nodeid': 10000,
        'predicate': '_rain_v_1',
        'sortinfo': {'cvarsort': 'e'}
    }
    top_link = {'from': 0, 'to': 10000, 'rargname': None, 'post': 'H'}
    dmrs_d = {'nodes': [rain_node], 'links': [top_link]}
    dmrs = Dmrs.from_dict(dmrs_d)

    r = ParseResult(dmrs=dmrs_d)
    assert len(r) == 1
    assert r['dmrs'] == dmrs_d
    assert r.dmrs() == dmrs

    # r = ParseResult(dmrs='nonsense')
    # assert len(r) == 1
    # assert r['dmrs'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.dmrs()

    # --- EDS: string and dictionary payloads ---
    rain_eds_node = {
        'label': '_rain_v_1',
        'lnk': {'from': 3, 'to': 9},
        'edges': {}
    }
    eds_d = {'top': 'e2', 'nodes': {'e2': rain_eds_node}}
    eds_s = '{e2: e2:_rain_v_1<3:9>[]}'
    eds = Eds.from_dict(eds_d)

    r = ParseResult(eds=eds_s)
    assert len(r) == 1
    assert r['eds'] == eds_s
    assert r.eds() == eds

    r = ParseResult(eds=eds_d)
    assert len(r) == 1
    assert r['eds'] == eds_d
    assert r.eds() == eds

    # r = ParseResult(eds='nonsense')
    # assert len(r) == 1
    # assert r['eds'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.eds()

    # --- derivation: string and dictionary payloads ---
    # several changes were made to the below for compatibility:
    #  - removed head annotation (on W_PERIOD_PLR)
    #  - removed type info
    #  - removed from/to info
    #  - added start/end
    #  - escaped quotes
    #  - capitalized entity names
    deriv_s = '(189 SB-HD_MC_C 0.228699 0 2 (37 it 0.401245 0 1 ("it" 34 "token [ +FORM \\"it\\" +FROM #1=\\"0\\" +TO \\"2\\" ]")) (188 W_PERIOD_PLR -0.113641 1 2 (187 V_PST_OLR 0 1 2 (56 rain_v1 0 1 2 ("rained." 32 "token [ +FORM \\"rained.\\" +FROM #1=\\"3\\" +TO \\"10\\" ]")))))'

    # the dictionary form is assembled from the leaves upward
    it_token = {"id": 34, "tfs": "token [ +FORM \\\"it\\\" +FROM #1=\\\"0\\\" +TO \\\"2\\\" ]"}  # , "from": 0, "to": 2
    rained_token = {"id": 32, "tfs": "token [ +FORM \\\"rained.\\\" +FROM #1=\\\"3\\\" +TO \\\"10\\\" ]"}  # , "from": 3, "to": 10
    it_node = {  # , "type": "n_-_pr-it-x_le" , "from": 0, "to": 2
        "id": 37, "entity": "it", "score": 0.401245,
        "start": 0, "end": 1, "form": "it",
        "tokens": [it_token]
    }
    rain_v1_node = {  # , "type": "v_-_it_le", "from": 3, "to": 10
        "id": 56, "entity": "rain_v1", "score": 0,
        "start": 1, "end": 2, "form": "rained.",
        "tokens": [rained_token]
    }
    v_pst_node = {  # , "type": "v_pst_inflrule"
        "id": 187, "entity": "V_PST_OLR", "score": 0,
        "start": 1, "end": 2,
        "daughters": [rain_v1_node]
    }
    w_period_node = {  # , "type": "punctuation_period_rule"
        "id": 188, "entity": "W_PERIOD_PLR", "score": -0.113641,
        "start": 1, "end": 2,
        "daughters": [v_pst_node]
    }
    deriv_d = {  # , "type": "subjh_mc_rule"
        "id": 189, "entity": "SB-HD_MC_C", "label": "S", "score": 0.228699,
        "start": 0, "end": 2,
        "daughters": [it_node, w_period_node]
    }
    deriv = Derivation.from_dict(deriv_d)

    r = ParseResult(derivation=deriv_s)
    assert len(r) == 1
    assert r['derivation'] == deriv_s
    assert r.derivation() == deriv

    r = ParseResult(derivation=deriv_d)
    assert len(r) == 1
    assert r['derivation'] == deriv_d
    assert r.derivation() == deriv
def test_single_node(self):
    """One-node Dmrs objects: without TOP, with a cvarsort, and with TOP."""
    # basic, one Node, no TOP
    no_sort_node = Node(10, sp('"_rain_v_1_rel"'))
    x = Dmrs(nodes=[no_sort_node])
    check_xmrs(x, None, None, None, 1, 0, 0, 2)
    # variables don't need to be created predictably, but it's nice
    # to get the expected values for simple cases
    assert x.label(10) == 'h1'
    assert x.ep(10).iv == 'u2'

    # now with cvarsort
    event_node = Node(10, sp('"_rain_v_1_rel"'), {'cvarsort': 'e'})
    x = Dmrs(nodes=[event_node])
    check_xmrs(x, None, None, None, 1, 0, 0, 2)
    assert x.label(10) == 'h1'
    assert x.ep(10).iv == 'e2'

    # now with TOP
    event_node = Node(10, sp('"_rain_v_1_rel"'), {'cvarsort': 'e'})
    top_link = Link(0, 10, None, 'H')
    x = Dmrs(nodes=[event_node], links=[top_link])
    check_xmrs(x, 'h0', None, None, 1, 1, 0, 3)
    assert x.label(10) == 'h1'
    assert x.ep(10).iv == 'e2'
def _parse_repsonse(inp, ace_response, params):
    """
    Assemble the response dictionary for one parsed input.

    Args:
        inp: the input, echoed back under the 'input' key
        ace_response: response object read through get(), results(),
            and tokens()
        params: mapping of requested output formats ('properties',
            'derivation', 'mrs', 'eds', 'dmrs', 'tokens')
    Returns:
        a dict with 'input', 'readings', and 'results', plus 'tcpu',
        'pedges', and 'tokens' when available/requested
    """
    # NOTE(review): the function name is spelled "repsonse"; it is left
    # as-is here because callers refer to it by this name.
    properties = params.get('properties') == 'json'

    tcpu = ace_response.get('tcpu')
    pedges = ace_response.get('pedges')
    readings = ace_response.get('readings')
    if readings is None:
        # fall back to counting the results
        readings = len(ace_response.get('results', []))

    result_data = []
    for i, res in enumerate(ace_response.results()):
        mrs, udf = res['mrs'], res['derivation']
        xmrs = simplemrs.loads_one(mrs)
        d = {'result-id': i}

        deriv_fmt = params.get('derivation')
        if deriv_fmt == 'udf':
            d['derivation'] = udf
        elif deriv_fmt == 'json':
            d['derivation'] = _udf_to_dict(udf, params)

        mrs_fmt = params.get('mrs')
        if mrs_fmt == 'simple':
            d['mrs'] = mrs
        elif mrs_fmt == 'json':
            d['mrs'] = Mrs.to_dict(xmrs, properties=properties)
        elif mrs_fmt == 'latex':
            abort(501, "The 'latex' format for MRS is not yet implemented.")

        eds_fmt = params.get('eds')
        if eds_fmt == 'native':
            d['eds'] = eds.dumps(xmrs, single=True)
        elif eds_fmt == 'json':
            d['eds'] = eds.Eds.from_xmrs(xmrs).to_dict(properties=properties)
        elif eds_fmt in ('amr', 'penman'):
            d['eds'] = penman.dumps([xmrs], model=eds.Eds)
        elif eds_fmt == 'latex':
            abort(501, "The 'latex' format for EDS is not yet implemented.")

        dmrs_fmt = params.get('dmrs')
        if dmrs_fmt == 'json':
            d['dmrs'] = Dmrs.to_dict(xmrs, properties=properties)
        elif dmrs_fmt == 'penman':
            d['dmrs'] = penman.dumps([xmrs], model=Dmrs)
        elif dmrs_fmt == 'latex':
            d['dmrs'] = latex.dmrs_tikz_dependency(xmrs)

        result_data.append(d)

    data = {
        'input': inp,
        'readings': readings,
        'results': result_data
    }
    # optional statistics are only reported when available
    for key, value in (('tcpu', tcpu), ('pedges', pedges)):
        if value is not None:
            data[key] = value

    if params.get('tokens'):
        t1 = ace_response.tokens('initial')
        t2 = ace_response.tokens('internal')
        token_fmt = params['tokens']
        if token_fmt == 'json':
            data['tokens'] = {
                'initial': t1.to_list(),
                'internal': t2.to_list()
            }
        elif token_fmt == 'yy':
            data['tokens'] = {
                'initial': str(t1),
                'internal': str(t2)
            }

    return data
def test_ParseResult():
    """
    Check that ParseResult stores each payload unchanged and that its
    accessor methods return the corresponding deserialized objects.
    """
    r = ParseResult()
    assert len(r) == 0
    # accessors are called in this order: mrs, dmrs, eds, derivation
    for accessor in (r.mrs, r.dmrs, r.eds, r.derivation):
        assert accessor() is None

    mrs_s = '[ TOP: h0 RELS: < ["_rain_v_1_rel" LBL: h1 ARG0: e2 ] > HCONS: < h0 qeq h1 > ]'
    mrs_d = {
        'top': 'h0',
        'relations': [
            {
                'predicate': '_rain_v_1',
                'label': 'h1',
                'arguments': {'ARG0': 'e2'},
            },
        ],
        'constraints': [
            {'relation': 'qeq', 'high': 'h0', 'low': 'h1'},
        ],
    }
    mrs = simplemrs.loads_one(mrs_s)
    # the string form once, then the dictionary form twice
    for payload in (mrs_s, mrs_d, mrs_d):
        r = ParseResult(mrs=payload)
        assert len(r) == 1
        assert r['mrs'] == payload
        assert r.mrs() == mrs

    # r = ParseResult(mrs='nonsense')
    # assert r['mrs'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.mrs()

    dmrs_d = {
        'nodes': [
            {
                'nodeid': 10000,
                'predicate': '_rain_v_1',
                'sortinfo': {'cvarsort': 'e'},
            },
        ],
        'links': [
            {'from': 0, 'to': 10000, 'rargname': None, 'post': 'H'},
        ],
    }
    dmrs = Dmrs.from_dict(dmrs_d)
    r = ParseResult(dmrs=dmrs_d)
    assert len(r) == 1
    assert r['dmrs'] == dmrs_d
    assert r.dmrs() == dmrs

    # r = ParseResult(dmrs='nonsense')
    # assert len(r) == 1
    # assert r['dmrs'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.dmrs()

    eds_d = {
        'top': 'e2',
        'nodes': {
            'e2': {
                'label': '_rain_v_1',
                'lnk': {'from': 3, 'to': 9},
                'edges': {},
            },
        },
    }
    eds_s = '{e2: e2:_rain_v_1<3:9>[]}'
    eds = Eds.from_dict(eds_d)
    for payload in (eds_s, eds_d):
        r = ParseResult(eds=payload)
        assert len(r) == 1
        assert r['eds'] == payload
        assert r.eds() == eds

    # r = ParseResult(eds='nonsense')
    # assert len(r) == 1
    # assert r['eds'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.eds()

    # several changes were made to the below for compatibility:
    #  - removed head annotation (on W_PERIOD_PLR)
    #  - removed type info
    #  - removed from/to info
    #  - added start/end
    #  - escaped quotes
    #  - capitalized entity names
    deriv_s = '(189 SB-HD_MC_C 0.228699 0 2 (37 it 0.401245 0 1 ("it" 34 "token [ +FORM \\"it\\" +FROM #1=\\"0\\" +TO \\"2\\" ]")) (188 W_PERIOD_PLR -0.113641 1 2 (187 V_PST_OLR 0 1 2 (56 rain_v1 0 1 2 ("rained." 32 "token [ +FORM \\"rained.\\" +FROM #1=\\"3\\" +TO \\"10\\" ]")))))'
    deriv_d = {
        "id": 189,
        "entity": "SB-HD_MC_C",
        "label": "S",
        "score": 0.228699,
        "start": 0,
        "end": 2,
        "daughters": [  # , "type": "subjh_mc_rule"
            {
                "id": 37,
                "entity": "it",
                "score": 0.401245,
                "start": 0,
                "end": 1,
                "form": "it",
                "tokens": [  # , "type": "n_-_pr-it-x_le" , "from": 0, "to": 2
                    {
                        "id": 34,
                        "tfs": "token [ +FORM \\\"it\\\" +FROM #1=\\\"0\\\" +TO \\\"2\\\" ]",
                    },
                ],
            },  # , "from": 0, "to": 2
            {
                "id": 188,
                "entity": "W_PERIOD_PLR",
                "score": -0.113641,
                "start": 1,
                "end": 2,
                "daughters": [  # , "type": "punctuation_period_rule"
                    {
                        "id": 187,
                        "entity": "V_PST_OLR",
                        "score": 0,
                        "start": 1,
                        "end": 2,
                        "daughters": [  # , "type": "v_pst_inflrule"
                            {
                                "id": 56,
                                "entity": "rain_v1",
                                "score": 0,
                                "start": 1,
                                "end": 2,
                                "form": "rained.",
                                "tokens": [  # , "type": "v_-_it_le", "from": 3, "to": 10
                                    {
                                        "id": 32,
                                        "tfs": "token [ +FORM \\\"rained.\\\" +FROM #1=\\\"3\\\" +TO \\\"10\\\" ]",
                                    },
                                ],
                            },
                        ],
                    },
                ],
            },
        ],  # , "from": 3, "to": 10
    }
    deriv = Derivation.from_dict(deriv_d)
    for payload in (deriv_s, deriv_d):
        r = ParseResult(derivation=payload)
        assert len(r) == 1
        assert r['derivation'] == payload
        assert r.derivation() == deriv