示例#1
0
 def test_from_dict(self):
     """
     A derivation built from a dictionary must equal the one parsed
     from the corresponding UDF string.

     Two inputs are compared: a daughter that only has a form, and a
     daughter whose form is accompanied by two tokens.
     """
     # Case 1: root with a single daughter carrying just a form.
     udf = '(root (1 some-type -1 -1 -1 ("a")))'
     daughter = {'id': 1, 'entity': 'some-type', 'form': 'a'}
     data = {'entity': 'root', 'daughters': [daughter]}
     from_data = D.from_dict(data)
     from_udf = D.from_string(udf)
     assert from_data == from_udf

     # Case 2: the daughter also lists tokens, each an id plus a TFS
     # string; raw strings keep the backslash-escaped quotes literal.
     udf = (
         r'(root (1 some-type -1 -1 -1 ("a b"'
         r' 2 "token [ +FORM \"a\" ]"'
         r' 3 "token [ +FORM \"b\" ]")))'
     )
     token_a = {'id': 2, 'tfs': r'token [ +FORM \"a\" ]'}
     token_b = {'id': 3, 'tfs': r'token [ +FORM \"b\" ]'}
     daughter = {
         'id': 1,
         'entity': 'some-type',
         'form': 'a b',
         'tokens': [token_a, token_b],
     }
     data = {'entity': 'root', 'daughters': [daughter]}
     from_data = D.from_dict(data)
     from_udf = D.from_string(udf)
     assert from_data == from_udf
示例#2
0
 def test_from_dict(self):
     """
     A derivation built from a dictionary must equal the one parsed
     from the corresponding UDF string.

     The second comparison uses an entity written as
     ``^some-thing@some-type`` in the string form, matched on the
     dictionary side by the 'entity', 'type' and 'head' keys.
     """
     # Case 1: root with a single daughter carrying just a form.
     udf = '(root (1 some-thing -1 -1 -1 ("a")))'
     daughter = {'id': 1, 'entity': 'some-thing', 'form': 'a'}
     data = {'entity': 'root', 'daughters': [daughter]}
     from_data = D.from_dict(data)
     from_udf = D.from_string(udf)
     assert from_data == from_udf

     # Case 2: decorated entity plus two tokens; raw strings keep the
     # backslash-escaped quotes literal.
     udf = (
         r'(root (1 ^some-thing@some-type -1 -1 -1 ("a b"'
         r' 2 "token [ +FORM \"a\" ]"'
         r' 3 "token [ +FORM \"b\" ]")))'
     )
     token_a = {'id': 2, 'tfs': r'token [ +FORM \"a\" ]'}
     token_b = {'id': 3, 'tfs': r'token [ +FORM \"b\" ]'}
     daughter = {
         'id': 1,
         'entity': 'some-thing',
         'type': 'some-type',
         'head': True,
         'form': 'a b',
         'tokens': [token_a, token_b],
     }
     data = {'entity': 'root', 'daughters': [daughter]}
     from_data = D.from_dict(data)
     from_udf = D.from_string(udf)
     assert from_data == from_udf
示例#3
0
 def derivation(self):
     """
     Return the 'derivation' item in deserialized form.

     A dict value is handed to Derivation.from_dict and a string
     value (per `stringtypes`) to Derivation.from_string.  A missing
     item yields None, and a value of any other type is returned
     unchanged.
     """
     raw = self.get('derivation')
     if raw is None:
         # nothing stored under the key
         return raw
     if isinstance(raw, dict):
         return Derivation.from_dict(raw)
     if isinstance(raw, stringtypes):
         return Derivation.from_string(raw)
     # neither dict nor string: pass the value through as-is
     return raw
示例#4
0
 def derivation(self):
     """
     Fetch the 'derivation' item and deserialize it when possible.

     Dict data goes through Derivation.from_dict; string data (any
     of `stringtypes`) goes through Derivation.from_string.  When
     the item is absent the result is None; values of other types
     come back untouched.
     """
     stored = self.get('derivation')
     if stored is None:
         return stored
     if isinstance(stored, dict):
         # JSON-style (dictionary) derivation data
         return Derivation.from_dict(stored)
     if isinstance(stored, stringtypes):
         # string-serialized derivation data
         return Derivation.from_string(stored)
     return stored
def test_ParseResult():
    """
    Check raw item access and the deserializing accessors of ParseResult.

    An empty ParseResult has length 0 and returns None from mrs(),
    dmrs(), eds() and derivation().  For each of those keys, a value
    given to the constructor must be returned unchanged by item access,
    and the method of the same name must return an object equal to a
    reference built directly from the same data.
    """
    empty = ParseResult()
    assert len(empty) == 0
    assert empty.mrs() is None
    assert empty.dmrs() is None
    assert empty.eds() is None
    assert empty.derivation() is None

    # --- MRS: string and dictionary payloads ---
    mrs_s = '[ TOP: h0 RELS: < ["_rain_v_1_rel" LBL: h1 ARG0: e2 ] > HCONS: < h0 qeq h1 > ]'
    rain_relation = {
        'predicate': '_rain_v_1',
        'label': 'h1',
        'arguments': {'ARG0': 'e2'},
    }
    qeq_constraint = {'relation': 'qeq', 'high': 'h0', 'low': 'h1'}
    mrs_d = {
        'top': 'h0',
        'relations': [rain_relation],
        'constraints': [qeq_constraint],
    }
    mrs = simplemrs.loads_one(mrs_s)

    # NOTE(review): the dictionary payload is exercised twice; the
    # repetition looks redundant -- confirm before removing it.
    for payload in (mrs_s, mrs_d, mrs_d):
        result = ParseResult(mrs=payload)
        assert len(result) == 1
        assert result['mrs'] == payload
        assert result.mrs() == mrs

    # disabled check, kept for reference:
    # r = ParseResult(mrs='nonsense')
    # assert r['mrs'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.mrs()

    # --- DMRS: dictionary payload only ---
    rain_node = {
        'nodeid': 10000,
        'predicate': '_rain_v_1',
        'sortinfo': {'cvarsort': 'e'},
    }
    top_link = {'from': 0, 'to': 10000, 'rargname': None, 'post': 'H'}
    dmrs_d = {'nodes': [rain_node], 'links': [top_link]}
    dmrs = Dmrs.from_dict(dmrs_d)

    result = ParseResult(dmrs=dmrs_d)
    assert len(result) == 1
    assert result['dmrs'] == dmrs_d
    assert result.dmrs() == dmrs

    # disabled check, kept for reference:
    # r = ParseResult(dmrs='nonsense')
    # assert len(r) == 1
    # assert r['dmrs'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.dmrs()

    # --- EDS: string and dictionary payloads ---
    eds_node = {
        'label': '_rain_v_1',
        'lnk': {'from': 3, 'to': 9},
        'edges': {},
    }
    eds_d = {'top': 'e2', 'nodes': {'e2': eds_node}}
    eds_s = '{e2: e2:_rain_v_1<3:9>[]}'
    eds = Eds.from_dict(eds_d)

    for payload in (eds_s, eds_d):
        result = ParseResult(eds=payload)
        assert len(result) == 1
        assert result['eds'] == payload
        assert result.eds() == eds

    # disabled check, kept for reference:
    # r = ParseResult(eds='nonsense')
    # assert len(r) == 1
    # assert r['eds'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.eds()

    # --- Derivation: string and dictionary payloads ---
    # The derivation data below was adjusted for compatibility:
    #  - the head annotation (on W_PERIOD_PLR) was removed
    #  - type info was removed
    #  - from/to info was removed
    #  - start/end were added
    #  - quotes were escaped
    #  - entity names were capitalized
    # The "omitted" comments record the fields dropped from each dictionary.

    deriv_s = '(189 SB-HD_MC_C 0.228699 0 2 (37 it 0.401245 0 1 ("it" 34 "token [ +FORM \\"it\\" +FROM #1=\\"0\\" +TO \\"2\\" ]")) (188 W_PERIOD_PLR -0.113641 1 2 (187 V_PST_OLR 0 1 2 (56 rain_v1 0 1 2 ("rained." 32 "token [ +FORM \\"rained.\\" +FROM #1=\\"3\\" +TO \\"10\\" ]")))))'

    # omitted: "from": 0, "to": 2
    token_it = {"id": 34, "tfs": "token [ +FORM \\\"it\\\" +FROM #1=\\\"0\\\" +TO \\\"2\\\" ]"}
    # omitted: "from": 3, "to": 10
    token_rained = {"id": 32, "tfs": "token [ +FORM \\\"rained.\\\" +FROM #1=\\\"3\\\" +TO \\\"10\\\" ]"}

    # omitted: "type": "n_-_pr-it-x_le", "from": 0, "to": 2
    node_it = {
        "id": 37, "entity": "it", "score": 0.401245,
        "start": 0, "end": 1, "form": "it",
        "tokens": [token_it],
    }
    # omitted: "type": "v_-_it_le", "from": 3, "to": 10
    node_rain = {
        "id": 56, "entity": "rain_v1", "score": 0,
        "start": 1, "end": 2, "form": "rained.",
        "tokens": [token_rained],
    }
    # omitted: "type": "v_pst_inflrule"
    node_past = {
        "id": 187, "entity": "V_PST_OLR", "score": 0,
        "start": 1, "end": 2,
        "daughters": [node_rain],
    }
    # omitted: "type": "punctuation_period_rule"
    node_period = {
        "id": 188, "entity": "W_PERIOD_PLR", "score": -0.113641,
        "start": 1, "end": 2,
        "daughters": [node_past],
    }
    # omitted: "type": "subjh_mc_rule"
    deriv_d = {
        "id": 189, "entity": "SB-HD_MC_C", "label": "S",
        "score": 0.228699, "start": 0, "end": 2,
        "daughters": [node_it, node_period],
    }
    deriv = Derivation.from_dict(deriv_d)

    for payload in (deriv_s, deriv_d):
        result = ParseResult(derivation=payload)
        assert len(result) == 1
        assert result['derivation'] == payload
        assert result.derivation() == deriv
示例#6
0
def test_ParseResult():
    """
    Verify ParseResult for empty input and for each payload kind.

    With no arguments the result has length 0 and every accessor
    (mrs, dmrs, eds, derivation) returns None.  With one keyword
    argument the result has length 1, item access returns the value
    exactly as given, and the accessor named after the key returns an
    object equal to an independently constructed reference.
    """

    def check(key, payload, expected):
        # Build a one-item result, then compare the stored item and the
        # output of the accessor that shares the key's name.
        result = ParseResult(**{key: payload})
        assert len(result) == 1
        assert result[key] == payload
        assert getattr(result, key)() == expected

    blank = ParseResult()
    assert len(blank) == 0
    assert blank.mrs() is None
    assert blank.dmrs() is None
    assert blank.eds() is None
    assert blank.derivation() is None

    # MRS ----------------------------------------------------------------
    mrs_s = '[ TOP: h0 RELS: < ["_rain_v_1_rel" LBL: h1 ARG0: e2 ] > HCONS: < h0 qeq h1 > ]'
    mrs_d = {
        'top': 'h0',
        'relations': [
            {'predicate': '_rain_v_1', 'label': 'h1', 'arguments': {'ARG0': 'e2'}},
        ],
        'constraints': [
            {'relation': 'qeq', 'high': 'h0', 'low': 'h1'},
        ],
    }
    mrs = simplemrs.loads_one(mrs_s)

    check('mrs', mrs_s, mrs)
    check('mrs', mrs_d, mrs)
    # NOTE(review): same dictionary payload checked a second time;
    # looks redundant -- confirm before dropping.
    check('mrs', mrs_d, mrs)

    # disabled check, kept for reference:
    # r = ParseResult(mrs='nonsense')
    # assert r['mrs'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.mrs()

    # DMRS ---------------------------------------------------------------
    dmrs_d = {
        'nodes': [
            {'nodeid': 10000, 'predicate': '_rain_v_1', 'sortinfo': {'cvarsort': 'e'}},
        ],
        'links': [
            {'from': 0, 'to': 10000, 'rargname': None, 'post': 'H'},
        ],
    }
    dmrs = Dmrs.from_dict(dmrs_d)

    check('dmrs', dmrs_d, dmrs)

    # disabled check, kept for reference:
    # r = ParseResult(dmrs='nonsense')
    # assert len(r) == 1
    # assert r['dmrs'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.dmrs()

    # EDS ----------------------------------------------------------------
    eds_d = {
        'top': 'e2',
        'nodes': {
            'e2': {'label': '_rain_v_1', 'lnk': {'from': 3, 'to': 9}, 'edges': {}},
        },
    }
    eds_s = '{e2: e2:_rain_v_1<3:9>[]}'
    eds = Eds.from_dict(eds_d)

    check('eds', eds_s, eds)
    check('eds', eds_d, eds)

    # disabled check, kept for reference:
    # r = ParseResult(eds='nonsense')
    # assert len(r) == 1
    # assert r['eds'] == 'nonsense'
    # with pytest.raises(XmrsDeserializationError):
    #     r.eds()

    # Derivation ---------------------------------------------------------
    # The data below was modified in several ways for compatibility:
    #  - removed head annotation (on W_PERIOD_PLR)
    #  - removed type info
    #  - removed from/to info
    #  - added start/end
    #  - escaped quotes
    #  - capitalized entity names
    # Each "omitted" comment lists the fields dropped from the dictionary
    # that follows it.

    deriv_s = '(189 SB-HD_MC_C 0.228699 0 2 (37 it 0.401245 0 1 ("it" 34 "token [ +FORM \\"it\\" +FROM #1=\\"0\\" +TO \\"2\\" ]")) (188 W_PERIOD_PLR -0.113641 1 2 (187 V_PST_OLR 0 1 2 (56 rain_v1 0 1 2 ("rained." 32 "token [ +FORM \\"rained.\\" +FROM #1=\\"3\\" +TO \\"10\\" ]")))))'
    deriv_d = {
        # omitted: "type": "subjh_mc_rule"
        "id": 189, "entity": "SB-HD_MC_C", "label": "S",
        "score": 0.228699, "start": 0, "end": 2,
        "daughters": [
            {
                # omitted: "type": "n_-_pr-it-x_le", "from": 0, "to": 2
                "id": 37, "entity": "it", "score": 0.401245,
                "start": 0, "end": 1, "form": "it",
                "tokens": [
                    # omitted: "from": 0, "to": 2
                    {"id": 34, "tfs": "token [ +FORM \\\"it\\\" +FROM #1=\\\"0\\\" +TO \\\"2\\\" ]"},
                ],
            },
            {
                # omitted: "type": "punctuation_period_rule"
                "id": 188, "entity": "W_PERIOD_PLR", "score": -0.113641,
                "start": 1, "end": 2,
                "daughters": [
                    {
                        # omitted: "type": "v_pst_inflrule"
                        "id": 187, "entity": "V_PST_OLR", "score": 0,
                        "start": 1, "end": 2,
                        "daughters": [
                            {
                                # omitted: "type": "v_-_it_le", "from": 3, "to": 10
                                "id": 56, "entity": "rain_v1", "score": 0,
                                "start": 1, "end": 2, "form": "rained.",
                                "tokens": [
                                    # omitted: "from": 3, "to": 10
                                    {"id": 32, "tfs": "token [ +FORM \\\"rained.\\\" +FROM #1=\\\"3\\\" +TO \\\"10\\\" ]"},
                                ],
                            },
                        ],
                    },
                ],
            },
        ],
    }
    deriv = Derivation.from_dict(deriv_d)

    check('derivation', deriv_s, deriv)
    check('derivation', deriv_d, deriv)