Пример #1
0
 def test__bool__(self):
     """Truthiness: only a missing span is falsy; zero-valued spans are not."""
     # A Lnk with no data and a (-1, -1) charspan both count as "empty".
     for empty in (Lnk(None), Lnk.charspan(-1, -1)):
         assert not empty
     # Every other variant is truthy, even with zero-valued data.
     for present in (Lnk.charspan(0, 0),
                     Lnk.chartspan(0, 0),
                     Lnk.tokens([]),
                     Lnk.edge(0)):
         assert present
Пример #2
0
def abrams_barked_dmrs():
    """Build the DMRS fixture for the sentence 'Abrams barked.'."""
    nodes = [
        Node(10, 'udef_q'),
        Node(20, 'named', type='x',
             carg='Abrams', lnk=Lnk.charspan(0, 6)),
        Node(30, '_bark_v_1', type='e', properties={'TENSE': 'past'},
             lnk=Lnk.charspan(7, 13)),
    ]
    links = [
        Link(10, 20, 'RSTR', 'H'),
        Link(30, 20, 'ARG1', 'NEQ'),
    ]
    # node 30 (_bark_v_1) is both the top and the index
    return DMRS(30, 30,
                nodes=nodes,
                links=links,
                lnk=Lnk.charspan(0, 14),
                surface='Abrams barked.',
                identifier='1000380')
Пример #3
0
def _decode_lnk(cfrom, cto):
    """Build a charspan Lnk from offsets; both-None means no lnk at all."""
    if cfrom is None and cto is None:
        return None
    if cfrom is None or cto is None:
        # one offset without the other is ambiguous
        raise ValueError('Both cfrom and cto, or neither, must be specified.')
    return Lnk.charspan(cfrom, cto)
Пример #4
0
 def testCharSpanLnk(self):
     """Charspan Lnks hold (cfrom, cto), coerce strings, and validate args."""
     span = Lnk.charspan(0, 1)
     assert span.type == Lnk.CHARSPAN
     assert span.data == (0, 1)
     assert str(span) == '<0:1>'
     repr(span)  # smoke check: must not raise
     # numeric strings are coerced to ints
     assert Lnk.charspan('0', '1').data == (0, 1)
     # wrong arity or argument types raise TypeError
     for bad_args in ((1,), ([1, 2],), (1, 2, 3)):
         with pytest.raises(TypeError):
             Lnk.charspan(*bad_args)
     # non-numeric strings raise ValueError
     with pytest.raises(ValueError):
         Lnk.charspan('a', 'b')
Пример #5
0
 def from_string(cls, s):
     """
     Decode from the YY token lattice format.
     """
     def _unquote(text):
         # drop the assumed surrounding quote characters
         return text[1:-1]

     tokens = []
     for match in _yy_re.finditer(s):
         fields = match.groupdict()
         lnk = None
         if fields['lnkfrom'] is not None:
             lnk = Lnk.charspan(fields['lnkfrom'], fields['lnkto'])
         pos = []
         if fields['pos'] is not None:
             # alternating '"TAG" prob' pairs
             parts = fields['pos'].strip().split()
             tags = map(_unquote, parts[::2])
             probs = map(float, parts[1::2])
             pos = list(zip(tags, probs))
         surface = fields['surface']
         tokens.append(YYToken(
             int(fields['id']),
             int(fields['start']),
             int(fields['end']),
             lnk,
             [int(p) for p in fields['paths'].strip().split()],
             _unquote(fields['form']),
             None if surface is None else _unquote(surface),
             int(fields['ipos']),
             [_unquote(lr) for lr in fields['lrules'].strip().split()],
             pos,
         ))
     return cls(tokens)
Пример #6
0
 def tokenize_result(self, result, pattern=DEFAULT_TOKENIZER):
     """Tokenize *result* with *pattern* and wrap it as a YYTokenLattice."""
     logger.info('tokenize_result(%r, %r)', result, pattern)
     tokens = []
     # each tok is (cfrom, cto, form); token ids double as vertex numbers
     for i, tok in enumerate(_tokenize(result, pattern)):
         tokens.append(
             YYToken(id=i, start=i, end=i + 1,
                     lnk=Lnk.charspan(tok[0], tok[1]),
                     form=tok[2]))
     return YYTokenLattice(tokens)
Пример #7
0
def from_triples(triples):
    """
    Decode triples, as from :func:`to_triples`, into a DMRS object.

    Args:
        triples: iterable of (source, relation, target) triples
    Returns:
        DMRS: the decoded graph
    """
    top = lnk = surface = identifier = None
    nids, nd, edges = [], {}, []
    for src, rel, tgt in triples:
        rel = rel.lstrip(':')
        # Detect the top marker BEFORE string coercion: its source is
        # None, and str(None) == 'None' would hide it from this test
        # (the original checked after coercion, making the branch dead).
        if src is None and rel == 'top':
            top = str(tgt)
            continue
        src, tgt = str(src), str(tgt)  # in case penman converts ids to ints
        if src not in nd:
            if top is None:
                top = src  # first node encountered is the default top
            nids.append(src)
            nd[src] = {
                'pred': None,
                'lnk': None,
                'type': None,
                'props': {},
                'carg': None
            }
        if rel == 'instance':
            nd[src]['pred'] = tgt
        elif rel == 'lnk':
            # lnk values look like '"<cfrom:cto>"'
            cfrom, cto = tgt.strip('"<>').split(':')
            nd[src]['lnk'] = Lnk.charspan(int(cfrom), int(cto))
        elif rel == 'carg':
            if (tgt[0], tgt[-1]) == ('"', '"'):
                tgt = tgt[1:-1]  # strip surrounding quotes
            nd[src]['carg'] = tgt
        elif rel == CVARSORT:
            nd[src]['type'] = tgt
        elif rel.islower():
            # lowercase relations are morphosemantic properties
            nd[src]['props'][rel] = tgt
        else:
            # edge relations have the form ROLE-POST (e.g., ARG1-NEQ)
            rargname, post = rel.rsplit('-', 1)
            edges.append((src, tgt, rargname, post))
    # renumber node ids into the standard DMRS id space
    nidmap = {nid: FIRST_NODE_ID + i for i, nid in enumerate(nids)}
    nodes = [
        Node(id=nidmap[nid],
             predicate=nd[nid]['pred'],
             type=nd[nid]['type'],
             properties=nd[nid]['props'],
             lnk=nd[nid]['lnk'],
             carg=nd[nid]['carg']) for nid in nids
    ]
    links = [Link(nidmap[s], nidmap[t], r, p) for s, t, r, p in edges]
    return DMRS(top=nidmap[top],
                nodes=nodes,
                links=links,
                lnk=lnk,
                surface=surface,
                identifier=identifier)
Пример #8
0
def test_len_change_with_capturing_group():
    """A rule with a capturing group changes length; the maps must track it."""
    mapped = r.from_string(r"!wo(n't)	will \1").apply("I won't go")
    assert mapped.string == "I will n't go"
    assert mapped.startmap.tolist() == [
        1, 0, 0, 0, -1, -2, -3, -4, -3, -3, -3, -3, -3, -3, -3
    ]
    assert mapped.endmap.tolist() == [
        0, 0, 0, 1, 0, -1, -2, -3, -3, -3, -3, -3, -3, -3, -4
    ]

    lattice = r.from_string(r"!wo(n't)	will \1").tokenize("I won't go")
    # (form, lnk) expectations for each produced token
    expected = [
        ('I', Lnk.charspan(0, 1)),
        ('will', Lnk.charspan(2, 4)),
        ("n't", Lnk.charspan(4, 7)),
        ('go', Lnk.charspan(8, 10)),
    ]
    assert len(lattice.tokens) == len(expected)
    for token, (form, lnk) in zip(lattice.tokens, expected):
        assert token.form == form
        assert token.lnk == lnk
Пример #9
0
 def test_from_dict(self):
     """Decoding a minimal dict fills defaults; a full dict round-trips."""
     minimal = YYToken.from_dict(
         {'id': 1, 'start': 0, 'end': 1, 'form': "dog"})
     check_token(minimal, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     full = YYToken.from_dict({
         'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 1,
         # 'paths': [1],
         'form': "dog", 'surface': "Dog",
         # 'ipos': 0, 'lrules': ["null"],
         'tags': ["NN"], 'probabilities': [1.0]
     })
     check_token(full, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                 0, ["null"], [("NN", 1.0)])
Пример #10
0
 def test_to_dict(self):
     """Encoding omits defaulted fields and maps lnk to 'from'/'to'."""
     plain = YYToken(1, 0, 1, form="dog")
     assert plain.to_dict() == {
         'id': 1, 'start': 0, 'end': 1, 'form': "dog"}
     full = YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                    ipos=0, lrules=["null"], pos=[("NN", 1.0)])
     expected = {
         'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 1,
         # 'paths': [1],
         'form': "dog", 'surface': "Dog",
         # 'ipos': 0, 'lrules': ["null"],
         'tags': ["NN"], 'probabilities': [1.0]
     }
     assert full.to_dict() == expected
Пример #11
0
    def test_to_list(self):
        """Lattice serialization produces one dict per token, in order."""
        minimal = YY([YYToken(1, 0, 1, form="dog")])
        assert minimal.to_list() == [
            {'id': 1, 'start': 0, 'end': 1, 'form': "dog"}]

        full = YY([
            YYToken(1, 0, 1, Lnk.charspan(0, 4), [1], "dogs", "Dogs",
                    ipos=0, lrules=["null"], pos=[("NN", 1.0)]),
            YYToken(2, 1, 2, Lnk.charspan(5, 9), [1], "bark",
                    ipos=0, lrules=["null"], pos=[("VBZ", 1.0)])
        ])
        expected = [
            {'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 4,
             'form': "dogs", 'surface': "Dogs",
             # 'ipos': 0, 'lrules': ["null"],
             'tags': ["NN"], 'probabilities': [1.0]},
            {'id': 2, 'start': 1, 'end': 2, 'from': 5, 'to': 9,
             'form': "bark",
             # 'ipos': 0, 'lrules': ["null"],
             'tags': ["VBZ"], 'probabilities': [1.0]},
        ]
        assert full.to_list() == expected
Пример #12
0
 def from_dict(cls, d):
     """
     Decode from a dictionary as from :meth:`to_dict`.
     """
     lnk = None
     if 'from' in d:
         # to_dict always writes 'from' and 'to' together
         lnk = Lnk.charspan(d['from'], d['to'])
     tags = d.get('tags', [])
     probs = d.get('probabilities', [])
     return cls(
         d['id'],
         d['start'],
         d['end'],
         lnk,
         # d.get('paths', [1]),
         form=d['form'],
         surface=d.get('surface'),
         # ipos=
         # lrules=
         pos=list(zip(tags, probs))
     )
Пример #13
0
 def test_init(self):
     """YYToken requires id, start, end, and a form; the rest default.

     Each under-specified construction gets its own pytest.raises block:
     a ``with raises`` context exits at the first raising statement, so
     stacking all the calls in one block (as before) left every call
     after the first completely unexecuted and untested.
     """
     # positional prefixes that omit the required form
     incomplete_args = [
         (),
         (1,),
         (1, 0),
         (1, 0, 1),
         (1, 0, 1, Lnk.charspan(0, 1)),
         (1, 0, 1, Lnk.charspan(0, 1), [1]),
     ]
     for args in incomplete_args:
         with pytest.raises(TypeError):
             YYToken(*args)
     # keyword combinations that still omit the required form
     incomplete_kwargs = [
         dict(surface="."),
         dict(surface=".", ipos=0),
         dict(surface=".", ipos=0, lrules=["null"]),
         dict(surface=".", ipos=0, lrules=["null"], pos=[(".", 1.0)]),
     ]
     for kwargs in incomplete_kwargs:
         with pytest.raises(TypeError):
             YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], **kwargs)
     t = YYToken(1, 0, 1, form="dog")
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     t = YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                 ipos=0, lrules=["null"], pos=[("NN", 1.0)])
     check_token(t, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                 0, ["null"], [("NN", 1.0)])
Пример #14
0
def from_dict(d):
    """
    Decode a dictionary, as from :func:`to_dict`, into an EDS object.
    """
    nodes = []
    for nodeid, data in d.get('nodes', {}).items():
        lnk = None
        if 'lnk' in data:
            lnk = Lnk.charspan(data['lnk']['from'], data['lnk']['to'])
        nodes.append(
            Node(id=nodeid,
                 predicate=data['label'],
                 type=data.get('type'),
                 edges=data.get('edges', {}),
                 properties=data.get('properties', None),
                 carg=data.get('carg'),
                 lnk=lnk))
    # order nodes by span: earliest start first, longest span first on ties
    nodes.sort(key=lambda n: (n.cfrom, -n.cto))
    return EDS(d.get('top'), nodes=nodes)
Пример #15
0
 def test__eq__(self):
     """Equality requires both the same lnk type and the same data."""
     nolnk = Lnk(None)
     span01 = Lnk.charspan(0, 1)
     assert nolnk == Lnk(None)
     assert nolnk != span01
     assert span01 == Lnk.charspan(0, 1)
     assert span01 != Lnk.charspan(0, 2)   # same type, different data
     assert span01 != Lnk.chartspan(0, 1)  # same data, different type
Пример #16
0
 def _lnk(o):
     # Convert a {'from': ..., 'to': ...} mapping to a charspan Lnk;
     # None passes through unchanged.
     if o is None:
         return None
     return Lnk.charspan(o['from'], o['to'])
Пример #17
0
 def test_fromstring(self):
     """Parsing v1/v2 token strings yields tokens with the right fields."""
     assert len(YY.from_string(token_v1_basic).tokens) == 1
     check_token(YY.from_string(token_v1_basic).tokens[0],
                 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     check_token(YY.from_string(token_v1_surface).tokens[0],
                 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"], [])
     check_token(YY.from_string(token_v1_pos).tokens[0],
                 1, 0, 1, None, [1], "dog", None, 0, ["null"],
                 [("NN", 0.8), ("VV", 0.2)])
     check_token(YY.from_string(token_v1_surface_pos).tokens[0],
                 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"],
                 [("NN", 1.0)])
     check_token(YY.from_string(token_v1_lrules).tokens[0],
                 1, 0, 1, None, [1], "dog", None, 0,
                 ["lrule1", "lrule2"], [])
     check_token(YY.from_string(token_v2).tokens[0],
                 1, 0, 1, Lnk.charspan(1, 3), [1], "dog", "Dog",
                 0, ["null"], [("NN", 1.0)])
     tl = YY.from_string(tokenstring)
     # (id, start, end, lnk, form, pos) for each of the nine tokens;
     # paths, surface, ipos, and lrules are identical across them
     expected = [
         (42, 0, 1, Lnk.charspan(0, 12), "Tokenization",
          [("NNP", 0.7677), ("NN", 0.2323)]),
         (43, 1, 2, Lnk.charspan(12, 13), ",", [(",", 1.0000)]),
         (44, 2, 3, Lnk.charspan(14, 15), "a", [("DT", 1.0000)]),
         (45, 3, 4, Lnk.charspan(16, 27), "non-trivial",
          [("JJ", 1.0000)]),
         (46, 4, 5, Lnk.charspan(28, 36), "exercise",
          [("NN", 0.9887), ("VB", 0.0113)]),
         (47, 5, 6, Lnk.charspan(36, 37), ",", [(",", 1.0000)]),
         (48, 6, 7, Lnk.charspan(38, 44), "bazed",
          [("VBD", 0.5975), ("VBN", 0.4025)]),
         (49, 7, 8, Lnk.charspan(45, 58), "*****@*****.**",
          [("NN", 0.7342), ("JJ", 0.2096)]),
         (50, 8, 9, Lnk.charspan(58, 59), ".", [(".", 1.0000)]),
     ]
     assert len(tl.tokens) == len(expected)
     for token, (tid, start, end, lnk, form, pos) in zip(tl.tokens,
                                                         expected):
         check_token(token, tid, start, end, lnk, [1], form, None,
                     0, ["null"], pos)
Пример #18
0
 def _lnk(x):
     # None stays None; otherwise build a charspan Lnk from 'from'/'to'.
     return Lnk.charspan(x['from'], x['to']) if x is not None else None
Пример #19
0
def _decode_lnk(elem):
    """Read cfrom/cto off an XML element; missing offsets default to '-1'."""
    cfrom = elem.get('cfrom', '-1')
    cto = elem.get('cto', '-1')
    return Lnk.charspan(cfrom, cto)
Пример #20
0
 def __init__(self):
     # Fixed one-character span at offset 0; presumably a minimal test
     # fixture — confirm against the enclosing class's usage.
     self.lnk = Lnk.charspan(0, 1)