示例#1
0
def test_with_iterative_groups():
    """Iterative REPP rules with group references should space out
    punctuation while keeping character maps and token spans aligned
    with the original input.
    """
    repp_def = ('#1\n'
                r'!(^| )([()%,])([^ ])	\1\2 \3'
                '\n'
                r'!([^ ])([()%,])( |$)	\1 \2\3'
                '\n'
                '#\n'
                '>1')
    result = r.from_string(repp_def).apply('(42%),')
    assert result.string == '( 42 % ) ,'
    assert result.startmap.tolist() == [
        1, 0, 0, -1, -1, -1, -2, -2, -3, -3, -4, -4]
    assert result.endmap.tolist() == [
        0, 0, -1, -1, -1, -2, -2, -3, -3, -4, -4, -5]

    lattice = r.from_string(repp_def).tokenize('(42%),')
    # each expected token: (form, cfrom, cto) relative to the raw input
    expected = [('(', 0, 1), ('42', 1, 3), ('%', 3, 4),
                (')', 4, 5), (',', 5, 6)]
    assert len(lattice.tokens) == len(expected)
    for token, (form, cfrom, cto) in zip(lattice.tokens, expected):
        assert token.form == form
        assert token.lnk == Lnk.charspan(cfrom, cto)
示例#2
0
    def test_from_list(self):
        """YY.from_list builds YYToken objects from dictionaries,
        filling in defaults for omitted fields.
        """
        # minimal token: only id, start, end, and form are given
        tl = YY.from_list(
            [{'id': 1, 'start': 0, 'end': 1, 'form': "dog"}]
        )
        assert tl.tokens == [YYToken(1, 0, 1, form="dog")]

        # fuller tokens with character spans, surface forms, and POS tags
        token_dicts = [
            {'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 4,
             'paths': [1], 'form': "dogs", 'surface': "Dogs",
             # 'ipos': 0, 'lrules': ["null"],
             'tags': ["NN"], 'probabilities': [1.0]
            },
            {'id': 1, 'start': 0, 'end': 1, 'from': 5, 'to': 9,
             'paths': [1], 'form': "bark",
             # 'ipos': 0, 'lrules': ["null"],
             'tags': ["VBZ"], 'probabilities': [1.0]
            }
        ]
        tl = YY.from_list(token_dicts)
        expected = [
            YYToken(1, 0, 1, Lnk.charspan(0, 4), [1], "dogs", "Dogs",
                    ipos=0, lrules=["null"], pos=[("NN", 1.0)]),
            YYToken(1, 0, 1, Lnk.charspan(5, 9), [1], "bark",
                    ipos=0, lrules=["null"], pos=[("VBZ", 1.0)])
        ]
        assert tl.tokens == expected
示例#3
0
def abrams_barked_dmrs():
    """Return a small example DMRS for the sentence 'Abrams barked.'."""
    nodes = [
        Node(10, 'udef_q'),
        Node(20, 'named', type='x',
             carg='Abrams', lnk=Lnk.charspan(0, 6)),
        Node(30, '_bark_v_1', type='e', properties={'TENSE': 'past'},
             lnk=Lnk.charspan(7, 13)),
    ]
    links = [
        Link(10, 20, 'RSTR', 'H'),
        Link(30, 20, 'ARG1', 'NEQ'),
    ]
    return DMRS(
        30, 30,  # top and index both point at the verb node
        nodes=nodes,
        links=links,
        lnk=Lnk.charspan(0, 14),
        surface='Abrams barked.',
        identifier='1000380')
示例#4
0
def _decode_lnk(cfrom, cto):
    """Build a character-span Lnk from *cfrom* and *cto*.

    Both may be None (no lnk), in which case None is returned;
    specifying only one of them is an error.
    """
    if cfrom is None and cto is None:
        return None
    if cfrom is None or cto is None:
        raise ValueError('Both cfrom and cto, or neither, must be specified.')
    return Lnk.charspan(cfrom, cto)
示例#5
0
def from_triples(triples):
    """
    Decode triples, as from :func:`to_triples`, into an EDS object.
    """
    order = []  # node ids in order of first mention
    data = {}   # node id -> accumulated node attributes
    for source, relation, target in triples:
        relation = relation.lstrip(':')
        if source not in data:
            order.append(source)
            data[source] = {'pred': None, 'type': None, 'edges': {},
                            'props': {}, 'lnk': None, 'carg': None}
        entry = data[source]
        if relation == 'instance':
            entry['pred'] = target
        elif relation == 'lnk':
            entry['lnk'] = Lnk(target.strip('"'))
        elif relation == 'carg':
            # strip surrounding double quotes, if present
            if (target[0], target[-1]) == ('"', '"'):
                target = target[1:-1]
            entry['carg'] = target
        elif relation == 'type':
            entry['type'] = target
        elif relation.islower():
            # lower-cased relations are node properties
            entry['props'][relation.upper()] = target
        else:
            entry['edges'][relation] = target
    nodes = [Node(nid,
                  data[nid]['pred'],
                  type=data[nid]['type'],
                  edges=data[nid]['edges'],
                  properties=data[nid]['props'],
                  carg=data[nid]['carg'],
                  lnk=data[nid]['lnk'])
             for nid in order]
    # the first node mentioned is taken as the top
    return EDS(top=order[0] if order else None, nodes=nodes)
示例#6
0
def _decode_node(start, lexer):
    """Decode a single node whose id *start* has already been read.

    The lexer is consumed in a fixed order: predicate symbol, optional
    lnk, optional constant argument, then properties and edges.
    """
    # predicates are normalized to lower case
    predicate = lexer.expect_type(SYMBOL).lower()
    # accept_type() returns None when the optional token is absent;
    # Lnk(None) then produces an unspecified lnk
    lnk = Lnk(lexer.accept_type(LNK))
    carg = lexer.accept_type(CARG)
    nodetype, properties = _decode_properties(start, lexer)
    edges = _decode_edges(start, lexer)
    return Node(start, predicate, nodetype, edges, properties, carg, lnk)
示例#7
0
 def testChartSpanLnk(self):
     """Lnk.chartspan builds a chart-span Lnk and validates arguments."""
     lnk = Lnk.chartspan(0, 1)
     assert lnk.type == Lnk.CHARTSPAN
     assert lnk.data == (0, 1)
     assert str(lnk) == '<0#1>'
     repr(lnk)  # no error
     # string arguments are coerced to ints
     lnk = Lnk.chartspan('0', '1')
     assert lnk.data == (0, 1)
     # wrong arity or non-scalar arguments raise TypeError
     for bad_args in ((1,), ([1, 2],), (1, 2, 3)):
         with pytest.raises(TypeError):
             Lnk.chartspan(*bad_args)
     # uncoercible strings raise ValueError
     with pytest.raises(ValueError):
         Lnk.chartspan('a', 'b')
示例#8
0
 def from_string(cls, s):
     """
     Decode from the YY token lattice format.
     """
     def _qstrip(s):
         return s[1:-1]  # remove assumed quote characters

     tokens = []
     for match in _yy_re.finditer(s):
         fields = match.groupdict()
         lnk = None
         if fields['lnkfrom'] is not None:
             lnk = Lnk.charspan(fields['lnkfrom'], fields['lnkto'])
         pos = []
         if fields['pos'] is not None:
             # alternating sequence of quoted tags and probabilities
             parts = fields['pos'].strip().split()
             pos = list(zip(map(_qstrip, parts[::2]),
                            map(float, parts[1::2])))
         surface = fields['surface']
         if surface is not None:
             surface = _qstrip(surface)
         tokens.append(
             YYToken(
                 int(fields['id']),
                 int(fields['start']),
                 int(fields['end']),
                 lnk,
                 [int(p) for p in fields['paths'].strip().split()],
                 _qstrip(fields['form']),
                 surface,
                 int(fields['ipos']),
                 [_qstrip(lr) for lr in fields['lrules'].strip().split()],
                 pos
             )
         )
     return cls(tokens)
示例#9
0
 def tokenize_result(self, result, pattern=DEFAULT_TOKENIZER):
     """Tokenize *result* using *pattern*; return a YYTokenLattice."""
     logger.info('tokenize_result(%r, %r)', result, pattern)
     tokens = []
     # each tok is (cfrom, cto, form); id/start/end are just positions
     for i, tok in enumerate(_tokenize(result, pattern)):
         tokens.append(
             YYToken(id=i, start=i, end=(i + 1),
                     lnk=Lnk.charspan(tok[0], tok[1]),
                     form=tok[2])
         )
     return YYTokenLattice(tokens)
示例#10
0
def from_triples(triples):
    """
    Decode triples, as from :func:`to_triples`, into a DMRS object.

    Node identifiers are remapped to sequential ids starting at
    FIRST_NODE_ID, in order of first mention.  The graph top comes from
    a ``(None, 'top', target)`` triple when present, otherwise from the
    first node encountered.
    """
    top = lnk = surface = identifier = None
    nids, nd, edges = [], {}, []
    for src, rel, tgt in triples:
        rel = rel.lstrip(':')
        # BUG FIX: the top triple has a None source, so this check must
        # happen *before* src is stringified below — str(None) is the
        # string 'None', and the `src is None` test could never succeed,
        # making this branch unreachable and creating a bogus node.
        if src is None and rel == 'top':
            top = str(tgt)
            continue
        src, tgt = str(src), str(tgt)  # in case penman converts ids to ints
        if src not in nd:
            if top is None:
                top = src
            nids.append(src)
            nd[src] = {
                'pred': None,
                'lnk': None,
                'type': None,
                'props': {},
                'carg': None
            }
        if rel == 'instance':
            nd[src]['pred'] = tgt
        elif rel == 'lnk':
            # lnk values look like "<cfrom:cto>"
            cfrom, cto = tgt.strip('"<>').split(':')
            nd[src]['lnk'] = Lnk.charspan(int(cfrom), int(cto))
        elif rel == 'carg':
            # strip surrounding double quotes, if present
            if (tgt[0], tgt[-1]) == ('"', '"'):
                tgt = tgt[1:-1]
            nd[src]['carg'] = tgt
        elif rel == CVARSORT:
            nd[src]['type'] = tgt
        elif rel.islower():
            # lower-cased relations are variable properties
            nd[src]['props'][rel] = tgt
        else:
            # anything else is a link relation like 'ARG1-NEQ'
            rargname, post = rel.rsplit('-', 1)
            edges.append((src, tgt, rargname, post))
    nidmap = {nid: FIRST_NODE_ID + i for i, nid in enumerate(nids)}
    nodes = [
        Node(id=nidmap[nid],
             predicate=nd[nid]['pred'],
             type=nd[nid]['type'],
             properties=nd[nid]['props'],
             lnk=nd[nid]['lnk'],
             carg=nd[nid]['carg']) for nid in nids
    ]
    links = [Link(nidmap[s], nidmap[t], r, p) for s, t, r, p in edges]
    return DMRS(top=nidmap[top],
                nodes=nodes,
                links=links,
                lnk=lnk,
                surface=surface,
                identifier=identifier)
示例#11
0
def test_len_change_with_capturing_group():
    """A REPP rule that lengthens the string should keep character maps
    and token lnks aligned with the original input.
    """
    rule = r"!wo(n't)	will \1"
    x = r.from_string(rule).apply("I won't go")
    assert x.string == "I will n't go"
    assert x.startmap.tolist() == [
        1, 0, 0, 0, -1, -2, -3, -4, -3, -3, -3, -3, -3, -3, -3
    ]
    assert x.endmap.tolist() == [
        0, 0, 0, 1, 0, -1, -2, -3, -3, -3, -3, -3, -3, -3, -4
    ]

    x = r.from_string(rule).tokenize("I won't go")
    # each expected token: (form, cfrom, cto) relative to the raw input
    expected = [('I', 0, 1), ('will', 2, 4), ("n't", 4, 7), ('go', 8, 10)]
    assert len(x.tokens) == len(expected)
    for token, (form, cfrom, cto) in zip(x.tokens, expected):
        assert token.form == form
        assert token.lnk == Lnk.charspan(cfrom, cto)
示例#12
0
 def __new__(cls, id, start, end,
             lnk=None, paths=(1,), form=None, surface=None,
             ipos=0, lrules=("null",), pos=()):
     """Construct a YYToken.

     *form* is required (keyword-style); *lnk* defaults to an
     unspecified Lnk when not given.
     """
     if form is None:
         raise TypeError('Missing required keyword argument \'form\'.')
     if lnk is None:
         lnk = Lnk.default()
     # copy the sequence arguments to lists so default tuples aren't shared
     paths, lrules, pos = list(paths), list(lrules), list(pos)
     return super(YYToken, cls).__new__(
         cls, id, start, end, lnk, paths, form, surface,
         ipos, lrules, pos
     )
示例#13
0
 def test_from_dict(self):
     """YYToken.from_dict fills defaults for omitted fields and decodes
     'from'/'to' into a charspan Lnk.
     """
     minimal = {'id': 1, 'start': 0, 'end': 1, 'form': "dog"}
     t = YYToken.from_dict(minimal)
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     full = {
         'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 1,
         # 'paths': [1],
         'form': "dog", 'surface': "Dog",
         # 'ipos': 0, 'lrules': ["null"],
         'tags': ["NN"], 'probabilities': [1.0]
     }
     t = YYToken.from_dict(full)
     check_token(t, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                 0, ["null"], [("NN", 1.0)])
示例#14
0
 def test_to_dict(self):
     """YYToken.to_dict omits default-valued fields and encodes the lnk
     as 'from'/'to'.
     """
     t = YYToken(1, 0, 1, form="dog")
     assert t.to_dict() == {'id': 1, 'start': 0, 'end': 1, 'form': "dog"}
     t = YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                 ipos=0, lrules=["null"], pos=[("NN", 1.0)])
     expected = {
         'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 1,
         # 'paths': [1],
         'form': "dog", 'surface': "Dog",
         # 'ipos': 0, 'lrules': ["null"],
         'tags': ["NN"], 'probabilities': [1.0]
     }
     assert t.to_dict() == expected
示例#15
0
 def testEdgeLnk(self):
     """Lnk.edge builds an edge Lnk from an int (or int-like string)."""
     lnk = Lnk.edge(1)
     assert lnk.type == Lnk.EDGE
     assert lnk.data == 1
     assert str(lnk) == '<@1>'
     repr(lnk)  # no error
     # strings are coerced to ints
     lnk = Lnk.edge('1')
     assert lnk.data == 1
     # non-int-coercible arguments raise TypeError
     for bad in (None, (1, )):
         with pytest.raises(TypeError):
             Lnk.edge(bad)
     # uncoercible strings raise ValueError
     with pytest.raises(ValueError):
         Lnk.edge('a')
示例#16
0
 def test__bool__(self):
     """Only unspecified or (-1, -1) charspan Lnks are falsy."""
     for falsy in (Lnk(None), Lnk.charspan(-1, -1)):
         assert not falsy
     truthy = (Lnk.charspan(0, 0), Lnk.chartspan(0, 0),
               Lnk.tokens([]), Lnk.edge(0))
     for lnk in truthy:
         assert lnk
示例#17
0
 def from_dict(cls, d):
     """
     Decode from a dictionary as from :meth:`to_dict`.
     """
     # 'from'/'to' keys, when present, encode the character span
     lnk = None
     if 'from' in d:
         lnk = Lnk.charspan(d['from'], d['to'])
     tags = d.get('tags', [])
     probabilities = d.get('probabilities', [])
     return cls(
         d['id'],
         d['start'],
         d['end'],
         lnk,
         # d.get('paths', [1]),
         form=d['form'],
         surface=d.get('surface'),
         # ipos=
         # lrules=
         pos=list(zip(tags, probabilities))
     )
示例#18
0
 def testTokensLnk(self):
     """Lnk.tokens builds a token-list Lnk from an iterable of ints."""
     result = Lnk.tokens([1, 2, 3])
     assert result.type == Lnk.TOKENS
     assert result.data == (1, 2, 3)
     assert str(result) == '<1 2 3>'
     repr(result)  # no error
     # string elements are coerced to ints
     assert Lnk.tokens(['1']).data == (1, )
     # empty tokens list might be invalid, but accept for now
     assert Lnk.tokens([]).data == tuple()
     with pytest.raises(TypeError):
         Lnk.tokens(1)
     with pytest.raises(ValueError):
         Lnk.tokens(['a', 'b'])
示例#19
0
 def test_init(self):
     """Constructing a YYToken without *form* raises TypeError.

     BUG FIX: the original put all bad constructor calls inside a
     single ``pytest.raises`` block; only the first call ever executed
     (it raised immediately), leaving the remaining nine untested.
     Each call now gets its own ``pytest.raises`` context.
     """
     bad_calls = [
         lambda: YYToken(),
         lambda: YYToken(1),
         lambda: YYToken(1, 0),
         lambda: YYToken(1, 0, 1),
         lambda: YYToken(1, 0, 1, Lnk.charspan(0, 1)),
         lambda: YYToken(1, 0, 1, Lnk.charspan(0, 1), [1]),
         lambda: YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface="."),
         lambda: YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".",
                         ipos=0),
         lambda: YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".",
                         ipos=0, lrules=["null"]),
         lambda: YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".",
                         ipos=0, lrules=["null"], pos=[(".", 1.0)]),
     ]
     for call in bad_calls:
         with pytest.raises(TypeError):
             call()
     t = YYToken(1, 0, 1, form="dog")
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     t = YYToken(1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                 ipos=0, lrules=["null"], pos=[("NN", 1.0)])
     check_token(t, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                 0, ["null"], [("NN", 1.0)])
示例#20
0
def from_dict(d):
    """
    Decode a dictionary, as from :func:`to_dict`, into an EDS object.
    """
    nodes = []
    for nodeid, nodedata in d.get('nodes', {}).items():
        # 'lnk' entries, when present, carry a {'from', 'to'} mapping
        lnk = None
        if 'lnk' in nodedata:
            lnk = Lnk.charspan(nodedata['lnk']['from'],
                               nodedata['lnk']['to'])
        node = Node(id=nodeid,
                    predicate=nodedata['label'],
                    type=nodedata.get('type'),
                    edges=nodedata.get('edges', {}),
                    properties=nodedata.get('properties', None),
                    carg=nodedata.get('carg'),
                    lnk=lnk)
        nodes.append(node)
    # order by span: earlier start first, longer span first on ties
    nodes.sort(key=lambda n: (n.cfrom, -n.cto))
    return EDS(d.get('top'), nodes=nodes)
示例#21
0
 def _lnk(x):
     """Return a charspan Lnk for a {'from', 'to'} mapping, or None."""
     if x is None:
         return None
     return Lnk.charspan(x['from'], x['to'])
示例#22
0
 def test_raw_init(self):
     """The raw Lnk constructor requires arguments and a known type."""
     # calling with no arguments at all is an error
     with pytest.raises(TypeError):
         Lnk()
     # don't allow just any Lnk type
     with pytest.raises(LnkError):
         Lnk('lnktype', (0, 1))
示例#23
0
 def test_fromstring(self):
     """YY.from_string decodes single tokens and full token lattices."""
     assert len(YY.from_string(token_v1_basic).tokens) == 1
     # single-token inputs in several v1/v2 format variants, paired with
     # the expected check_token arguments
     single_cases = [
         (token_v1_basic,
          (1, 0, 1, None, [1], "dog", None, 0, ["null"], [])),
         (token_v1_surface,
          (1, 0, 1, None, [1], "dog", "Dog", 0, ["null"], [])),
         (token_v1_pos,
          (1, 0, 1, None, [1], "dog", None, 0, ["null"],
           [("NN", 0.8), ("VV", 0.2)])),
         (token_v1_surface_pos,
          (1, 0, 1, None, [1], "dog", "Dog", 0, ["null"],
           [("NN", 1.0)])),
         (token_v1_lrules,
          (1, 0, 1, None, [1], "dog", None, 0,
           ["lrule1", "lrule2"], [])),
         (token_v2,
          (1, 0, 1, Lnk.charspan(1, 3), [1], "dog", "Dog",
           0, ["null"], [("NN", 1.0)])),
     ]
     for source, expected in single_cases:
         t = YY.from_string(source).tokens[0]
         check_token(t, *expected)
     # a full nine-token lattice
     tl = YY.from_string(tokenstring)
     lattice_cases = [
         (42, 0, 1, Lnk.charspan(0, 12), [1], "Tokenization", None,
          0, ["null"], [("NNP", 0.7677), ("NN", 0.2323)]),
         (43, 1, 2, Lnk.charspan(12, 13), [1], ",", None,
          0, ["null"], [(",", 1.0000)]),
         (44, 2, 3, Lnk.charspan(14, 15), [1], "a", None,
          0, ["null"], [("DT", 1.0000)]),
         (45, 3, 4, Lnk.charspan(16, 27), [1], "non-trivial", None,
          0, ["null"], [("JJ", 1.0000)]),
         (46, 4, 5, Lnk.charspan(28, 36), [1], "exercise", None,
          0, ["null"], [("NN", 0.9887), ("VB", 0.0113)]),
         (47, 5, 6, Lnk.charspan(36, 37), [1], ",", None,
          0, ["null"], [(",", 1.0000)]),
         (48, 6, 7, Lnk.charspan(38, 44), [1], "bazed", None,
          0, ["null"], [("VBD", 0.5975), ("VBN", 0.4025)]),
         (49, 7, 8, Lnk.charspan(45, 58), [1], "*****@*****.**", None,
          0, ["null"], [("NN", 0.7342), ("JJ", 0.2096)]),
         (50, 8, 9, Lnk.charspan(58, 59), [1], ".", None,
          0, ["null"], [(".", 1.0000)]),
     ]
     assert len(tl.tokens) == len(lattice_cases)
     for token, expected in zip(tl.tokens, lattice_cases):
         check_token(token, *expected)
示例#24
0
 def test__eq__(self):
     """Lnks compare equal only when both type and data match."""
     assert Lnk(None) == Lnk(None)
     span = Lnk.charspan(0, 1)
     assert Lnk(None) != span
     assert span == Lnk.charspan(0, 1)
     assert span != Lnk.charspan(0, 2)   # differing data
     assert span != Lnk.chartspan(0, 1)  # differing type
示例#25
0
 def _lnk(o):
     """Make a charspan Lnk from a {'from', 'to'} mapping; None passes through."""
     return Lnk.charspan(o['from'], o['to']) if o is not None else None
示例#26
0
 def __init__(self):
     # NOTE(review): fixed (0, 1) character span; presumably a minimal
     # fixture/stand-in lnk — confirm against the class's callers
     self.lnk = Lnk.charspan(0, 1)
示例#27
0
def _decode_lnk(lexer):
    """Consume an optional LNK token from *lexer*; return a Lnk or None."""
    raw = lexer.accept_type(LNK)
    return None if raw is None else Lnk(raw)
示例#28
0
def _decode_lnk(elem):
    """Build a charspan Lnk from an element's 'cfrom'/'cto' attributes.

    Missing attributes default to '-1' (an unspecified endpoint).
    """
    cfrom = elem.get('cfrom', '-1')
    cto = elem.get('cto', '-1')
    return Lnk.charspan(cfrom, cto)
示例#29
0
 def testDefault(self):
     """Lnk.default() yields an unspecified Lnk with empty string form."""
     default_lnk = Lnk.default()
     assert default_lnk.type == Lnk.UNSPECIFIED
     assert str(default_lnk) == ''
     repr(default_lnk)  # no error