Exemplo n.º 1
0
def left_loops(node):
    '''
    Yield (an estimate of) every left-recursive loop through `node`.

    Whether a loop really is left recursive depends on which parsers
    consume data, which cannot be known here.  The estimate assumes that
    every matcher eventually (ie via its children) consumes something,
    improved slightly by ignoring `Lookahead`.

    A path is extended only through the first child of each intermediate
    node, except that all children of an `Or` are followed and the child
    after a `Lookahead` is also followed.

    Each loop yielded is a list that starts and ends with `node`.
    '''
    from lepl.matchers.combine import Or
    from lepl.matchers.core import Lookahead
    seen = set([node])  # avoid getting lost in embedded loops
    pending = [[node]]
    while pending:
        path = pending.pop()
        tail = path[-1]
        if not isinstance(tail, Matcher):
            continue
        for child in tail:
            extended = path + [child]
            if child is node:
                yield extended
            elif child not in seen:
                pending.append(extended)
                seen.add(child)
            # later siblings are only considered below an Or, or when the
            # child just handled is a Lookahead
            if not (is_child(tail, Or, fail=False) or
                    is_child(child, Lookahead, fail=False)):
                break
Exemplo n.º 2
0
def left_loops(node):
    '''
    Yield (an estimate of) every left-recursive loop through `node`.

    Whether a loop really is left recursive depends on which parsers
    consume data, which cannot be known here.  The estimate assumes that
    every matcher eventually (ie via its children) consumes something,
    improved slightly by ignoring `Lookahead`.

    A path is extended only through the first child of each intermediate
    node, except that all children of an `Or` are followed and the child
    after a `Lookahead` is also followed.

    Each loop yielded is a list that starts and ends with `node`.
    '''
    from lepl.matchers.combine import Or
    from lepl.matchers.core import Lookahead
    seen = set([node])  # avoid getting lost in embedded loops
    pending = [[node]]
    while pending:
        path = pending.pop()
        tail = path[-1]
        if not isinstance(tail, Matcher):
            continue
        for child in tail:
            extended = path + [child]
            if child is node:
                yield extended
            elif child not in seen:
                pending.append(extended)
                seen.add(child)
            # later siblings are only considered below an Or, or when the
            # child just handled is a Lookahead
            if not (is_child(tail, Or, fail=False) or
                    is_child(child, Lookahead, fail=False)):
                break
Exemplo n.º 3
0
 def add(self, type_, node):
     '''
     Add a node of a given type.

     `type_` is used as a bit mask and tested against `LEAF` and
     `NONTREE`; `node` is the object being recorded.

     Counters updated: `leaves` when the `LEAF` bit is set; `loops` when
     the `NONTREE` bit is set and the node's type is a `Matcher` child;
     then exactly one of `total`, `others`, `duplicates` or `unhashable`.
     '''
     try:
         node_type = matcher_type(node)
     except MatcherTypeException:
         # fall back to the plain Python type
         node_type = type(node)
     if type_ & LEAF:
         self.leaves += 1
     if type_ & NONTREE and is_child(node_type, Matcher, fail=False):
         self.loops += 1
     try:
         if node not in self.__known:
             self.__known.add(node)
             if node_type not in self.types:
                 self.types[node_type] = set()
             self.types[node_type].add(node)
             if is_child(node_type, Matcher):
                 self.total += 1
             else:
                 self.others += 1
         else:
             self.duplicates += 1
     except Exception:
         # Deliberately best-effort: any ordinary failure while recording
         # the node (presumably because it cannot be hashed) is only
         # counted.  `Exception` rather than a bare `except` so that
         # KeyboardInterrupt and SystemExit still propagate.
         self.unhashable += 1
Exemplo n.º 4
0
 def add(self, type_, node):
     '''
     Add a node of a given type.

     `type_` is used as a bit mask and tested against `LEAF` and
     `NONTREE`; `node` is the object being recorded.

     Counters updated: `leaves` when the `LEAF` bit is set; `loops` when
     the `NONTREE` bit is set and the node's type is a `Matcher` child;
     then exactly one of `total`, `others`, `duplicates` or `unhashable`.
     '''
     try:
         node_type = matcher_type(node)
     except MatcherTypeException:
         # fall back to the plain Python type
         node_type = type(node)
     if type_ & LEAF:
         self.leaves += 1
     if type_ & NONTREE and is_child(node_type, Matcher, fail=False):
         self.loops += 1
     try:
         if node not in self.__known:
             self.__known.add(node)
             if node_type not in self.types:
                 self.types[node_type] = set()
             self.types[node_type].add(node)
             if is_child(node_type, Matcher):
                 self.total += 1
             else:
                 self.others += 1
         else:
             self.duplicates += 1
     except Exception:
         # Deliberately best-effort: any ordinary failure while recording
         # the node (presumably because it cannot be hashed) is only
         # counted.  `Exception` rather than a bare `except` so that
         # KeyboardInterrupt and SystemExit still propagate.
         self.unhashable += 1
Exemplo n.º 5
0
 def new_clone(i, j, node, args, kargs):
     '''
     Clone `node` from `args` and `kargs`, substituting a type taken from
     `self.spec` when that is possible.

     The substitute is the `self.spec` value for the last key (in
     iteration order) of which `node` is a child.  It is used only when
     no positional or keyword argument is a `Matcher` that is not also a
     `NoTrampoline`; otherwise the clone has the same type as `node`.
     `i` and `j` are not used here.

     A `TypeError` raised while constructing the clone or copying its
     standard attributes is re-raised as a `TypeError` describing the
     call.
     '''
     def blocks(arg):
         return isinstance(arg, Matcher) and \
                 not isinstance(arg, NoTrampoline)

     type_ = None
     for parent in self.spec:
         if is_child(node, parent):
             type_ = self.spec[parent]
     usable = bool(type_)
     if usable:
         values = list(args) + [kargs[name] for name in kargs]
         usable = not any(blocks(value) for value in values)
     if not usable:
         type_ = type(node)
     try:
         clone = type_(*args, **kargs)
         copy_standard_attributes(node, clone)
         return clone
     except TypeError as err:
         message = fmt('Error cloning {0} with ({1}, {2}): {3}',
                       type_, args, kargs, err)
         raise TypeError(message)
Exemplo n.º 6
0
 def new_clone(node, args, kargs):
     '''
     Clone `node` from `args` and `kargs`, substituting a type taken from
     `self.spec` when that is possible.

     The substitute is the `self.spec` value for the last key (in
     iteration order) of which `node` is a child.  It is used only when
     no positional or keyword argument is a `Matcher` that is not also a
     `NoTrampolineTransformableWrapper`; otherwise the clone has the same
     type as `node`.

     A `TypeError` raised while constructing the clone or copying its
     standard attributes is re-raised as a `TypeError` describing the
     call.
     '''
     def blocks(arg):
         return isinstance(arg, Matcher) and \
                 not isinstance(arg, NoTrampolineTransformableWrapper)

     type_ = None
     for parent in self.spec:
         if is_child(node, parent):
             type_ = self.spec[parent]
     usable = bool(type_)
     if usable:
         values = list(args) + [kargs[name] for name in kargs]
         usable = not any(blocks(value) for value in values)
     if not usable:
         type_ = type(node)
     try:
         clone = type_(*args, **kargs)
         copy_standard_attributes(node, clone)
         return clone
     except TypeError as err:
         message = format('Error cloning {0} with ({1}, {2}): {3}', type_,
                          args, kargs, err)
         raise TypeError(message)
Exemplo n.º 7
0
 def test_and(self):
     '''
     Check that `Any() & Optional(Any())` transformed by `append('x')`
     parses 'a' to 'ax' after transforms are composed, and that the
     parser's matcher is an `And`.
     '''
     combined = Any() & Optional(Any())
     matcher = combined > append('x')
     matcher.config.clear().compose_transforms()
     parser = matcher.get_parse()
     first = parser('a')[0]
     assert first == 'ax', first
     assert is_child(parser.matcher, And), type(parser.matcher)
Exemplo n.º 8
0
def find_tokens(matcher):
    '''
    Return the set of Tokens reachable from `matcher`.

    The graph is walked breadth first, without descending into the
    Tokens themselves; instead `assert_not_token` is called on the
    content of each Token that has any.

    Raises `LexerError` if the walk finds both Tokens and `NonToken`
    matchers.

    Should we also check that a Token occurs somewhere on every path to a
    leaf node?
    '''
    found = set()
    outsiders = set()
    seen = set()
    queue = deque([matcher])
    while queue:
        current = queue.popleft()
        if current in seen:
            continue
        if is_child(current, NonToken):
            outsiders.add(current)
        if isinstance(current, BaseToken):
            found.add(current)
            if current.content:
                assert_not_token(current.content, seen)
        else:
            queue.extend(child for child in current
                         if isinstance(child, Matcher))
        seen.add(current)
    if found and outsiders:
        listing = '; '.join(str(n) for n in outsiders)
        raise LexerError(
            format('The grammar contains a mix of Tokens and non-Token '
                   'matchers at the top level. If Tokens are used then '
                   'non-token matchers that consume input must only '
                   'appear "inside" Tokens.  The non-Token matchers '
                   'include: {0}.',
                   listing))
    return found
Exemplo n.º 9
0
def find_tokens(matcher):
    '''
    Return the set of Tokens reachable from `matcher`.

    The graph is walked breadth first, without descending into the
    Tokens themselves; instead `assert_not_token` is called on the
    content of each Token that has any.

    Raises `LexerError` if the walk finds both Tokens and `NonToken`
    matchers.

    Should we also check that a Token occurs somewhere on every path to a
    leaf node?
    '''
    found = set()
    outsiders = set()
    seen = set()
    queue = deque([matcher])
    while queue:
        current = queue.popleft()
        if current in seen:
            continue
        if is_child(current, NonToken):
            outsiders.add(current)
        if isinstance(current, BaseToken):
            found.add(current)
            if current.content:
                assert_not_token(current.content, seen)
        else:
            queue.extend(child for child in current
                         if isinstance(child, Matcher))
        seen.add(current)
    if found and outsiders:
        listing = '; '.join(str(n) for n in outsiders)
        raise LexerError(
            format(
                'The grammar contains a mix of Tokens and non-Token '
                'matchers at the top level. If Tokens are used then '
                'non-token matchers that consume input must only '
                'appear "inside" Tokens.  The non-Token matchers '
                'include: {0}.', listing))
    return found
Exemplo n.º 10
0
 def assert_children(self, b):
     '''
     Assert that `b` is an `Or` and that each of its matchers is truthy
     (so, in particular, not None).
     '''
     assert is_child(b, Or)
     assert all(b.matchers)
Exemplo n.º 11
0
def RMemo(matcher):
    '''
    Return `matcher` wrapped in the _RMemo cache, or `matcher` itself,
    unchanged, when it is a `NoMemo` child.
    '''
    exempt = is_child(matcher, NoMemo, fail=False)
    return matcher if exempt else _RMemo(matcher)
Exemplo n.º 12
0
    def assert_children(self, b):
        '''
        Assert that `b` is an `Or` and that each of its matchers is truthy
        (so, in particular, not None).
        '''
        assert is_child(b, Or)
        assert all(b.matchers)
Exemplo n.º 13
0
def RMemo(matcher):
    '''
    Return `matcher` wrapped in the _RMemo cache, or `matcher` itself,
    unchanged, when it is a `NoMemo` child.
    '''
    exempt = is_child(matcher, NoMemo, fail=False)
    return matcher if exempt else _RMemo(matcher)
Exemplo n.º 14
0
 def _fmt_repr(self, indent, value, visited, key=None):
     '''
     Format a single value for the indented repr.

     Values that are not `Matcher` children are shown as `indent` spaces,
     an optional `key=` label and their `repr`.  A matcher already in
     `visited` is shown as its `_small_str` in square brackets after the
     same prefix; any other matcher is delegated to its own
     `_indented_repr`.
     '''
     padding = ' ' * indent
     label = key + '=' if key else ''
     prefix = padding + label
     if not is_child(value, Matcher, fail=False):
         return prefix + repr(value)
     if value in visited:
         return prefix + '[' + value._small_str + ']'
     return value._indented_repr(indent, visited, key)
Exemplo n.º 15
0
 def _fmt_repr(self, indent, value, visited, key=None):
     '''
     Format a single value for the indented repr.

     Values that are not `Matcher` children are shown as `indent` spaces,
     an optional `key=` label and their `repr`.  A matcher already in
     `visited` is shown as its `_small_str` in square brackets after the
     same prefix; any other matcher is delegated to its own
     `_indented_repr`.
     '''
     padding = ' ' * indent
     label = key + '=' if key else ''
     prefix = padding + label
     if not is_child(value, Matcher, fail=False):
         return prefix + repr(value)
     if value in visited:
         return prefix + '[' + value._small_str + ']'
     return value._indented_repr(indent, visited, key)
Exemplo n.º 16
0
def LMemo(matcher, curtail=None):
    '''
    Return `matcher` wrapped in the _LMemo cache, or `matcher` itself,
    unchanged, when it is a `NoMemo` child.

    `curtail` is passed on to `_LMemo`; when omitted it defaults to a
    function of (depth, length) that is true once depth exceeds length.
    '''
    if is_child(matcher, NoMemo, fail=False):
        return matcher
    if curtail is None:
        curtail = lambda depth, length: depth > length
    return _LMemo(matcher, curtail)
Exemplo n.º 17
0
def LMemo(matcher, curtail=None):
    '''
    Return `matcher` wrapped in the _LMemo cache, or `matcher` itself,
    unchanged, when it is a `NoMemo` child.

    `curtail` is passed on to `_LMemo`; when omitted it defaults to a
    function of (depth, length) that is true once depth exceeds length.
    '''
    if is_child(matcher, NoMemo, fail=False):
        return matcher
    if curtail is None:
        curtail = lambda depth, length: depth > length
    return _LMemo(matcher, curtail)
Exemplo n.º 18
0
 def __call__(self, graph):
     '''
     Re-order the alternatives of every `Or` that lies on a loop through
     a `Delayed` matcher in `graph`, so that the alternative which
     continues the loop comes last.  The `Or` instances are modified in
     place and `graph` is returned.
     '''
     from lepl.matchers.core import Delayed
     from lepl.matchers.combine import Or
     delayed_nodes = [x for x in preorder(graph, Matcher)
                      if isinstance(x, Delayed)]
     for delayed in delayed_nodes:
         for loop in either_loops(delayed, self.conservative):
             for i, current in enumerate(loop):
                 if not is_child(current, Or, fail=False):
                     continue
                 # we cannot be at the end of the list here, since that
                 # is a Delayed instance, so i + 1 is a valid index
                 # copy from tuple to list
                 current.matchers = list(current.matchers)
                 alternatives = current.matchers
                 follower = loop[i + 1]
                 # move the follower to the end of the list
                 position = alternatives.index(follower)
                 alternatives.append(alternatives.pop(position))
     return graph
Exemplo n.º 19
0
 def __call__(self, graph):
     '''
     Warn that results will be re-ordered, then re-order the alternatives
     of every `Or` that lies on a loop through a `Delayed` matcher in
     `graph`, so that the alternative which continues the loop comes
     last.  The `Or` instances are modified in place and `graph` is
     returned.
     '''
     self._warn('Alternatives are being re-ordered to improve stability with left-recursion.\n'
                'This will change the ordering of results.')
     delayed_nodes = [x for x in preorder(graph, Matcher)
                      if isinstance(x, Delayed)]
     for delayed in delayed_nodes:
         for loop in either_loops(delayed, self.conservative):
             for i, current in enumerate(loop):
                 if not is_child(current, Or, fail=False):
                     continue
                 # we cannot be at the end of the list here, since that
                 # is a Delayed instance, so i + 1 is a valid index
                 # copy from tuple to list
                 current.matchers = list(current.matchers)
                 alternatives = current.matchers
                 follower = loop[i + 1]
                 # move the follower to the end of the list
                 position = alternatives.index(follower)
                 alternatives.append(alternatives.pop(position))
     return graph
Exemplo n.º 20
0
 def __call__(self, graph):
     '''
     Re-order the alternatives of every `Or` that lies on a loop through
     a `Delayed` matcher in `graph`, so that the alternative which
     continues the loop comes last.  The `Or` instances are modified in
     place and `graph` is returned.
     '''
     from lepl.matchers.core import Delayed
     from lepl.matchers.combine import Or
     delayed_nodes = [x for x in preorder(graph, Matcher)
                      if isinstance(x, Delayed)]
     for delayed in delayed_nodes:
         for loop in either_loops(delayed, self.conservative):
             for i, current in enumerate(loop):
                 if not is_child(current, Or, fail=False):
                     continue
                 # we cannot be at the end of the list here, since that
                 # is a Delayed instance, so i + 1 is a valid index
                 # copy from tuple to list
                 current.matchers = list(current.matchers)
                 alternatives = current.matchers
                 follower = loop[i + 1]
                 # move the follower to the end of the list
                 position = alternatives.index(follower)
                 alternatives.append(alternatives.pop(position))
     return graph
Exemplo n.º 21
0
 def __call__(self, graph):
     '''
     Warn that results will be re-ordered, then re-order the alternatives
     of every `Or` that lies on a loop through a `Delayed` matcher in
     `graph`, so that the alternative which continues the loop comes
     last.  The `Or` instances are modified in place and `graph` is
     returned.
     '''
     self._warn('Alternatives are being re-ordered to improve stability with left-recursion.\n'
                'This will change the ordering of results.')
     delayed_nodes = [x for x in preorder(graph, Matcher)
                      if isinstance(x, Delayed)]
     for delayed in delayed_nodes:
         for loop in either_loops(delayed, self.conservative):
             for i, current in enumerate(loop):
                 if not is_child(current, Or, fail=False):
                     continue
                 # we cannot be at the end of the list here, since that
                 # is a Delayed instance, so i + 1 is a valid index
                 # copy from tuple to list
                 current.matchers = list(current.matchers)
                 alternatives = current.matchers
                 follower = loop[i + 1]
                 # move the follower to the end of the list
                 position = alternatives.index(follower)
                 alternatives.append(alternatives.pop(position))
     return graph
Exemplo n.º 22
0
 def non_optional_copy(matcher):
     '''
     Return `(required, optional)` for `matcher`.

     `optional` says whether the matcher was found to be optional: a
     `BaseSearch` child whose `start` is 0, or a `Transform` wrapping
     such a matcher (checked recursively).  When it is, `required` is a
     copy adjusted to be non-optional; otherwise `required` is `matcher`
     itself.
     '''
     # both of the "copy" calls below make me nervous - it's not the
     # way the rest of lepl works - but i don't have any specific
     # criticism, or a good alternative.
     if isinstance(matcher, Transform):
         inner, optional = non_optional_copy(matcher.matcher)
         if not optional:
             return matcher, optional
         wrapper = copy(matcher)
         wrapper.matcher = inner
         return wrapper, optional
     if is_child(matcher, BaseSearch, fail=False):
         # this introspection only works because Repeat sets named
         # (ie kargs) arguments.
         optional = (matcher.start == 0)
         if not optional:
             return matcher, optional
         repeat = copy(matcher)
         repeat.start = 1
         if repeat.stop == 1:
             # exactly one repetition: use the repeated matcher directly
             repeat = repeat.first
         return repeat, optional
     return matcher, False
Exemplo n.º 23
0
 def non_optional_copy(matcher):
     '''
     Return `(required, optional)` for `matcher`.

     `optional` says whether the matcher was found to be optional: a
     `BaseSearch` child whose `start` is 0, or a `Transform` wrapping
     such a matcher (checked recursively).  When it is, `required` is a
     copy adjusted to be non-optional; otherwise `required` is `matcher`
     itself.
     '''
     # both of the "copy" calls below make me nervous - it's not the
     # way the rest of lepl works - but i don't have any specific
     # criticism, or a good alternative.
     if isinstance(matcher, Transform):
         inner, optional = non_optional_copy(matcher.matcher)
         if not optional:
             return matcher, optional
         wrapper = copy(matcher)
         wrapper.matcher = inner
         return wrapper, optional
     if is_child(matcher, BaseSearch, fail=False):
         # this introspection only works because Repeat sets named
         # (ie kargs) arguments.
         optional = (matcher.start == 0)
         if not optional:
             return matcher, optional
         repeat = copy(matcher)
         repeat.start = 1
         if repeat.stop == 1:
             # exactly one repetition: use the repeated matcher directly
             repeat = repeat.first
         return repeat, optional
     return matcher, False