Пример #1
0
 def new_clone(node, old_args, kargs):
     '''
     The flattening cloner.

     Clones ``node`` after splicing nested matchers of the same kind into
     its argument list.  Only nodes whose matcher type is a key of the
     local table (And, Or) are examined.  An argument is expanded in place
     when it has the same matcher type as ``node`` and neither it nor
     ``node`` has a truthy ``wrapper``; any other argument is kept as is.
     When nothing was collected, ``old_args`` is passed on unchanged.
     '''
     table = matcher_map({And: '*matchers', Or: '*matchers'})
     flattened = []
     type_ = matcher_type(node, fail=False)
     if type_ in table:
         attribute_name = table[type_]
         for arg in old_args:
             # evaluated left to right, so the wrappers are only inspected
             # once the matcher types are known to agree
             can_merge = matcher_type(arg, fail=False) is type_ \
                 and not arg.wrapper \
                 and not node.wrapper
             if not can_merge:
                 flattened.append(arg)
                 continue
             if attribute_name.startswith('*'):
                 # a leading '*' marks an attribute holding a sequence
                 flattened.extend(getattr(arg, attribute_name[1:]))
             else:
                 flattened.append(getattr(arg, attribute_name))
     return clone(node, flattened if flattened else old_args, kargs)
Пример #2
0
 def new_clone(node, old_args, kargs):
     '''
     The flattening cloner.

     If the matcher type of ``node`` is not in the local table (And, Or)
     the node is cloned with ``old_args`` untouched.  Otherwise every
     argument that has the same matcher type as ``node`` is replaced by the
     contents of its ``matchers`` attribute, provided neither the argument
     nor ``node`` has a truthy ``wrapper``.  An empty result falls back to
     ``old_args``.
     '''
     table = matcher_map({And: '*matchers', Or: '*matchers'})
     type_ = matcher_type(node, fail=False)
     if type_ not in table:
         return clone(node, old_args, kargs)
     attribute_name = table[type_]
     merged = []
     for child in old_args:
         if matcher_type(child, fail=False) is not type_ \
                 or child.wrapper \
                 or node.wrapper:
             # different kind, or a wrapper is present: keep untouched
             merged.append(child)
         elif attribute_name.startswith('*'):
             # a leading '*' marks an attribute holding a sequence
             merged.extend(getattr(child, attribute_name[1:]))
         else:
             merged.append(getattr(child, attribute_name))
     return clone(node, merged or old_args, kargs)
Пример #3
0
def make_clone(alphabet_, old_clone, regexp_type, use_from_start):
    '''
    Factory that generates a clone suitable for rewriting recursive descent
    to regular expressions.

    Arguments:

    alphabet_
      Handed to every regexp node built here; its ``min``, ``max`` and
      ``parse`` attributes are also used.

    old_clone
      The "standard" clone, called as ``old_clone(node, args, kargs)`` to
      build the plain matcher.

    regexp_type
      Forwarded to ``RegexpContainer.build`` and called as
      ``regexp_type(regexp, alphabet_)`` when ``clone_and`` merges runs of
      sub-expressions.

    use_from_start
      Value passed as the ``use`` argument to every handler.

    Returns ``clone_(node, args, kargs)``.
    '''

    # clone functions below take the "standard" clone and the node, and then
    # reproduce the normal argument list of the matcher being cloned.
    # they should return either a container or a matcher.

    # Avoid dependency loops
    from lepl.matchers.derived import add
    from lepl.matchers.combine import And, Or, DepthFirst
    from lepl.matchers.core import Any, Literal
    from lepl.matchers.transform import Transform

    log = getLogger('lepl.regexp.rewriters.make_clone')

    def clone_any(use, original, restrict=None):
        '''
        We can always convert Any() to a regular expression; the only question
        is whether we have an open range or not.

        With no ``restrict`` the character covers ``alphabet_.min`` to
        ``alphabet_.max``; otherwise one single-value range is created for
        each element of ``restrict``.
        '''
        if restrict is None:
            char = Character([(alphabet_.min, alphabet_.max)], alphabet_)
        else:
            char = Character(((char, char) for char in restrict), alphabet_)
        log.debug(format('Any: cloned {0}', char))
        regexp = Sequence(alphabet_, char)
        return RegexpContainer.build(original, regexp, alphabet_,
                                     regexp_type, use)

    def clone_or(use, original, *matchers):
        '''
        We can convert an Or only if all the sub-matchers have possible
        regular expressions.

        Any ``Unsuitable`` raised by ``to_regexps`` is not caught here; it
        propagates to ``clone_``.
        '''
        (use, regexps) = \
            RegexpContainer.to_regexps(use, matchers, have_add=False)
        regexp = Choice(alphabet_, *regexps)
        log.debug(format('Or: cloned {0}', regexp))
        return RegexpContainer.build(original, regexp, alphabet_,
                                     regexp_type, use)

    def clone_and(use, original, *matchers):
        '''
        We can convert an And only if all the sub-matchers have possible
        regular expressions, and even then we must tag the result unless
        an add transform is present.

        Raises ``Unsuitable`` when the wrapper has functions and the first
        is not ``add``, or when conversion fails and no ``add`` is present.
        When conversion fails but ``add`` is present, runs of two or more
        adjacent convertible sub-matchers are merged into one
        ``regexp_type`` matcher and a new ``And`` is returned.
        '''
        wrapper = original.wrapper.functions
        add_reqd = True
        if wrapper:
            if wrapper[0] is add:
                # the leading add is dropped; the remaining functions are
                # passed on to the container
                wrapper = wrapper[1:]
                add_reqd = False
            else:
                raise Unsuitable
        try:
            # combine all
            (use, regexps) = \
                RegexpContainer.to_regexps(use, matchers, have_add=None)
            # if we have regexp sub-expressions, join them
            regexp = Sequence(alphabet_, *regexps)
            log.debug(format('And: cloning {0}', regexp))
            return RegexpContainer.build(original, regexp, alphabet_,
                                         regexp_type, use, add_reqd=add_reqd,
                                         wrapper=wrapper)
        except Unsuitable:
            # combine contiguous matchers where possible
            if add_reqd:
                raise

            def unpack(matcher):
                '''
                Return (plain matcher, regexp) for one sub-matcher, with
                None as the regexp when it cannot be converted.
                '''
                original = RegexpContainer.to_matcher(matcher)
                try:
                    return (original,
                            RegexpContainer.to_regexps(use, [matcher],
                                                       have_add=None)[1][0])
                except Unsuitable:
                    return (original, None)

            output = []
            # the current run of adjacent convertible sub-matchers, held
            # both as regexps and as the matchers they came from
            (regexps, originals) = ([], [])
            for (matcher, regexp) in [unpack(matcher) for matcher in matchers]:
                if regexp:
                    regexps.append(regexp)
                    originals.append(matcher)
                else:
                    # a non-convertible matcher ends the current run
                    if len(regexps) > 1:
                        # combine regexps
                        output.append(
                            regexp_type(Sequence(alphabet_, *regexps),
                                         alphabet_))
                    else:
                        # a run of zero or one is kept as plain matchers
                        output.extend(originals)
                    output.append(matcher)
                    (regexps, originals) = ([], [])
            # flush whatever run is still open at the end
            if len(regexps) > 1:
                output.append(
                    regexp_type(Sequence(alphabet_, *regexps), alphabet_))
            else:
                output.extend(originals)
            merged = And(*output)
            return merged.compose(original.wrapper)

    def clone_transform(use, original, matcher, wrapper):
        '''
        We can assume that wrapper is a transformation.  add joins into
        a sequence.

        The ``wrapper`` argument is not read; it is replaced by the
        functions of ``original.wrapper`` (minus a leading ``add``).  When
        ``original.wrapper`` is falsy the ``matcher`` argument is returned
        unchanged.
        '''
        if original.wrapper:
            if original.wrapper.functions[0] is add:
                have_add = True
                wrapper = original.wrapper.functions[1:]
            else:
                have_add = False
                wrapper = original.wrapper.functions
        else:
            # punt to next level
            return matcher
        (use, [regexp]) = \
            RegexpContainer.to_regexps(use, [matcher], have_add=have_add)
        log.debug(format('Transform: cloning {0}', regexp))
        return RegexpContainer.build(original, regexp, alphabet_,
                                     regexp_type, use,
                                     add_reqd=False, wrapper=wrapper)

    def clone_literal(use, original, text):
        '''
        Literal values are easy to transform: one single-value Character
        per element of ``text``, joined in a Sequence.
        '''
        chars = [Character([(c, c)], alphabet_) for c in text]
        regexp = Sequence(alphabet_, *chars)
        log.debug(format('Literal: cloned {0}', regexp))
        return RegexpContainer.build(original, regexp, alphabet_,
                                     regexp_type, use)

    def clone_regexp(use, original, pattern, alphabet=None):
        '''
        Regexps values are also easy.

        A string ``pattern`` is parsed with ``alphabet_.parse``; anything
        else is used as given.  A ``TypeError`` during parsing is reported
        as ``Unsuitable``.  The ``alphabet`` argument is accepted but not
        used.
        '''
        try:
            if isinstance(pattern, basestring):
                pattern = Sequence(alphabet_, *alphabet_.parse(pattern))
        except TypeError:
            raise Unsuitable
        return RegexpContainer.build(original, pattern, alphabet_,
                                     regexp_type, use)

    def clone_dfs(use, original, first, start, stop, rest=None):
        '''
        This forces use=True as it is likely that a regexp is a gain.

        Raises ``Unsuitable`` when ``start`` exceeds a non-None ``stop``, or
        when ``original.wrapper`` is truthy and its first function is not
        ``add``.  ``rest`` defaults to ``first``.
        '''
        if stop is not None and start > stop:
            raise Unsuitable
        add_reqd = stop is None or stop > 1
        wrapper = False
        if original.wrapper:
            if original.wrapper.functions[0] is add:
                add_reqd = False
                wrapper = original.wrapper.functions[1:]
            else:
                raise Unsuitable
        rest = first if rest is None else rest
        (use, [first, rest]) = \
                RegexpContainer.to_regexps(True, [first, rest], have_add=None)
        seq = []
        # a distinct first expression takes the first slot; the sequence is
        # then padded with copies of rest until it holds start elements
        if first != rest:
            seq.append(first.clone())
        while len(seq) < start:
            seq.append(rest.clone())
        addzero = len(seq) > start # first was exceptional and start=0
        # NOTE(review): a stop of 0 is falsy and so takes the unbounded
        # Repeat branch, the same as stop=None - confirm that is intended
        if stop:
            if stop > start:
                # use nested form to avoid inefficient nfa
                extras = Option(alphabet_, rest.clone())
                for _i in range(stop - start - 1):
                    extras = Option(alphabet_,
                                    Sequence(alphabet_, rest.clone(), extras))
                seq.append(extras)
        else:
            seq.append(Repeat(alphabet_, rest.clone()))
        regexp = Sequence(alphabet_, *seq)
        if addzero:
            regexp = Choice(alphabet_, regexp, Empty(alphabet_))
        log.debug(format('DFS: cloned {0}', regexp))
        return RegexpContainer.build(original, regexp, alphabet_,
                                     regexp_type, use, add_reqd=add_reqd,
                                     wrapper=wrapper)

    def clone_wrapper(use, original, *args, **kargs):
        '''
        Dispatch a wrapper matcher on ``original.factory``: delegate to the
        handler registered for that factory, or return ``original`` when
        there is none.
        '''
        factory = original.factory
        if factory in map_:
            log.debug(format('Found {0}', factory))
            return map_[factory](use, original, *args, **kargs)
        else:
            log.debug(format('No clone for {0}, {1}', factory, map_.keys()))
            return original

    # NOTE(review): TrampolineWrapper used to be listed twice in this
    # literal; the redundant entry was dropped.  If a second wrapper type
    # was meant instead, add it here.
    map_ = matcher_map({Any: clone_any,
                        Or: clone_or,
                        And: clone_and,
                        Transform: clone_transform,
                        Literal: clone_literal,
                        Regexp: clone_regexp,
                        NfaRegexp: clone_regexp,
                        DfaRegexp: clone_regexp,
                        DepthFirst: clone_dfs,
                        FunctionWrapper: clone_wrapper,
                        SequenceWrapper: clone_wrapper,
                        TrampolineWrapper: clone_wrapper})

    def clone_(node, args, kargs):
        '''
        Do the cloning, dispatching by type to the methods above.

        The plain matcher is always built first, from arguments converted
        with ``RegexpContainer.to_matcher``.  It is returned when the type
        of ``node`` has no handler or the handler raises ``Unsuitable``.
        Handlers receive the unconverted ``args`` and ``kargs``.
        '''
        original_args = [RegexpContainer.to_matcher(arg) for arg in args]
        original_kargs = dict((name, RegexpContainer.to_matcher(kargs[name]))
                              for name in kargs)
        original = old_clone(node, original_args, original_kargs)
        type_ = type(node)
        if type_ in map_:
            # pylint: disable-msg=W0142
            try:
                return map_[type_](use_from_start, original, *args, **kargs)
            except Unsuitable:
                pass
        return original

    return clone_
Пример #4
0
def make_clone(alphabet_, old_clone, regexp_type, use_from_start):
    '''
    Factory that generates a clone suitable for rewriting recursive descent
    to regular expressions.

    Arguments:

    alphabet_
      Handed to every regexp node built here; its ``min``, ``max`` and
      ``parse`` attributes are also used.

    old_clone
      The "standard" clone, called as ``old_clone(i, j, node, args, kargs)``
      to build the plain matcher.

    regexp_type
      Forwarded to ``RegexpContainer.build`` and called as
      ``regexp_type(regexp, alphabet_)`` when ``clone_and`` merges runs of
      sub-expressions.

    use_from_start
      Value passed as the ``use`` argument to every handler.

    Returns ``clone_(i, j, node, args, kargs)``.
    '''

    # clone functions below take the "standard" clone and the node, and then
    # reproduce the normal argument list of the matcher being cloned.
    # they should return either a container or a matcher.

    # Avoid dependency loops
    from lepl.matchers.derived import add
    from lepl.matchers.combine import And, Or, DepthFirst
    from lepl.matchers.core import Any, Literal
    from lepl.matchers.transform import Transform

    log = getLogger('lepl.regexp.rewriters.make_clone')

    def clone_any(use, original, restrict=None):
        '''
        We can always convert Any() to a regular expression; the only question
        is whether we have an open range or not.

        With no ``restrict`` the character covers ``alphabet_.min`` to
        ``alphabet_.max``; otherwise one single-value range is created for
        each element of ``restrict``.
        '''
        if restrict is None:
            char = Character([(alphabet_.min, alphabet_.max)], alphabet_)
        else:
            char = Character(((char, char) for char in restrict), alphabet_)
        log.debug(fmt('Any: cloned {0}', char))
        regexp = Sequence(alphabet_, char)
        return RegexpContainer.build(original, regexp, alphabet_, regexp_type,
                                     use)

    def clone_or(use, original, *matchers):
        '''
        We can convert an Or only if all the sub-matchers have possible
        regular expressions.

        Any ``Unsuitable`` raised by ``to_regexps`` is not caught here; it
        propagates to ``clone_``.
        '''
        (use, regexps) = \
            RegexpContainer.to_regexps(use, matchers, have_add=False)
        regexp = Choice(alphabet_, *regexps)
        log.debug(fmt('Or: cloned {0}', regexp))
        return RegexpContainer.build(original, regexp, alphabet_, regexp_type,
                                     use)

    def clone_and(use, original, *matchers):
        '''
        We can convert an And only if all the sub-matchers have possible
        regular expressions, and even then we must tag the result unless
        an add transform is present.

        Raises ``Unsuitable`` when the wrapper has functions and the first
        is not ``add``, or when conversion fails and no ``add`` is present.
        When conversion fails but ``add`` is present, runs of two or more
        adjacent convertible sub-matchers are merged into one
        ``regexp_type`` matcher and a new ``And`` is returned.
        '''
        # matchers without a wrapper attribute are treated as having no
        # transformation functions
        if hasattr(original, 'wrapper'):
            wrapper = original.wrapper.functions
        else:
            wrapper = None
        add_reqd = True
        if wrapper:
            if wrapper[0] is add:
                # the leading add is dropped; the remaining functions are
                # passed on to the container
                wrapper = wrapper[1:]
                add_reqd = False
            else:
                raise Unsuitable
        try:
            # combine all
            (use, regexps) = \
                RegexpContainer.to_regexps(use, matchers, have_add=None)
            # if we have regexp sub-expressions, join them
            regexp = Sequence(alphabet_, *regexps)
            log.debug(fmt('And: cloning {0}', regexp))
            return RegexpContainer.build(original,
                                         regexp,
                                         alphabet_,
                                         regexp_type,
                                         use,
                                         add_reqd=add_reqd,
                                         wrapper=wrapper)
        except Unsuitable:
            # combine contiguous matchers where possible
            if add_reqd:
                raise

            def unpack(matcher):
                '''
                Return (plain matcher, regexp) for one sub-matcher, with
                None as the regexp when it cannot be converted.
                '''
                original = RegexpContainer.to_matcher(matcher)
                try:
                    return (original,
                            RegexpContainer.to_regexps(use, [matcher],
                                                       have_add=None)[1][0])
                except Unsuitable:
                    return (original, None)

            output = []
            # the current run of adjacent convertible sub-matchers, held
            # both as regexps and as the matchers they came from
            (regexps, originals) = ([], [])
            for (matcher, regexp) in [unpack(matcher) for matcher in matchers]:
                if regexp:
                    regexps.append(regexp)
                    originals.append(matcher)
                else:
                    # a non-convertible matcher ends the current run
                    if len(regexps) > 1:
                        # combine regexps
                        output.append(
                            regexp_type(Sequence(alphabet_, *regexps),
                                        alphabet_))
                    else:
                        # a run of zero or one is kept as plain matchers
                        output.extend(originals)
                    output.append(matcher)
                    (regexps, originals) = ([], [])
            # flush whatever run is still open at the end
            if len(regexps) > 1:
                output.append(
                    regexp_type(Sequence(alphabet_, *regexps), alphabet_))
            else:
                output.extend(originals)
            merged = And(*output)
            # reached only when add_reqd is False, i.e. the wrapper had
            # functions, so original.wrapper exists here
            return merged.compose(original.wrapper)

    def clone_transform(use, original, matcher, wrapper):
        '''
        We can assume that wrapper is a transformation.  Add joins into
        a sequence.

        The ``wrapper`` argument is not read; it is replaced by the
        functions of ``original.wrapper`` (minus a leading ``add``).  When
        ``original.wrapper`` is falsy the ``matcher`` argument is returned
        unchanged.
        '''
        if original.wrapper:
            if original.wrapper.functions[0] is add:
                have_add = True
                wrapper = original.wrapper.functions[1:]
            else:
                have_add = False
                wrapper = original.wrapper.functions
        else:
            # punt to next level
            return matcher
        (use, [regexp]) = \
            RegexpContainer.to_regexps(use, [matcher], have_add=have_add)
        log.debug(fmt('Transform: cloning {0}', regexp))
        return RegexpContainer.build(original,
                                     regexp,
                                     alphabet_,
                                     regexp_type,
                                     use,
                                     add_reqd=False,
                                     wrapper=wrapper)

    def clone_literal(use, original, text):
        '''
        Literal values are easy to transform: one single-value Character
        per element of ``text``, joined in a Sequence.
        '''
        chars = [Character([(c, c)], alphabet_) for c in text]
        regexp = Sequence(alphabet_, *chars)
        log.debug(fmt('Literal: cloned {0}', regexp))
        return RegexpContainer.build(original, regexp, alphabet_, regexp_type,
                                     use)

    def clone_regexp(use, original, pattern, alphabet=None):
        '''
        Regexps values are also easy.

        A string ``pattern`` is parsed with ``alphabet_.parse``; anything
        else is used as given.  A ``TypeError`` or ``Error`` during parsing
        is reported as ``Unsuitable``.  The ``alphabet`` argument is
        accepted but not used.
        '''
        try:
            if isinstance(pattern, basestring):
                pattern = Sequence(alphabet_, *alphabet_.parse(pattern))
        except TypeError:
            raise Unsuitable
        except Error:  # cannot parse regexp
            raise Unsuitable
        return RegexpContainer.build(original, pattern, alphabet_, regexp_type,
                                     use)

    def clone_dfs(use,
                  original,
                  first,
                  start,
                  stop,
                  rest=None,
                  reduce=None,
                  generator_manager_queue_len=None):
        '''
        This forces use=True as it is likely that a regexp is a gain.

        Raises ``Unsuitable`` when ``start`` exceeds a non-None ``stop``,
        when a truthy ``reduce`` is anything other than a 2-tuple equal to
        ``([], __add__)``, when ``generator_manager_queue_len`` is truthy,
        or when ``original.wrapper`` is truthy and its first function is
        not ``add``.  ``rest`` defaults to ``first``.
        '''
        if stop is not None and start > stop:
            raise Unsuitable
        if reduce and not (isinstance(reduce, tuple) and len(reduce) == 2
                           and reduce[0] == [] and reduce[1] == __add__):
            raise Unsuitable
        if generator_manager_queue_len:
            # this should only be set when running
            raise Unsuitable
        add_reqd = stop is None or stop > 1
        wrapper = False
        if hasattr(original, 'wrapper') and original.wrapper:
            if original.wrapper.functions[0] is add:
                add_reqd = False
                wrapper = original.wrapper.functions[1:]
            else:
                raise Unsuitable
        rest = first if rest is None else rest
        (use, [first, rest]) = \
                RegexpContainer.to_regexps(True, [first, rest], have_add=None)
        seq = []
        # a distinct first expression takes the first slot; the sequence is
        # then padded with copies of rest until it holds start elements
        if first != rest:
            seq.append(first.clone())
        while len(seq) < start:
            seq.append(rest.clone())
        addzero = len(seq) > start  # first was exceptional and start=0
        # NOTE(review): a stop of 0 is falsy and so takes the unbounded
        # Repeat branch, the same as stop=None - confirm that is intended
        if stop:
            if stop > start:
                # use nested form to avoid inefficient nfa
                extras = Option(alphabet_, rest.clone())
                for _i in range(stop - start - 1):
                    extras = Option(alphabet_,
                                    Sequence(alphabet_, rest.clone(), extras))
                seq.append(extras)
        else:
            seq.append(Repeat(alphabet_, rest.clone()))
        regexp = Sequence(alphabet_, *seq)
        if addzero:
            regexp = Choice(alphabet_, regexp, Empty(alphabet_))
        log.debug(fmt('DFS: cloned {0}', regexp))
        return RegexpContainer.build(original,
                                     regexp,
                                     alphabet_,
                                     regexp_type,
                                     use,
                                     add_reqd=add_reqd,
                                     wrapper=wrapper)

    def clone_wrapper(use, original, *args, **kargs):
        '''
        Dispatch a wrapper matcher on ``original.factory``: delegate to the
        handler registered for that factory, or return ``original`` when
        there is none.
        '''
        factory = original.factory
        if factory in map_:
            log.debug(fmt('Found {0}', factory))
            return map_[factory](use, original, *args, **kargs)
        else:
            log.debug(fmt('No clone for {0}, {1}', factory, map_.keys()))
            return original

    # handler for each supported matcher type; the wrapper types are
    # dispatched a second time, on their factory, by clone_wrapper
    map_ = matcher_map({
        Any: clone_any,
        Or: clone_or,
        And: clone_and,
        AndNoTrampoline: clone_and,
        Transform: clone_transform,
        Literal: clone_literal,
        Regexp: clone_regexp,
        NfaRegexp: clone_regexp,
        DfaRegexp: clone_regexp,
        DepthFirst: clone_dfs,
        DepthNoTrampoline: clone_dfs,
        FunctionWrapper: clone_wrapper,
        SequenceWrapper: clone_wrapper,
        TrampolineWrapper: clone_wrapper,
        TransformableTrampolineWrapper: clone_wrapper
    })

    def clone_(i, j, node, args, kargs):
        '''
        Do the cloning, dispatching by type to the methods above.

        ``i`` and ``j`` are only forwarded to ``old_clone``.  The plain
        matcher is always built first, from arguments converted with
        ``RegexpContainer.to_matcher``.  It is returned when the type of
        ``node`` has no handler or the handler raises ``Unsuitable``.
        Handlers receive the unconverted ``args`` and ``kargs``.
        '''
        original_args = [RegexpContainer.to_matcher(arg) for arg in args]
        original_kargs = dict(
            (name, RegexpContainer.to_matcher(kargs[name])) for name in kargs)
        original = old_clone(i, j, node, original_args, original_kargs)
        type_ = type(node)
        if type_ in map_:
            # pylint: disable-msg=W0142
            try:
                return map_[type_](use_from_start, original, *args, **kargs)
            except Unsuitable:
                pass
        return original

    return clone_
Пример #5
0
def make_clone(alphabet, old_clone, matcher_type, use_from_start):
    '''
    Factory that generates a clone suitable for rewriting recursive descent
    to regular expressions.

    Arguments:

    alphabet
      Handed to every regexp node built here; its ``min`` and ``max``
      attributes are also used.

    old_clone
      The "standard" clone, called as ``old_clone(node, args, kargs)`` to
      build the plain matcher.

    matcher_type
      Forwarded to ``RegexpContainer.build`` and to ``single``.

    use_from_start
      Value passed as the ``use`` argument to every handler.

    Returns ``clone_(node, args, kargs)``.
    '''

    # clone functions below take the "standard" clone and the node, and then
    # reproduce the normal argument list of the matcher being cloned.
    # they should return either a container or a matcher.

    # Avoid dependency loops
    from lepl.matchers.derived import add
    from lepl.matchers.combine import And, Or, DepthFirst
    from lepl.matchers.core import Any, Literal
    from lepl.matchers.transform import Transformable, Transform, \
        TransformationWrapper

    log = getLogger('lepl.regexp.rewriters.make_clone')

    def clone_any(use, original, restrict=None):
        '''
        We can always convert Any() to a regular expression; the only question
        is whether we have an open range or not.

        With no ``restrict`` the character covers ``alphabet.min`` to
        ``alphabet.max``; otherwise one single-value range is created for
        each element of ``restrict``.
        '''
        if restrict is None:
            char = Character([(alphabet.min, alphabet.max)], alphabet)
        else:
            char = Character(((char, char) for char in restrict), alphabet)
        log.debug(format('Any: cloned {0}', char))
        regexp = Sequence([char], alphabet)
        return RegexpContainer.build(original, regexp, alphabet, matcher_type,
                                     use)

    def clone_or(use, original, *matchers):
        '''
        We can convert an Or only if all the sub-matchers have possible
        regular expressions.

        Returns ``original`` unchanged when ``Unsuitable`` is raised.
        '''
        assert isinstance(original, Transformable)
        try:
            (use, regexps) = RegexpContainer.to_regexps(use, matchers)
            regexp = Choice(regexps, alphabet)
            log.debug(format('Or: cloned {0}', regexp))
            return RegexpContainer.build(original, regexp, alphabet,
                                         matcher_type, use)
        except Unsuitable:
            log.debug(format('Or not rewritten: {0}', original))
            return original

    def clone_and(use, original, *matchers):
        '''
        We can convert an And only if all the sub-matchers have possible
        regular expressions, and even then we must tag the result unless
        an add transform is present.

        Returns ``original`` unchanged when ``Unsuitable`` is raised or when
        the wrapper holds a transformation other than a leading ``add``.
        Note that the first branch below replaces ``original.wrapper`` in
        place.
        '''
        assert isinstance(original, Transformable)
        try:
            # since we're going to require add anyway, we're happy to take
            # other inputs, whether add is required or not.
            (use, regexps) = \
                RegexpContainer.to_regexps(use, matchers, add_reqd=None)
            # if we have regexp sub-expressions, join them
            regexp = Sequence(regexps, alphabet)
            log.debug(format('And: cloning {0}', regexp))
            if use and len(original.wrapper.functions) > 1 \
                    and original.wrapper.functions[0] is add:
                # we have additional functions, so cannot take regexp higher,
                # but use is True, so return a new matcher.
                # hack to copy across other functions
                # NOTE(review): clone_transform builds the equivalent
                # wrapper with TransformationWrapper().extend(...), while
                # this passes the functions to the constructor - confirm
                # that both forms are valid and equivalent
                original.wrapper = \
                        TransformationWrapper(original.wrapper.functions[1:])
                log.debug('And: OK (final)')
                # NEED TEST FOR THIS
                return single(alphabet, original, regexp, matcher_type)
            elif len(original.wrapper.functions) == 1 \
                    and original.wrapper.functions[0] is add:
                # OR JUST ONE?
                # lucky!  we just combine and continue
                log.debug('And: OK')
                return RegexpContainer.build(original,
                                             regexp,
                                             alphabet,
                                             matcher_type,
                                             use,
                                             transform=False)
            elif not original.wrapper:
                # regexp can't return multiple values, so hope that we have
                # an add
                log.debug('And: add required')
                return RegexpContainer.build(original,
                                             regexp,
                                             alphabet,
                                             matcher_type,
                                             use,
                                             add_reqd=True)
            else:
                log.debug(
                    format('And: wrong transformation: {0!r}',
                           original.wrapper))
                return original
        except Unsuitable:
            log.debug(format('And: not rewritten: {0}', original))
            return original

    def clone_transform(use,
                        original,
                        matcher,
                        wrapper,
                        _raw=False,
                        _args=False):
        '''
        We can assume that wrapper is a transformation.  add joins into
        a sequence.

        ``_raw`` and ``_args`` are accepted but not used.  Returns
        ``original`` unchanged when ``Unsuitable`` is raised or when the
        wrapper holds a transformation other than a leading ``add``.  Note
        that the first branch below replaces ``original.wrapper`` in place.
        '''
        assert isinstance(wrapper, TransformationWrapper)
        try:
            # this is the only place add is required
            (use, [regexp]) = RegexpContainer.to_regexps(use, [matcher],
                                                         add_reqd=True)
            log.debug(format('Transform: cloning {0}', regexp))
            if use and len(wrapper.functions) > 1 \
                    and wrapper.functions[0] is add:
                # we have additional functions, so cannot take regexp higher,
                # but use is True, so return a new matcher.
                # hack to copy across other functions
                original.wrapper = \
                    TransformationWrapper().extend(wrapper.functions[1:])
                log.debug('Transform: OK (final)')
                # NEED TEST FOR THIS
                return single(alphabet, original, regexp, matcher_type)
            elif len(wrapper.functions) == 1 and wrapper.functions[0] is add:
                # exactly what we wanted!  combine and continue
                log.debug('Transform: OK')
                return RegexpContainer.build(original,
                                             regexp,
                                             alphabet,
                                             matcher_type,
                                             use,
                                             transform=False)
            elif not wrapper:
                # we're just forwarding the add_reqd from before here
                log.debug('Transform: empty, add required')
                # unlike the other branches, this constructs the container
                # directly instead of going through RegexpContainer.build
                return RegexpContainer(original, regexp, use, add_reqd=True)
            else:
                log.debug(
                    format('Transform: wrong transformation: {0!r}',
                           original.wrapper))
                return original
        except Unsuitable:
            log.debug(format('Transform: not rewritten: {0}', original))
            return original

    def clone_literal(use, original, text):
        '''
        Literal values are easy to transform: one single-value Character
        per element of ``text``, joined in a Sequence.
        '''
        chars = [Character([(c, c)], alphabet) for c in text]
        regexp = Sequence(chars, alphabet)
        log.debug(format('Literal: cloned {0}', regexp))
        return RegexpContainer.build(original, regexp, alphabet, matcher_type,
                                     use)

    def clone_dfs(use, original, first, start, stop, rest=None):
        '''
        We only convert DFS if start=0 or 1, stop=1 or None and first and 
        rest are both regexps.
        
        This forces use=True as it is likely that a regexp is a gain.

        Returns ``original`` unchanged when ``Unsuitable`` is raised.
        '''
        assert not isinstance(original, Transformable)
        try:
            if start not in (0, 1) or stop not in (1, None):
                raise Unsuitable()
            # NOTE(review): rest defaults to None and is passed to
            # to_regexps as given - confirm that None is handled there
            (use, [first, rest]) = \
                    RegexpContainer.to_regexps(True, [first, rest])
            # we need to be careful here to get the depth first bit right
            if stop is None:
                # unbounded: first followed by repeated rest
                regexp = Sequence([first, Repeat([rest], alphabet)], alphabet)
                if start == 0:
                    regexp = Choice([regexp, Empty(alphabet)], alphabet)
            else:
                # stop is 1 here: just first
                regexp = first
                if start == 0:
                    regexp = Choice([regexp, Empty(alphabet)], alphabet)
            log.debug(format('DFS: cloned {0}', regexp))
            return RegexpContainer.build(original,
                                         regexp,
                                         alphabet,
                                         matcher_type,
                                         use,
                                         add_reqd=stop is None)
        except Unsuitable:
            log.debug(format('DFS: not rewritten: {0}', original))
            return original

    def clone_wrapper(use, original, *args, **kargs):
        '''
        Dispatch a wrapper matcher on ``original.factory``: delegate to the
        handler registered for that factory, or return ``original`` when
        there is none.
        '''
        factory = original.factory
        if factory in map_:
            log.debug(format('Found {0}', factory))
            return map_[factory](use, original, *args, **kargs)
        else:
            log.debug(format('No clone for {0}, {1}', factory, map_.keys()))
            return original

    # handler for each supported matcher type; the wrapper types are
    # dispatched a second time, on their factory, by clone_wrapper
    map_ = matcher_map({
        Any: clone_any,
        Or: clone_or,
        And: clone_and,
        Transform: clone_transform,
        Literal: clone_literal,
        DepthFirst: clone_dfs,
        FunctionWrapper: clone_wrapper,
        SequenceWrapper: clone_wrapper,
        TrampolineWrapper: clone_wrapper,
        TransformableTrampolineWrapper: clone_wrapper
    })

    def clone_(node, args, kargs):
        '''
        Do the cloning, dispatching by type to the methods above.

        The plain matcher is always built first, from arguments converted
        with ``RegexpContainer.to_matcher``, and is returned as is when the
        type of ``node`` has no handler.  Handlers receive the unconverted
        ``args`` and ``kargs``.  ``Unsuitable`` is not caught here; the
        handlers that call ``to_regexps`` catch it themselves.
        '''
        original_args = [RegexpContainer.to_matcher(arg) for arg in args]
        original_kargs = dict(
            (name, RegexpContainer.to_matcher(kargs[name])) for name in kargs)
        original = old_clone(node, original_args, original_kargs)
        type_ = type(node)
        if type_ in map_:
            # pylint: disable-msg=W0142
            return map_[type_](use_from_start, original, *args, **kargs)
        else:
            return original

    return clone_
Пример #6
0
def make_clone(alphabet, old_clone, matcher_type, use_from_start):
    '''
    Factory that generates a clone suitable for rewriting recursive descent
    to regular expressions.
    '''
    
    # clone functions below take the "standard" clone and the node, and then
    # reproduce the normal argument list of the matcher being cloned.
    # they should return either a container or a matcher.
    
    # Avoid dependency loops
    from lepl.matchers.derived import add
    from lepl.matchers.combine import And, Or, DepthFirst
    from lepl.matchers.core import Any, Literal
    from lepl.matchers.transform import Transformable, Transform, \
        TransformationWrapper

    log = getLogger('lepl.regexp.rewriters.make_clone')
    
    def clone_any(use, original, restrict=None):
        '''
        Any() can always be rewritten as a one-character regular expression.

        With no `restrict` the character class spans alphabet.min to
        alphabet.max; otherwise it has one single-value interval for each
        item of `restrict`.
        '''
        if restrict is None:
            intervals = [(alphabet.min, alphabet.max)]
        else:
            # kept as a generator, exactly as Character received it before
            intervals = ((item, item) for item in restrict)
        char_class = Character(intervals, alphabet)
        log.debug(format('Any: cloned {0}', char_class))
        sequence = Sequence([char_class], alphabet)
        return RegexpContainer.build(original, sequence, alphabet,
                                     matcher_type, use)
        
    def clone_or(use, original, *matchers):
        '''
        Rewrite an Or as a Choice between its sub-matchers' regular
        expressions.

        If Unsuitable is raised at any point of the rewrite (for instance
        while unpacking the sub-matchers) `original` is returned unchanged.
        '''
        assert isinstance(original, Transformable)
        try:
            use, alternatives = RegexpContainer.to_regexps(use, matchers)
            choice = Choice(alternatives, alphabet)
            log.debug(format('Or: cloned {0}', choice))
            result = RegexpContainer.build(original, choice, alphabet,
                                           matcher_type, use)
        except Unsuitable:
            log.debug(format('Or not rewritten: {0}', original))
            result = original
        return result

    def clone_and(use, original, *matchers):
        '''
        Rewrite an And as a Sequence of its sub-matchers' regular
        expressions.

        If Unsuitable is raised at any point `original` is returned
        unchanged.  Otherwise the outcome depends on the functions held by
        `original.wrapper`:

        - `add` followed by more functions, and `use` is true: the wrapper
          is replaced by one holding only the remaining functions and the
          result of `single` is returned;
        - `add` alone: a container is built with transform=False;
        - a false (empty) wrapper: a container is built with add_reqd=True;
        - anything else: `original` is returned unchanged.
        '''
        assert isinstance(original, Transformable)
        try:
            # add is going to be required here anyway, so sub-expressions
            # are accepted whether or not they require add themselves
            use, parts = RegexpContainer.to_regexps(use, matchers,
                                                    add_reqd=None)
            # join whatever regexp sub-expressions we were given
            joined = Sequence(parts, alphabet)
            log.debug(format('And: cloning {0}', joined))

            functions = original.wrapper.functions
            count = len(functions)

            if use and count > 1 and functions[0] is add:
                # more functions follow add, so the regexp cannot be taken
                # any higher; use is true, so return a new matcher.
                # hack: carry the remaining functions across
                original.wrapper = TransformationWrapper(functions[1:])
                log.debug('And: OK (final)')
                # TODO: this path still needs a test
                return single(alphabet, original, joined, matcher_type)

            if count == 1 and functions[0] is add:
                # only add is present: combine and carry on upwards
                log.debug('And: OK')
                return RegexpContainer.build(original, joined, alphabet,
                                             matcher_type, use,
                                             transform=False)

            if not original.wrapper:
                # a regexp cannot return multiple values, so flag that an
                # add is still needed further up
                log.debug('And: add required')
                return RegexpContainer.build(original, joined, alphabet,
                                             matcher_type, use,
                                             add_reqd=True)

            log.debug(format('And: wrong transformation: {0!r}',
                             original.wrapper))
            return original
        except Unsuitable:
            log.debug(format('And: not rewritten: {0}', original))
            return original
    
    def clone_transform(use, original, matcher, wrapper, 
                          _raw=False, _args=False):
        '''
        Rewrite a Transform whose sub-matcher is a regexp that requires add.

        `wrapper` must be a TransformationWrapper (asserted).  `_raw` and
        `_args` are accepted but not used here.  If Unsuitable is raised at
        any point `original` is returned unchanged.  Otherwise the outcome
        depends on `wrapper.functions`:

        - `add` followed by more functions, and `use` is true: the wrapper
          of `original` is reassigned from the remaining functions and the
          result of `single` is returned;
        - `add` alone: a container is built with transform=False;
        - a false (empty) wrapper: a RegexpContainer with add_reqd=True is
          returned, passing the requirement on;
        - anything else: `original` is returned unchanged.
        '''
        assert isinstance(wrapper, TransformationWrapper)
        try:
            # the only place where the sub-expression must require add
            use, (inner,) = RegexpContainer.to_regexps(use, [matcher],
                                                       add_reqd=True)
            log.debug(format('Transform: cloning {0}', inner))

            functions = wrapper.functions
            count = len(functions)

            if use and count > 1 and functions[0] is add:
                # more functions follow add, so the regexp cannot be taken
                # any higher; use is true, so return a new matcher.
                # hack: carry the remaining functions across
                # NOTE(review): this assigns the return value of extend();
                # clone_and passes the functions to the constructor
                # instead - confirm that extend() returns the wrapper
                original.wrapper = \
                    TransformationWrapper().extend(functions[1:])
                log.debug('Transform: OK (final)')
                # TODO: this path still needs a test
                return single(alphabet, original, inner, matcher_type)

            if count == 1 and functions[0] is add:
                # exactly the transformation wanted: combine and carry on
                log.debug('Transform: OK')
                return RegexpContainer.build(original, inner, alphabet,
                                             matcher_type, use,
                                             transform=False)

            if not wrapper:
                # nothing to apply, so just forward the earlier add_reqd
                log.debug('Transform: empty, add required')
                return RegexpContainer(original, inner, use, add_reqd=True)

            log.debug(format('Transform: wrong transformation: {0!r}',
                             original.wrapper))
            return original
        except Unsuitable:
            log.debug(format('Transform: not rewritten: {0}', original))
            return original
        
    def clone_literal(use, original, text):
        '''
        Rewrite a Literal as a Sequence holding one single-value Character
        for each item of `text`, in order.
        '''
        singles = []
        for item in text:
            singles.append(Character([(item, item)], alphabet))
        sequence = Sequence(singles, alphabet)
        log.debug(format('Literal: cloned {0}', sequence))
        return RegexpContainer.build(original, sequence, alphabet,
                                     matcher_type, use)
    
    def clone_dfs(use, original, first, start, stop, rest=None):
        '''
        Rewrite a DepthFirst repetition as a regular expression.

        Only start in (0, 1) and stop in (1, None) are handled, and both
        `first` and `rest` must unpack to regexps; in any other case
        Unsuitable is raised internally and `original` is returned.

        The incoming `use` is ignored: True is passed on instead, on the
        grounds that a regexp is likely to be a gain here.
        '''
        assert not isinstance(original, Transformable)
        try:
            if not (start in (0, 1) and stop in (1, None)):
                raise Unsuitable()
            use, (first_re, rest_re) = \
                    RegexpContainer.to_regexps(True, [first, rest])
            unbounded = stop is None
            # care is needed to keep the depth first ordering
            if unbounded:
                regexp = Sequence([first_re, Repeat([rest_re], alphabet)],
                                  alphabet)
            else:
                regexp = first_re
            if start == 0:
                # zero matches are allowed, but only as the last alternative
                regexp = Choice([regexp, Empty(alphabet)], alphabet)
            log.debug(format('DFS: cloned {0}', regexp))
            return RegexpContainer.build(original, regexp, alphabet,
                                         matcher_type, use,
                                         add_reqd=unbounded)
        except Unsuitable:
            log.debug(format('DFS: not rewritten: {0}', original))
            return original
        
    def clone_wrapper(use, original, *args, **kargs):
        '''
        Forward a wrapper matcher to the clone function registered in
        `map_` for `original.factory`, passing the arguments on unchanged.

        If nothing is registered for that factory, `original` is returned
        as it is.
        '''
        key = original.factory
        known = key in map_
        if not known:
            log.debug(format('No clone for {0}, {1}', key, map_.keys()))
            return original
        log.debug(format('Found {0}', key))
        return map_[key](use, original, *args, **kargs)
        
    # Dispatch table: matcher type -> clone function defined above.
    handlers = {
        Any: clone_any,
        Or: clone_or,
        And: clone_and,
        Transform: clone_transform,
        Literal: clone_literal,
        DepthFirst: clone_dfs,
    }
    # every wrapper type is handled by looking its factory up again
    for wrapper_type in (FunctionWrapper, SequenceWrapper,
                         TrampolineWrapper, TransformableTrampolineWrapper):
        handlers[wrapper_type] = clone_wrapper
    map_ = matcher_map(handlers)
    
    def clone_(node, args, kargs):
        '''
        Clone `node`, dispatching on type(node) to the functions in `map_`.

        The standard clone is built first with `old_clone`, using arguments
        converted by RegexpContainer.to_matcher.  When type(node) is in
        `map_` the registered function receives `use_from_start`, that
        standard clone and the unconverted arguments, and its result is
        returned; otherwise the standard clone is returned.
        '''
        matcher_args = []
        for arg in args:
            matcher_args.append(RegexpContainer.to_matcher(arg))
        matcher_kargs = {}
        for name in kargs:
            matcher_kargs[name] = RegexpContainer.to_matcher(kargs[name])
        original = old_clone(node, matcher_args, matcher_kargs)

        node_type = type(node)
        if node_type not in map_:
            return original
        rewrite = map_[node_type]
        # pylint: disable-msg=W0142
        return rewrite(use_from_start, original, *args, **kargs)

    return clone_