Example #1
 def clone_dfs(use,
               original,
               first,
               start,
               stop,
               rest=None,
               reduce=None,
               generator_manager_queue_len=None):
     '''
     This forces use=True as it is likely that a regexp is a gain.
     '''
     if stop is not None and start > stop:
         raise Unsuitable
     if reduce and not (isinstance(reduce, tuple) and len(reduce) == 2
                        and reduce[0] == [] and reduce[1] == __add__):
         raise Unsuitable
     if generator_manager_queue_len:
         # this should only be set when running
         raise Unsuitable
     add_reqd = stop is None or stop > 1
     wrapper = False
     if hasattr(original, 'wrapper') and original.wrapper:
         if original.wrapper.functions[0] is add:
             add_reqd = False
             wrapper = original.wrapper.functions[1:]
         else:
             raise Unsuitable
     rest = first if rest is None else rest
     (use, [first, rest]) = \
             RegexpContainer.to_regexps(True, [first, rest], have_add=None)
     seq = []
     if first != rest:
         seq.append(first.clone())
     while len(seq) < start:
         seq.append(rest.clone())
     addzero = len(seq) > start  # first was exceptional and start=0
     if stop:
         if stop > start:
             # use nested form to avoid inefficient nfa
             extras = Option(alphabet_, rest.clone())
             for _i in range(stop - start - 1):
                 extras = Option(alphabet_,
                                 Sequence(alphabet_, rest.clone(), extras))
             seq.append(extras)
     else:
         seq.append(Repeat(alphabet_, rest.clone()))
     regexp = Sequence(alphabet_, *seq)
     if addzero:
         regexp = Choice(alphabet_, regexp, Empty(alphabet_))
     log.debug(fmt('DFS: cloned {0}', regexp))
     return RegexpContainer.build(original,
                                  regexp,
                                  alphabet_,
                                  regexp_type,
                                  use,
                                  add_reqd=add_reqd,
                                  wrapper=wrapper)
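The seq/extras construction above is easiest to see on plain regexp strings. The sketch below is an illustration only, using Python's re module instead of LEPL's Option/Sequence/Repeat nodes (the function name and pattern fragments are assumptions, not part of the library): the start mandatory copies come first, then the optional tail is built inside-out so the resulting NFA stays compact rather than flat.

import re

def bounded_repeat(atom, start, stop):
    # Illustration only: mirrors the seq/extras construction in clone_dfs,
    # but on plain regexp strings instead of LEPL regexp nodes.
    pattern = atom * start                      # the mandatory copies
    if stop is None:
        pattern += '(?:' + atom + ')*'          # open range: Repeat(...)
    elif stop > start:
        extras = '(?:' + atom + ')?'            # Option(rest)
        for _ in range(stop - start - 1):
            # Option(Sequence(rest, extras)) -> nested, not flat
            extras = '(?:' + atom + extras + ')?'
        pattern += extras
    return pattern

assert re.fullmatch(bounded_repeat('a', 2, 4), 'aaa')
assert not re.fullmatch(bounded_repeat('a', 2, 4), 'aaaaa')
assert re.fullmatch(bounded_repeat('a', 1, None), 'aaaa')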
Example #2
 def clone_dfs(use, original, first, start, stop, rest=None):
     '''
     We only convert DFS if start=0 or 1, stop=1 or None, and first and
     rest are both regexps.
     
     This forces use=True as it is likely that a regexp is a gain.
     '''
     assert not isinstance(original, Transformable)
     try:
         if start not in (0, 1) or stop not in (1, None):
             raise Unsuitable()
         (use, [first, rest]) = \
                 RegexpContainer.to_regexps(True, [first, rest])
         # we need to be careful here to get the depth first bit right
         if stop is None:
             regexp = Sequence([first, Repeat([rest], alphabet)], alphabet)
             if start == 0:
                 regexp = Choice([regexp, Empty(alphabet)], alphabet)
         else:
             regexp = first
             if start == 0:
                 regexp = Choice([regexp, Empty(alphabet)], alphabet)
         log.debug(format('DFS: cloned {0}', regexp))
         return RegexpContainer.build(original,
                                      regexp,
                                      alphabet,
                                      matcher_type,
                                      use,
                                      add_reqd=stop is None)
     except Unsuitable:
         log.debug(format('DFS: not rewritten: {0}', original))
         return original
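The two cases this simpler version accepts can be written out directly. Below is a minimal sketch where plain re strings stand in for the first and rest regexps (hypothetical fragments, not LEPL's API):

import re

first, rest = 'a', 'b'   # hypothetical sub-regexps

# stop is None: first followed by any number of rest
unbounded = first + '(?:' + rest + ')*'
assert re.fullmatch(unbounded, 'abbb')

# stop == 1: just first; with start == 0 the whole expression becomes
# optional, i.e. Choice(regexp, Empty)
optional = '(?:' + first + ')?'
assert re.fullmatch(optional, 'a') and re.fullmatch(optional, '')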
Example #3
 def clone_literal(use, original, text):
     '''
     Literal values are easy to transform.
     '''
     chars = [Character([(c, c)], alphabet_) for c in text]
     regexp = Sequence(alphabet_, *chars)
     log.debug(fmt('Literal: cloned {0}', regexp))
     return RegexpContainer.build(original, regexp, alphabet_, regexp_type,
                                  use)
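A minimal sketch of the same idea, assuming only that a literal maps to one single-character range per input character (plain tuples here stand in for LEPL's Character nodes):

def literal_to_ranges(text):
    # Each character c becomes the degenerate range (c, c); the ranges
    # are then matched in sequence, exactly as in clone_literal above.
    return [(c, c) for c in text]

assert literal_to_ranges('ab') == [('a', 'a'), ('b', 'b')]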
Example #4
 def clone_and(use, original, *matchers):
     '''
     We can convert an And only if all the sub-matchers have possible
     regular expressions, and even then we must tag the result unless
     an add transform is present.
     '''
     assert isinstance(original, Transformable)
     try:
         # since we're going to require add anyway, we're happy to take
         # other inputs, whether add is required or not.
         (use, regexps) = \
             RegexpContainer.to_regexps(use, matchers, add_reqd=None)
         # if we have regexp sub-expressions, join them
         regexp = Sequence(regexps, alphabet)
         log.debug(format('And: cloning {0}', regexp))
         if use and len(original.wrapper.functions) > 1 \
                 and original.wrapper.functions[0] is add:
             # we have additional functions, so cannot take regexp higher,
             # but use is True, so return a new matcher.
             # hack to copy across other functions
             original.wrapper = \
                     TransformationWrapper(original.wrapper.functions[1:])
             log.debug('And: OK (final)')
             # NEED TEST FOR THIS
             return single(alphabet, original, regexp, matcher_type)
         elif len(original.wrapper.functions) == 1 \
                 and original.wrapper.functions[0] is add:
             # OR JUST ONE?
             # lucky!  we just combine and continue
             log.debug('And: OK')
             return RegexpContainer.build(original,
                                          regexp,
                                          alphabet,
                                          matcher_type,
                                          use,
                                          transform=False)
         elif not original.wrapper:
             # regexp can't return multiple values, so hope that we have
             # an add
             log.debug('And: add required')
             return RegexpContainer.build(original,
                                          regexp,
                                          alphabet,
                                          matcher_type,
                                          use,
                                          add_reqd=True)
         else:
             log.debug(
                 format('And: wrong transformation: {0!r}',
                        original.wrapper))
             return original
     except Unsuitable:
         log.debug(format('And: not rewritten: {0}', original))
         return original
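The branching on the wrapper is the heart of this version: a leading add is consumed by the regexp (which already returns a single joined value), any remaining functions pin the result at this level, and an empty wrapper means add is still required. The sketch below mirrors those branches with plain stand-in functions (add, upper and classify are hypothetical names, not LEPL's TransformationWrapper API):

def add(results):
    return [''.join(results)]

def upper(results):
    return [r.upper() for r in results]

def classify(functions):
    # Mirrors the branches above, on a plain list of functions.
    if functions and functions[0] is add:
        rest = functions[1:]
        return 'final matcher' if rest else 'combine and continue'
    if not functions:
        return 'add required'
    return 'wrong transformation'

assert classify([add, upper]) == 'final matcher'
assert classify([add]) == 'combine and continue'
assert classify([]) == 'add required'
assert classify([upper]) == 'wrong transformation'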
Example #5
 def clone_regexp(use, original, pattern, alphabet=None):
     '''
     Regexp values are also easy to transform.
     '''
     try:
         if isinstance(pattern, basestring):
             pattern = Sequence(alphabet_, *alphabet_.parse(pattern))
     except TypeError:
         raise Unsuitable
     except Error:  # cannot parse regexp
         raise Unsuitable
     return RegexpContainer.build(original, pattern, alphabet_, regexp_type,
                                  use)
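The try/fall-through shape is the same as compiling a pattern with the standard library and backing off when it fails (an analogy only; LEPL parses against its own alphabet). Both non-strings and unparseable patterns count as unsuitable:

import re

def try_parse(pattern):
    # Non-strings raise TypeError, bad patterns raise re.error; either
    # way the node is "unsuitable" and left alone.
    try:
        return re.compile(pattern)
    except (TypeError, re.error):
        return None

assert try_parse('a|b') is not None
assert try_parse('(unclosed') is None
assert try_parse(42) is None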
Example #6
 def clone_any(use, original, restrict=None):
     '''
     We can always convert Any() to a regular expression; the only question
     is whether we have an open range or not.
     '''
     if restrict is None:
         char = Character([(alphabet_.min, alphabet_.max)], alphabet_)
     else:
         char = Character(((char, char) for char in restrict), alphabet_)
     log.debug(fmt('Any: cloned {0}', char))
     regexp = Sequence(alphabet_, char)
     return RegexpContainer.build(original, regexp, alphabet_, regexp_type,
                                  use)
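The open-versus-restricted distinction reduces to which ranges the Character carries. A standalone sketch, with plain tuples standing in for LEPL's Character ranges and the alphabet bounds chosen arbitrarily for illustration:

def any_to_ranges(restrict, lo='\x00', hi='\U0010ffff'):
    # No restriction: one open range over the whole (assumed) alphabet.
    # Otherwise: one degenerate range per allowed character, as above.
    if restrict is None:
        return [(lo, hi)]
    return [(c, c) for c in restrict]

assert any_to_ranges(None) == [('\x00', '\U0010ffff')]
assert any_to_ranges('abc') == [('a', 'a'), ('b', 'b'), ('c', 'c')]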
Example #7
    def clone_and(use, original, *matchers):
        '''
        We can convert an And only if all the sub-matchers have possible
        regular expressions, and even then we must tag the result unless
        an add transform is present.
        '''
        if hasattr(original, 'wrapper'):
            wrapper = original.wrapper.functions
        else:
            wrapper = None
        add_reqd = True
        if wrapper:
            if wrapper[0] is add:
                wrapper = wrapper[1:]
                add_reqd = False
            else:
                raise Unsuitable
        try:
            # combine all
            (use, regexps) = \
                RegexpContainer.to_regexps(use, matchers, have_add=None)
            # if we have regexp sub-expressions, join them
            regexp = Sequence(alphabet_, *regexps)
            log.debug(fmt('And: cloning {0}', regexp))
            return RegexpContainer.build(original,
                                         regexp,
                                         alphabet_,
                                         regexp_type,
                                         use,
                                         add_reqd=add_reqd,
                                         wrapper=wrapper)
        except Unsuitable:
            # combine contiguous matchers where possible
            if add_reqd:
                raise

            def unpack(matcher):
                original = RegexpContainer.to_matcher(matcher)
                try:
                    return (original,
                            RegexpContainer.to_regexps(use, [matcher],
                                                       have_add=None)[1][0])
                except Unsuitable:
                    return (original, None)

            output = []
            (regexps, originals) = ([], [])
            for (matcher, regexp) in [unpack(matcher) for matcher in matchers]:
                if regexp:
                    regexps.append(regexp)
                    originals.append(matcher)
                else:
                    if len(regexps) > 1:
                        # combine regexps
                        output.append(
                            regexp_type(Sequence(alphabet_, *regexps),
                                        alphabet_))
                    else:
                        output.extend(originals)
                    output.append(matcher)
                    (regexps, originals) = ([], [])
            if len(regexps) > 1:
                output.append(
                    regexp_type(Sequence(alphabet_, *regexps), alphabet_))
            else:
                output.extend(originals)
            merged = And(*output)
            return merged.compose(original.wrapper)
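The fallback branch above is easiest to see in isolation. In this sketch plain strings stand in for both the matchers and their regexps, and convert stands in for RegexpContainer.to_regexps (all hypothetical names): convertible children are buffered, and the buffered run is flushed, merged if it has more than one member, whenever a non-convertible child interrupts it.

def merge_contiguous(children, convert):
    output, buffered = [], []
    for child in children:
        regexp = convert(child)
        if regexp is not None:
            buffered.append(regexp)                # part of a contiguous run
        else:
            if len(buffered) > 1:
                output.append(''.join(buffered))   # merge the run
            else:
                output.extend(buffered)            # a run of one: keep as-is
            output.append(child)
            buffered = []
    if len(buffered) > 1:
        output.append(''.join(buffered))
    else:
        output.extend(buffered)
    return output

# Letters "convert" to regexps here; digits do not.
def convert(child):
    return child if child.isalpha() else None

assert merge_contiguous(['a', 'b', '1', 'c'], convert) == ['ab', '1', 'c']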
Example #8
 def sequence(self, x):
     '''
     Create a sequence.
     '''
     return Sequence(self.alphabet, *x)