def clone_dfs(use, original, first, start, stop, rest=None, reduce=None,
              generator_manager_queue_len=None):
    '''
    Convert a depth-first repeat matcher into a regexp, supporting general
    (start, stop) counts.

    This forces use=True as it is likely that a regexp is a gain.

    Raises Unsuitable when the repeat cannot be expressed as a regexp
    (inconsistent bounds, a non-trivial reduce, or an active generator
    manager queue).
    '''
    # an empty range like (3, 2) can never match anything useful
    if stop is not None and start > stop:
        raise Unsuitable
    # only the default "accumulate into a list via addition" reduce is
    # representable; anything else would need a transform we cannot model
    if reduce and not (isinstance(reduce, tuple)
                       and len(reduce) == 2
                       and reduce[0] == []
                       and reduce[1] == __add__):
        raise Unsuitable
    if generator_manager_queue_len:
        # this should only be set when running
        raise Unsuitable
    # a repeat that can yield more than one item needs an add to join them
    add_reqd = stop is None or stop > 1
    wrapper = False
    if hasattr(original, 'wrapper') and original.wrapper:
        if original.wrapper.functions[0] is add:
            # the add is subsumed by the regexp; keep the remaining
            # functions so they can be re-applied by the caller
            add_reqd = False
            wrapper = original.wrapper.functions[1:]
        else:
            raise Unsuitable
    rest = first if rest is None else rest
    (use, [first, rest]) = \
        RegexpContainer.to_regexps(True, [first, rest], have_add=None)
    seq = []
    # when first differs from rest it contributes one extra leading match
    if first != rest:
        seq.append(first.clone())
    # pad with copies of rest until the mandatory prefix is long enough
    while len(seq) < start:
        seq.append(rest.clone())
    addzero = len(seq) > start  # first was exceptional and start=0
    if stop:
        if stop > start:
            # use nested form to avoid inefficient nfa
            extras = Option(alphabet_, rest.clone())
            for _i in range(stop - start - 1):
                extras = Option(alphabet_,
                                Sequence(alphabet_, rest.clone(), extras))
            seq.append(extras)
    else:
        # no upper bound, so close with an unbounded repeat
        seq.append(Repeat(alphabet_, rest.clone()))
    regexp = Sequence(alphabet_, *seq)
    if addzero:
        # allow the zero-length match that the exceptional first displaced
        regexp = Choice(alphabet_, regexp, Empty(alphabet_))
    log.debug(fmt('DFS: cloned {0}', regexp))
    return RegexpContainer.build(original, regexp, alphabet_, regexp_type,
                                 use, add_reqd=add_reqd, wrapper=wrapper)
def clone_dfs(use, original, first, start, stop, rest=None):
    '''
    We only convert DFS if start=0 or 1, stop=1 or None and first and rest
    are both regexps.

    This forces use=True as it is likely that a regexp is a gain.

    Returns a RegexpContainer on success, or the unmodified original when
    the matcher cannot be rewritten.
    '''
    assert not isinstance(original, Transformable)
    try:
        # only the four simple repeat shapes (?, *, +, exactly-one) are
        # handled by this variant
        if start not in (0, 1) or stop not in (1, None):
            raise Unsuitable()
        (use, [first, rest]) = \
            RegexpContainer.to_regexps(True, [first, rest])
        # we need to be careful here to get the depth first bit right
        if stop is None:
            # unbounded: first followed by rest* (depth-first order)
            regexp = Sequence([first, Repeat([rest], alphabet)], alphabet)
            if start == 0:
                regexp = Choice([regexp, Empty(alphabet)], alphabet)
        else:
            # stop == 1: at most a single match of first
            regexp = first
            if start == 0:
                regexp = Choice([regexp, Empty(alphabet)], alphabet)
        log.debug(format('DFS: cloned {0}', regexp))
        return RegexpContainer.build(original, regexp, alphabet,
                                     matcher_type, use,
                                     add_reqd=stop is None)
    except Unsuitable:
        log.debug(format('DFS: not rewritten: {0}', original))
        return original
def clone_literal(use, original, text):
    '''
    Rewrite a Literal matcher as a regexp: one single-character match per
    character of the text, joined into a Sequence.
    '''
    singles = []
    for letter in text:
        singles.append(Character([(letter, letter)], alphabet_))
    regexp = Sequence(alphabet_, *singles)
    log.debug(fmt('Literal: cloned {0}', regexp))
    return RegexpContainer.build(original, regexp, alphabet_,
                                 regexp_type, use)
def clone_and(use, original, *matchers):
    '''
    We can convert an And only if all the sub-matchers have possible
    regular expressions, and even then we must tag the result unless an
    add transform is present.

    Returns a matcher/RegexpContainer on success, or the unmodified
    original when the rewrite is unsuitable or the wrapper is wrong.
    '''
    assert isinstance(original, Transformable)
    try:
        # since we're going to require add anyway, we're happy to take
        # other inputs, whether add is required or not.
        (use, regexps) = \
            RegexpContainer.to_regexps(use, matchers, add_reqd=None)
        # if we have regexp sub-expressions, join them
        regexp = Sequence(regexps, alphabet)
        log.debug(format('And: cloning {0}', regexp))
        if use and len(original.wrapper.functions) > 1 \
                and original.wrapper.functions[0] is add:
            # we have additional functions, so cannot take regexp higher,
            # but use is True, so return a new matcher.
            # hack to copy across other functions
            original.wrapper = \
                TransformationWrapper(original.wrapper.functions[1:])
            log.debug('And: OK (final)')
            # NEED TEST FOR THIS
            return single(alphabet, original, regexp, matcher_type)
        elif len(original.wrapper.functions) == 1 \
                and original.wrapper.functions[0] is add:
            # OR JUST ONE?
            # lucky!  we just combine and continue
            log.debug('And: OK')
            return RegexpContainer.build(original, regexp, alphabet,
                                         matcher_type, use,
                                         transform=False)
        elif not original.wrapper:
            # regexp can't return multiple values, so hope that we have
            # an add
            log.debug('And: add required')
            return RegexpContainer.build(original, regexp, alphabet,
                                         matcher_type, use,
                                         add_reqd=True)
        else:
            # any other transformation blocks the rewrite
            log.debug(
                format('And: wrong transformation: {0!r}',
                       original.wrapper))
            return original
    except Unsuitable:
        log.debug(format('And: not rewritten: {0}', original))
        return original
def clone_regexp(use, original, pattern, alphabet=None):
    '''
    Rewrite a Regexp matcher: a string pattern is parsed into a Sequence
    over the alphabet; anything else is passed through unchanged.

    Raises Unsuitable if the pattern cannot be parsed.
    '''
    try:
        if isinstance(pattern, basestring):
            pattern = Sequence(alphabet_, *alphabet_.parse(pattern))
    except (TypeError, Error):
        # cannot parse regexp
        raise Unsuitable
    return RegexpContainer.build(original, pattern, alphabet_,
                                 regexp_type, use)
def clone_any(use, original, restrict=None):
    '''
    We can always convert Any() to a regular expression; the only
    question is whether we have an open range or not.
    '''
    if restrict is None:
        # open range: the single character covers the whole alphabet
        char = Character([(alphabet_.min, alphabet_.max)], alphabet_)
    else:
        # one degenerate (c, c) range per allowed character
        # (renamed loop variable to avoid shadowing the target name)
        char = Character(((c, c) for c in restrict), alphabet_)
    log.debug(fmt('Any: cloned {0}', char))
    regexp = Sequence(alphabet_, char)
    return RegexpContainer.build(original, regexp, alphabet_,
                                 regexp_type, use)
def clone_and(use, original, *matchers):
    '''
    We can convert an And only if all the sub-matchers have possible
    regular expressions, and even then we must tag the result unless an
    add transform is present.

    If a full conversion fails, falls back to merging any contiguous runs
    of regexp-convertible sub-matchers into single regexp matchers.
    '''
    if hasattr(original, 'wrapper'):
        wrapper = original.wrapper.functions
    else:
        wrapper = None
    add_reqd = True
    if wrapper:
        if wrapper[0] is add:
            # the add is absorbed by the regexp; keep the rest for later
            wrapper = wrapper[1:]
            add_reqd = False
        else:
            raise Unsuitable
    try:
        # combine all
        (use, regexps) = \
            RegexpContainer.to_regexps(use, matchers, have_add=None)
        # if we have regexp sub-expressions, join them
        regexp = Sequence(alphabet_, *regexps)
        log.debug(fmt('And: cloning {0}', regexp))
        return RegexpContainer.build(original, regexp, alphabet_,
                                     regexp_type, use, add_reqd=add_reqd,
                                     wrapper=wrapper)
    except Unsuitable:
        # combine contiguous matchers where possible
        if add_reqd:
            raise
        def unpack(matcher):
            # return (plain matcher, regexp or None if not convertible)
            original = RegexpContainer.to_matcher(matcher)
            try:
                return (original,
                        RegexpContainer.to_regexps(use, [matcher],
                                                   have_add=None)[1][0])
            except Unsuitable:
                return (original, None)
        output = []
        (regexps, originals) = ([], [])
        for (matcher, regexp) in [unpack(matcher) for matcher in matchers]:
            if regexp:
                # accumulate a contiguous run of convertible matchers
                regexps.append(regexp)
                originals.append(matcher)
            else:
                if len(regexps) > 1:
                    # combine regexps
                    output.append(
                        regexp_type(Sequence(alphabet_, *regexps),
                                    alphabet_))
                else:
                    # a run of 0 or 1 isn't worth combining; keep as-is
                    output.extend(originals)
                output.append(matcher)
                (regexps, originals) = ([], [])
        # flush any trailing run
        if len(regexps) > 1:
            output.append(
                regexp_type(Sequence(alphabet_, *regexps), alphabet_))
        else:
            output.extend(originals)
        merged = And(*output)
        # re-apply the original wrapper to the merged matcher
        return merged.compose(original.wrapper)
def sequence(self, x):
    '''
    Build a Sequence over this instance's alphabet from the items in x.
    '''
    alphabet = self.alphabet
    return Sequence(alphabet, *x)