def __iadd__(self, matcher):
    '''
    Bind this delayed matcher to the given matcher (the ``+=`` operator).

    The argument is coerced (a string becomes a literal matcher) before
    being stored.  Raises ValueError if a matcher was already bound.
    '''
    if self.matcher:
        raise ValueError('Delayed matcher already bound.')
    else:
        self.matcher = coerce_(matcher)
        # Read _match from the coerced matcher: if the argument was a
        # string, the raw value has no _match attribute and the original
        # `matcher._match` would raise AttributeError.
        self._match = self.matcher._match
        return self
def Word(chars=NfaRegexp('[^%s]' % whitespace), body=None):
    '''
    Match a sequence of non-space characters, joining them together.

    ``chars`` and ``body``, if given as strings, define possible characters
    to use for the first and rest of the characters in the word,
    respectively.  If ``body`` is not given, then ``chars`` is used for the
    entire word.

    They can also specify matchers, which typically should match only a
    single character.  So ``Word(Upper(), Lower())`` would match names that
    begin with an upper case letter, for example, while
    ``Word(AnyBut(Space()))`` (the default) matches any sequence of
    non-space characters.
    '''
    head = coerce_(chars, Any)
    if body is None:
        tail = head
    else:
        tail = coerce_(body, Any)
    return Add(And(head, Star(tail)))
def AnyBut(exclude=None):
    '''
    Match any character except those specified (or, if a matcher is used as
    the exclude, if the matcher fails).

    The argument should be a list of tokens (or a string of suitable
    characters) to exclude, or a matcher.  If omitted all tokens are
    accepted.
    '''
    excluded = coerce_(exclude, Any)
    # Succeed on a character only when the excluded match fails there.
    return And(~Lookahead(excluded), Any())
def __init__(self, matcher, negated=False):
    '''
    On success, no input is consumed.

    If ``negated``, this will succeed if the matcher fails.  If the matcher
    is a string it is coerced to a literal match.
    '''
    super(Lookahead, self).__init__()
    coerced = coerce_(matcher)
    self._arg(matcher=coerced)
    self._karg(negated=negated)
def ContinuedLineFactory(matcher):
    '''
    Create a replacement for ``Line()`` that can match multiple lines if
    they end in the given character/matcher.
    '''
    # A string argument is turned into a token that matches that regexp.
    ending = coerce_(matcher, lambda regexp: Token(regexp))
    restricted = RestrictTokensBy(ending, LineEnd(), LineStart())
    def factory(matcher, indent=True):
        '''Like ``Line()``, but continuing over lines ending as above.'''
        return restricted(Line(matcher, indent=indent))
    return factory
def __init__(self, matcher, function):
    '''
    Wrap a matcher so that the given function transforms its results.
    '''
    super(Transform, self).__init__(function)
    # It's ok that this overwrites the same thing from Transformable
    # (Transformable cannot have an argument because it subclasses
    # OperatorMatcher, and passing in function as a constructor arg
    # is a nightmare).
    self._arg(matcher=coerce_(matcher))
    if isinstance(function, TransformationWrapper):
        wrapper = function
    else:
        wrapper = TransformationWrapper(function)
    self._arg(wrapper=wrapper)
def __call__(self, content, complete=None):
    '''
    If ``complete`` is specified as True or False it overrides the value
    set in the constructor.  If True the content matcher must completely
    match the Token contents.
    '''
    args, kargs = self._constructor_args()
    if complete is None:
        kargs['complete'] = self.complete
    else:
        kargs['complete'] = complete
    kargs['content'] = coerce_(content)
    # Rebuild an instance of the same (possibly sub-)class.
    return type(self)(*args, **kargs)
def __init__(self, min_, max_, escape='\\', escaped=ILLEGAL,
             illegal=ILLEGAL, range=None,
             parser_factory=make_str_parser):
    '''
    Create a string alphabet spanning ``min_`` to ``max_``, with the given
    escape conventions and a parser built by ``parser_factory``.
    '''
    # Local import avoids a circular dependency with the core matchers.
    from lepl.matchers.core import Any, Never
    super(StrAlphabet, self).__init__(min_, max_)
    self.__escape = escape
    self.__escaped = coerce_(escaped, Any)
    self.__illegal = illegal
    if range:
        self.__range = range
    else:
        self.__range = Never()
    self._parser = parser_factory(self)
def Repeat(matcher, start=0, stop=None, algorithm=DEPTH_FIRST,
           separator=None, add_=False):
    '''
    This is called by the [] operator.  It repeats the given matcher
    between start and stop number of times (inclusive).  If ``add`` is
    true then the results are joined with `Add`.  If ``separator`` is
    given then each repetition is separated by that matcher.
    '''
    first = coerce_(matcher)
    if separator is None:
        rest = first
    else:
        rest = And(coerce_(separator, Regexp), first)
    if start is None:
        start = 0
    assert_type('The start index for Repeat or [...]', start, int)
    assert_type('The stop index for Repeat or [...]', stop, int, none_ok=True)
    assert_type('The algorithm/increment for Repeat or [...]', algorithm, str)
    if start < 0:
        raise ValueError('Repeat or [...] cannot have a negative start.')
    if stop is not None and stop < start:
        raise ValueError('Repeat or [...] must have a stop '
                         'value greater than or equal to the start.')
    if 'dbgn'.find(algorithm) == -1:
        raise ValueError('Repeat or [...] must have a step (algorithm) '
                         'of d, b, g or n.')
    wrap = Add if add_ else Identity
    # Select the search strategy, then apply the (optional) Add wrapper.
    core = {DEPTH_FIRST:
                DepthFirst(first=first, start=start, stop=stop, rest=rest),
            BREADTH_FIRST:
                BreadthFirst(first=first, start=start, stop=stop, rest=rest),
            GREEDY:
                OrderByResultCount(
                    BreadthFirst(first=first, start=start,
                                 stop=stop, rest=rest)),
            NON_GREEDY:
                OrderByResultCount(
                    BreadthFirst(first=first, start=start,
                                 stop=stop, rest=rest),
                    False)}[algorithm]
    return wrap(core)
def _ContinuedLineFactory(continuation, base):
    '''
    Return the base (line) matcher, modified so that it applies its
    contents to a stream which continues past line breaks if the given
    token is present.
    '''
    token = coerce_(continuation, Token)
    def ContinuedLine(matcher):
        '''
        Like `base`, but continues over multiple lines if the continuation
        token is found at the end of each line.
        '''
        multiple = ExcludeSequence(any_token, [token, Eol(), Indent()])
        return base(multiple(matcher))
    return ContinuedLine
def __init__(self, separator):
    '''
    If the separator is a string it is coerced to `Regexp()`; if None
    then any previous defined separator is effectively removed.
    '''
    # Local imports handle circular dependencies.
    from lepl.matchers.core import Regexp
    from lepl.matchers.combine import And
    from lepl.matchers.support import coerce_
    if separator is None:
        (and_, repeat) = (And, RepeatWrapper)
    else:
        (and_, repeat) = self._replacements(coerce_(separator, Regexp))
    super(_BaseSeparator, self).__init__(and_=and_, repeat=repeat)
def Identity(matcher):
    '''
    Functions identically to the matcher given as an argument (coercing
    strings to literal matchers).
    '''
    return coerce_(matcher)
def Star(matcher):
    '''
    Match zero or more instances of a matcher (**[0:]**)
    '''
    repeated = coerce_(matcher)
    return Repeat(repeated)
def Optional(matcher):
    '''
    Match zero or one instances of a matcher (**[0:1]**).
    '''
    repeated = coerce_(matcher)
    return Repeat(repeated, stop=1)
def Repeat(matcher, start=0, stop=None, limit=None, algorithm=DEPTH_FIRST,
           separator=None, add_=False, reduce=None):
    '''
    This is called by the [] operator.  It repeats the given matcher
    between `start` and `stop` number of times (inclusive).

    If `limit` is given it is an upper limit on the number of different
    results returned on backtracking.

    `algorithm` selects the repeat algorithm to use.

    If `separator` is given then each repetition is separated by that
    matcher.

    If `add_` is true then the results are joined with `Add` (once all
    results are obtained).

    If `reduce` is given it should be a pair (zero, join) where
    `join(results, next)` is used to accumulate results and `zero` is the
    initial value of `results`.  This is implemented via `Reduce`.

    `reduce` and `add_` cannot be given together.
    '''
    first = coerce_(matcher)
    if separator is None:
        rest = first
    else:
        rest = And(coerce_(separator, Regexp), first)
    if start is None:
        start = 0
    # No type/range assertions here: mutable "duck typed" index values
    # (IntVar etc) are deliberately allowed.
    if add_ and reduce:
        raise ValueError('Repeat cannot apply both add_ and reduce')
    if add_:
        process = Add
    elif reduce:
        def process(matcher):
            '''Fold results via the (zero, join) pair.'''
            return Reduce(matcher, reduce[0], reduce[1])
    else:
        process = Identity
    # Select the search strategy, then apply the post-processing wrapper.
    core = {DEPTH_FIRST:
                DepthFirst(first=first, start=start, stop=stop, rest=rest),
            BREADTH_FIRST:
                BreadthFirst(first=first, start=start, stop=stop, rest=rest),
            GREEDY:
                OrderByResultCount(
                    BreadthFirst(first=first, start=start,
                                 stop=stop, rest=rest)),
            NON_GREEDY:
                OrderByResultCount(
                    BreadthFirst(first=first, start=start,
                                 stop=stop, rest=rest),
                    False)}[algorithm]
    matcher = process(core)
    if limit is not None:
        matcher = Limit(matcher, count=limit)
    return matcher
def Plus(matcher):
    '''
    Match one or more instances of a matcher (**[1:]**)
    '''
    repeated = coerce_(matcher)
    return Repeat(repeated, start=1)
def Repeat(matcher, start=0, stop=None, limit=None, algorithm=DEPTH_FIRST,
           separator=None, add_=False, reduce=None):
    '''
    This is called by the [] operator.  It repeats the given matcher
    between `start` and `stop` number of times (inclusive).

    If `limit` is given it is an upper limit on the number of different
    results returned on backtracking.

    `algorithm` selects the repeat algorithm to use.

    If `separator` is given then each repetition is separated by that
    matcher.

    If `add_` is true then the results are joined with `Add` (once all
    results are obtained).

    If `reduce` is given it should be a pair (zero, join) where
    `join(results, next)` is used to accumulate results and `zero` is the
    initial value of `results`.  This is implemented via `Reduce`.

    `reduce` and `add_` cannot be given together.
    '''
    first = coerce_(matcher)
    if separator is None:
        rest = first
    else:
        rest = And(coerce_(separator, Regexp), first)
    if start is None:
        start = 0
    # No type/range assertions here: mutable "duck typed" index values
    # (IntVar etc) are deliberately allowed.
    if add_ and reduce:
        raise ValueError('Repeat cannot apply both add_ and reduce')
    if add_:
        process = Add
    elif reduce:
        def process(matcher):
            '''Fold results via the (zero, join) pair.'''
            return Reduce(matcher, reduce[0], reduce[1])
    else:
        process = Identity
    # Select the repeat strategy for the requested algorithm.
    if algorithm == DEPTH_FIRST:
        selected = DepthFirst(first=first, start=start, stop=stop, rest=rest)
    elif algorithm == BREADTH_FIRST:
        selected = BreadthFirst(first=first, start=start, stop=stop, rest=rest)
    elif algorithm == GREEDY:
        selected = OrderByResultCount(
            BreadthFirst(first=first, start=start, stop=stop, rest=rest))
    elif algorithm == NON_GREEDY:
        selected = OrderByResultCount(
            BreadthFirst(first=first, start=start, stop=stop, rest=rest),
            False)
    else:
        # Preserve the KeyError a dict lookup on algorithm would raise.
        raise KeyError(algorithm)
    matcher = process(selected)
    if limit is not None:
        matcher = Limit(matcher, count=limit)
    return matcher