Exemplo n.º 1
0
 def __iadd__(self, matcher):
     '''
     Bind the real matcher to this delayed matcher (the ``+=`` operator).

     The argument is passed through `coerce_` and stored as `self.matcher`;
     `self._match` is then taken from that stored matcher.  Returns `self`
     so that the augmented assignment leaves the name bound to this object.

     Raises ValueError if a matcher has already been bound.
     '''
     if self.matcher:
         raise ValueError('Delayed matcher already bound.')
     bound = coerce_(matcher)
     self.matcher = bound
     # Take _match from the coerced object rather than the raw argument:
     # if coerce_ returned a new matcher (presumably the case for plain
     # strings - confirm), the raw argument would have no _match attribute.
     self._match = bound._match
     return self
Exemplo n.º 2
0
 def __iadd__(self, matcher):
     '''
     Bind the real matcher to this delayed matcher (the ``+=`` operator).

     The argument is passed through `coerce_` and stored as `self.matcher`;
     `self._match` is then taken from that stored matcher.  Returns `self`
     so that the augmented assignment leaves the name bound to this object.

     Raises ValueError if a matcher has already been bound.
     '''
     if self.matcher:
         raise ValueError('Delayed matcher already bound.')
     bound = coerce_(matcher)
     self.matcher = bound
     # Take _match from the coerced object rather than the raw argument:
     # if coerce_ returned a new matcher (presumably the case for plain
     # strings - confirm), the raw argument would have no _match attribute.
     self._match = bound._match
     return self
Exemplo n.º 3
0
def Word(chars=NfaRegexp('[^%s]' % whitespace), body=None):
    '''
    Match a run of characters and join them together into one result.

    `chars` describes the first character of the word and `body` the
    remaining characters.  Each may be a string of permitted characters
    (coerced via `Any`) or a matcher, which typically should match only a
    single character.  If `body` is None then `chars` is used for every
    character of the word.

    For example, ``Word(Upper(), Lower())`` would match names that begin
    with an upper case letter.  The default `chars` is a regular expression
    for a single character that is not in `whitespace`, so ``Word()``
    matches any sequence of non-space characters.
    '''
    head = coerce_(chars, Any)
    if body is None:
        tail = head
    else:
        tail = coerce_(body, Any)
    sequence = And(head, Star(tail))
    return Add(sequence)
Exemplo n.º 4
0
def Word(chars=NfaRegexp('[^%s]' % whitespace), body=None):
    '''
    Match a run of characters and join them together into one result.

    `chars` describes the first character of the word and `body` the
    remaining characters.  Each may be a string of permitted characters
    (coerced via `Any`) or a matcher, which typically should match only a
    single character.  If `body` is None then `chars` is used for every
    character of the word.

    For example, ``Word(Upper(), Lower())`` would match names that begin
    with an upper case letter.  The default `chars` is a regular expression
    for a single character that is not in `whitespace`, so ``Word()``
    matches any sequence of non-space characters.
    '''
    head = coerce_(chars, Any)
    if body is None:
        tail = head
    else:
        tail = coerce_(body, Any)
    sequence = And(head, Star(tail))
    return Add(sequence)
Exemplo n.º 5
0
def AnyBut(exclude=None):
    '''
    Match a single token via `Any()`, provided a negated lookahead on
    `exclude` succeeds first.

    `exclude` should be a list of tokens (or a string of suitable
    characters) to reject, or a matcher; it is passed through `coerce_`
    with `Any` as the converter.  When a matcher is given, the token is
    accepted only if that matcher fails.

    NOTE(review): the original documentation states that omitting `exclude`
    accepts all tokens; here None is simply handed to `coerce_` - confirm
    that behaviour against `coerce_` and `Lookahead`.
    '''
    excluded = coerce_(exclude, Any)
    not_excluded = ~Lookahead(excluded)
    return And(not_excluded, Any())
Exemplo n.º 6
0
 def __init__(self, matcher, negated=False):
     '''
     Set up a lookahead around `matcher`.

     matcher: the matcher to test; a string is coerced to a literal match.
     negated: if true, the lookahead succeeds when `matcher` fails.

     A successful lookahead consumes no input.
     '''
     super(Lookahead, self).__init__()
     target = coerce_(matcher)
     self._arg(matcher=target)
     self._karg(negated=negated)
Exemplo n.º 7
0
def AnyBut(exclude=None):
    '''
    Match a single token via `Any()`, provided a negated lookahead on
    `exclude` succeeds first.

    `exclude` should be a list of tokens (or a string of suitable
    characters) to reject, or a matcher; it is passed through `coerce_`
    with `Any` as the converter.  When a matcher is given, the token is
    accepted only if that matcher fails.

    NOTE(review): the original documentation states that omitting `exclude`
    accepts all tokens; here None is simply handed to `coerce_` - confirm
    that behaviour against `coerce_` and `Lookahead`.
    '''
    excluded = coerce_(exclude, Any)
    not_excluded = ~Lookahead(excluded)
    return And(not_excluded, Any())
Exemplo n.º 8
0
 def __init__(self, matcher, negated=False):
     '''
     Set up a lookahead around `matcher`.

     matcher: the matcher to test; a string is coerced to a literal match.
     negated: if true, the lookahead succeeds when `matcher` fails.

     A successful lookahead consumes no input.
     '''
     super(Lookahead, self).__init__()
     target = coerce_(matcher)
     self._arg(matcher=target)
     self._karg(negated=negated)
Exemplo n.º 9
0
def ContinuedLineFactory(matcher):
    '''
    Build a replacement for ``Line()`` that can match multiple lines when
    they end in the given character/matcher.

    `matcher` is passed through `coerce_` with a converter that builds a
    `Token`, and the result is used, together with `LineEnd()` and
    `LineStart()`, to construct a `RestrictTokensBy` wrapper.

    Returns a function ``factory(matcher, indent=True)`` that applies that
    wrapper to ``Line(matcher, indent=indent)``.
    '''
    def to_token(regexp):
        return Token(regexp)

    continuation = coerce_(matcher, to_token)
    restricted = RestrictTokensBy(continuation, LineEnd(), LineStart())

    def factory(matcher, indent=True):
        line = Line(matcher, indent=indent)
        return restricted(line)

    return factory
Exemplo n.º 10
0
 def __init__(self, matcher, function):
     '''
     Store the matcher to transform and the transformation to apply.

     matcher: passed through `coerce_` and registered as the `matcher`
     argument.
     function: the transformation; anything that is not already a
     `TransformationWrapper` is wrapped in one before being registered as
     the `wrapper` argument.
     '''
     super(Transform, self).__init__(function)
     self._arg(matcher=coerce_(matcher))
     if isinstance(function, TransformationWrapper):
         wrapper = function
     else:
         wrapper = TransformationWrapper(function)
     # Registering `wrapper` here overwrites the same thing set up by
     # Transformable, which is fine (Transformable cannot take it as an
     # argument because it subclasses OperatorMatcher, and passing the
     # function in as a constructor arg is a nightmare).
     self._arg(wrapper=wrapper)
Exemplo n.º 11
0
 def __call__(self, content, complete=None):
     '''
     Create a new instance of this object's type with a content matcher.

     content: passed through `coerce_` and supplied as the `content`
     keyword argument.
     complete: True or False overrides the value held in `self.complete`;
     None (the default) keeps that value.  If true, the content matcher
     must completely match the Token contents.

     The remaining constructor arguments come from
     `self._constructor_args()`.
     '''
     args, kargs = self._constructor_args()
     if complete is None:
         complete = self.complete
     kargs['complete'] = complete
     kargs['content'] = coerce_(content)
     return type(self)(*args, **kargs)
Exemplo n.º 12
0
 def __init__(self, min_, max_, escape='\\', escaped=ILLEGAL,
              illegal=ILLEGAL, range=None,
              parser_factory=make_str_parser):
     '''
     Initialise the alphabet and build its parser.

     min_, max_: forwarded unchanged to the parent constructor.
     escape: stored as given.
     escaped: stored after `coerce_` with `Any` as the converter.
     illegal: stored as given.
     range: stored if truthy; otherwise a fresh `Never()` is used.
     parser_factory: called with this instance; the result is kept as
     `self._parser`.
     '''
     # Imported inside the method (presumably to avoid a circular
     # import - confirm).
     from lepl.matchers.core import Any, Never
     super(StrAlphabet, self).__init__(min_, max_)
     self.__escape = escape
     self.__escaped = coerce_(escaped, Any)
     self.__illegal = illegal
     if range:
         self.__range = range
     else:
         self.__range = Never()
     # The factory is handed this instance, so it runs only after the
     # attributes above have been set.
     self._parser = parser_factory(self)
Exemplo n.º 13
0
def Repeat(matcher, start=0, stop=None, algorithm=DEPTH_FIRST, 
            separator=None, add_=False):
    '''
    This is called by the [] operator.  It repeats the given matcher between
    start and stop number of times (inclusive).

    matcher: the matcher to repeat (passed through `coerce_`).
    start: minimum number of repetitions; None is treated as 0.
    stop: maximum number of repetitions, or None.
    algorithm: one of DEPTH_FIRST, BREADTH_FIRST, GREEDY or NON_GREEDY.
    separator: if given, each repetition after the first is preceded by
    this matcher (strings are coerced with `Regexp`).
    add_: if true, the results are joined with `Add`; otherwise the
    repetition is wrapped in `Identity`.

    Raises ValueError if start is negative, if stop is less than start, or
    if algorithm is not one of the four supported values.  Type errors are
    reported by `assert_type`.
    '''
    first = coerce_(matcher)
    if separator is None:
        rest = first
    else:
        rest = And(coerce_(separator, Regexp), first)
    if start is None:
        start = 0
    assert_type('The start index for Repeat or [...]', start, int)
    assert_type('The stop index for Repeat or [...]', stop, int, none_ok=True)
    assert_type('The algorithm/increment for Repeat or [...]', algorithm, str)
    if start < 0:
        raise ValueError('Repeat or [...] cannot have a negative start.')
    if stop is not None and stop < start:
        raise ValueError('Repeat or [...] must have a stop '
                         'value greater than or equal to the start.')
    # Check membership against the actual dispatch keys.  A substring test
    # on 'dbgn' would also accept '' and multi-character values such as
    # 'db', which are not valid algorithms.
    if algorithm not in (DEPTH_FIRST, BREADTH_FIRST, GREEDY, NON_GREEDY):
        raise ValueError('Repeat or [...] must have a step (algorithm) '
                         'of d, b, g or n.')
    # Build only the matcher that was asked for, rather than constructing
    # all four variants and discarding three.
    if algorithm == DEPTH_FIRST:
        repeated = DepthFirst(first=first, start=start, stop=stop, rest=rest)
    else:
        repeated = BreadthFirst(first=first, start=start,
                                stop=stop, rest=rest)
        if algorithm == GREEDY:
            repeated = OrderByResultCount(repeated)
        elif algorithm == NON_GREEDY:
            repeated = OrderByResultCount(repeated, False)
    wrap = Add if add_ else Identity
    return wrap(repeated)
Exemplo n.º 14
0
def ContinuedLineFactory(matcher):
    '''
    Build a replacement for ``Line()`` that can match multiple lines when
    they end in the given character/matcher.

    `matcher` is passed through `coerce_` with a converter that builds a
    `Token`, and the result is used, together with `LineEnd()` and
    `LineStart()`, to construct a `RestrictTokensBy` wrapper.

    Returns a function ``factory(matcher, indent=True)`` that applies that
    wrapper to ``Line(matcher, indent=indent)``.
    '''
    def to_token(regexp):
        return Token(regexp)

    continuation = coerce_(matcher, to_token)
    restricted = RestrictTokensBy(continuation, LineEnd(), LineStart())

    def factory(matcher, indent=True):
        line = Line(matcher, indent=indent)
        return restricted(line)

    return factory
Exemplo n.º 15
0
 def __init__(self,
              min_,
              max_,
              escape='\\',
              escaped=ILLEGAL,
              illegal=ILLEGAL,
              range=None,
              parser_factory=make_str_parser):
     '''
     Initialise the alphabet and build its parser.

     min_, max_: forwarded unchanged to the parent constructor.
     escape: stored as given.
     escaped: stored after `coerce_` with `Any` as the converter.
     illegal: stored as given.
     range: stored if truthy; otherwise a fresh `Never()` is used.
     parser_factory: called with this instance; the result is kept as
     `self._parser`.
     '''
     # Imported inside the method (presumably to avoid a circular
     # import - confirm).
     from lepl.matchers.core import Any, Never
     super(StrAlphabet, self).__init__(min_, max_)
     self.__escape = escape
     self.__escaped = coerce_(escaped, Any)
     self.__illegal = illegal
     if range:
         self.__range = range
     else:
         self.__range = Never()
     # The factory is handed this instance, so it runs only after the
     # attributes above have been set.
     self._parser = parser_factory(self)
Exemplo n.º 16
0
def _ContinuedLineFactory(continuation, base):
    '''
    Return a variant of the base (line) matcher whose contents are applied
    to a stream that continues past line breaks when the given token is
    present.

    continuation: the continuation marker, passed through `coerce_` with
    `Token` as the converter.
    base: the line matcher to wrap.
    '''
    marker = coerce_(continuation, Token)

    def ContinuedLine(matcher):
        '''
        Behaves like `base`, except that it carries on over multiple lines
        when the continuation token is found at the end of each line.
        '''
        excluded = [marker, Eol(), Indent()]
        multiple = ExcludeSequence(any_token, excluded)
        return base(multiple(matcher))

    return ContinuedLine
Exemplo n.º 17
0
def _ContinuedLineFactory(continuation, base):
    '''
    Return a variant of the base (line) matcher whose contents are applied
    to a stream that continues past line breaks when the given token is
    present.

    continuation: the continuation marker, passed through `coerce_` with
    `Token` as the converter.
    base: the line matcher to wrap.
    '''
    marker = coerce_(continuation, Token)

    def ContinuedLine(matcher):
        '''
        Behaves like `base`, except that it carries on over multiple lines
        when the continuation token is found at the end of each line.
        '''
        excluded = [marker, Eol(), Indent()]
        multiple = ExcludeSequence(any_token, excluded)
        return base(multiple(matcher))

    return ContinuedLine
Exemplo n.º 18
0
 def __init__(self, separator):
     '''
     Install replacement `and_` and `repeat` operators built around the
     separator.

     A string separator is coerced to `Regexp()`.  If the separator is
     None, the plain `And` and `RepeatWrapper` are used instead, which
     effectively removes any previously defined separator.
     '''
     # Imported here, not at module level, to handle circular dependencies.
     from lepl.matchers.core import Regexp
     from lepl.matchers.combine import And
     from lepl.matchers.support import coerce_
     if separator is not None:
         coerced = coerce_(separator, Regexp)
         and_, repeat = self._replacements(coerced)
     else:
         and_, repeat = And, RepeatWrapper
     super(_BaseSeparator, self).__init__(and_=and_, repeat=repeat)
Exemplo n.º 19
0
 def __init__(self, separator):
     '''
     Install replacement `and_` and `repeat` operators built around the
     separator.

     A string separator is coerced to `Regexp()`.  If the separator is
     None, the plain `And` and `RepeatWrapper` are used instead, which
     effectively removes any previously defined separator.
     '''
     # Imported here, not at module level, to handle circular dependencies.
     from lepl.matchers.core import Regexp
     from lepl.matchers.combine import And
     from lepl.matchers.support import coerce_
     if separator is not None:
         coerced = coerce_(separator, Regexp)
         and_, repeat = self._replacements(coerced)
     else:
         and_, repeat = And, RepeatWrapper
     super(_BaseSeparator, self).__init__(and_=and_, repeat=repeat)
Exemplo n.º 20
0
def Identity(matcher):
    '''
    Behave identically to the matcher passed in.

    The argument is run through `coerce_` and the result is returned as is.
    '''
    unchanged = coerce_(matcher)
    return unchanged
Exemplo n.º 21
0
def Star(matcher):
    '''
    Repeat a matcher zero or more times (**[0:]**).

    The argument is coerced and handed to `Repeat` with its default bounds.
    '''
    repeated = coerce_(matcher)
    return Repeat(repeated)
Exemplo n.º 22
0
def Optional(matcher):
    '''
    Repeat a matcher zero or one times (**[0:1]**).

    The argument is coerced and handed to `Repeat` with ``stop=1``.
    '''
    optional = coerce_(matcher)
    return Repeat(optional, stop=1)
Exemplo n.º 23
0
def Repeat(matcher, start=0, stop=None, limit=None, algorithm=DEPTH_FIRST, 
            separator=None, add_=False, reduce=None):
    '''
    Called by the [] operator: repeat the given matcher between `start` and
    `stop` number of times (inclusive).

    `limit`, if given, is an upper limit on the number of different results
    returned on backtracking (the result is wrapped in `Limit`).

    `algorithm` selects the repeat algorithm to use (DEPTH_FIRST,
    BREADTH_FIRST, GREEDY or NON_GREEDY).

    `separator`, if given, is a matcher that separates each repetition
    (strings are coerced with `Regexp`).

    `add_`, if true, joins the results with `Add` (once all results are
    obtained).

    `reduce`, if given, should be a pair (zero, join) where
    `join(results, next)` is used to accumulate results and `zero` is the
    initial value of `results`.  This is implemented via `Reduce`.

    `reduce` and `add_` cannot be given together (ValueError is raised).
    '''
    first = coerce_(matcher)
    rest = first if separator is None \
        else And(coerce_(separator, Regexp), first)
    if start is None:
        start = 0
    # start, stop, limit and algorithm are deliberately not type- or
    # range-checked here, to allow duck typing (mutable values - IntVar
    # etc).
    if add_ and reduce:
        raise ValueError('Repeat cannot apply both add_ and reduce')
    if add_:
        process = Add
    elif reduce:
        def process(r):
            return Reduce(r, reduce[0], reduce[1])
    else:
        process = Identity
    bounds = dict(first=first, start=start, stop=stop, rest=rest)
    # Every variant is constructed, in this order, before one is selected.
    depth_first = process(DepthFirst(**bounds))
    breadth_first = process(BreadthFirst(**bounds))
    greedy = process(OrderByResultCount(BreadthFirst(**bounds)))
    non_greedy = process(OrderByResultCount(BreadthFirst(**bounds), False))
    by_algorithm = {
        DEPTH_FIRST: depth_first,
        BREADTH_FIRST: breadth_first,
        GREEDY: greedy,
        NON_GREEDY: non_greedy,
    }
    matcher = by_algorithm[algorithm]
    if limit is None:
        return matcher
    return Limit(matcher, count=limit)
Exemplo n.º 24
0
def Plus(matcher):
    '''
    Repeat a matcher one or more times (**[1:]**).

    The argument is coerced and handed to `Repeat` with ``start=1``.
    '''
    repeated = coerce_(matcher)
    return Repeat(repeated, start=1)
Exemplo n.º 25
0
def Optional(matcher):
    '''
    Repeat a matcher zero or one times (**[0:1]**).

    The argument is coerced and handed to `Repeat` with ``stop=1``.
    '''
    optional = coerce_(matcher)
    return Repeat(optional, stop=1)
Exemplo n.º 26
0
def Repeat(matcher,
           start=0,
           stop=None,
           limit=None,
           algorithm=DEPTH_FIRST,
           separator=None,
           add_=False,
           reduce=None):
    '''
    Called by the [] operator: repeat the given matcher between `start` and
    `stop` number of times (inclusive).

    `limit`, if given, is an upper limit on the number of different results
    returned on backtracking (the result is wrapped in `Limit`).

    `algorithm` selects the repeat algorithm to use (DEPTH_FIRST,
    BREADTH_FIRST, GREEDY or NON_GREEDY).

    `separator`, if given, is a matcher that separates each repetition
    (strings are coerced with `Regexp`).

    `add_`, if true, joins the results with `Add` (once all results are
    obtained).

    `reduce`, if given, should be a pair (zero, join) where
    `join(results, next)` is used to accumulate results and `zero` is the
    initial value of `results`.  This is implemented via `Reduce`.

    `reduce` and `add_` cannot be given together (ValueError is raised).
    '''
    first = coerce_(matcher)
    rest = first if separator is None \
        else And(coerce_(separator, Regexp), first)
    if start is None:
        start = 0
    # start, stop, limit and algorithm are deliberately not type- or
    # range-checked here, to allow duck typing (mutable values - IntVar
    # etc).
    if add_ and reduce:
        raise ValueError('Repeat cannot apply both add_ and reduce')
    if add_:
        process = Add
    elif reduce:
        def process(r):
            return Reduce(r, reduce[0], reduce[1])
    else:
        process = Identity
    bounds = dict(first=first, start=start, stop=stop, rest=rest)
    # Every variant is constructed, in this order, before one is selected.
    depth_first = process(DepthFirst(**bounds))
    breadth_first = process(BreadthFirst(**bounds))
    greedy = process(OrderByResultCount(BreadthFirst(**bounds)))
    non_greedy = process(OrderByResultCount(BreadthFirst(**bounds), False))
    by_algorithm = {
        DEPTH_FIRST: depth_first,
        BREADTH_FIRST: breadth_first,
        GREEDY: greedy,
        NON_GREEDY: non_greedy,
    }
    matcher = by_algorithm[algorithm]
    if limit is None:
        return matcher
    return Limit(matcher, count=limit)
Exemplo n.º 27
0
def Identity(matcher):
    '''
    Behave identically to the matcher passed in.

    The argument is run through `coerce_` and the result is returned as is.
    '''
    unchanged = coerce_(matcher)
    return unchanged
Exemplo n.º 28
0
def Plus(matcher):
    '''
    Repeat a matcher one or more times (**[1:]**).

    The argument is coerced and handed to `Repeat` with ``start=1``.
    '''
    repeated = coerce_(matcher)
    return Repeat(repeated, start=1)
Exemplo n.º 29
0
def Star(matcher):
    '''
    Repeat a matcher zero or more times (**[0:]**).

    The argument is coerced and handed to `Repeat` with its default bounds.
    '''
    repeated = coerce_(matcher)
    return Repeat(repeated)