Пример #1
0
 def run_test(self, name, text, parse, match2, match3, error,
              config=lambda x: None, **kargs):
     '''
     Exercise the `parse<name>` and `match<name>` methods of matchers
     built from `Any()`.

     `name` is the method suffix appended to 'parse' / 'match'; `text` is
     the input; `parse`, `match2` and `match3` are the expected string
     forms of the results; `error` is the expected text of the
     `FullFirstMatchException`.  `config` is applied to every matcher
     before use and `kargs` are forwarded to the parser.

     Note that the final check is only made if the exception is actually
     raised; a parse that succeeds passes silently.
     '''
     parse_attr = 'parse' + name
     match_attr = 'match' + name

     # open-ended repetition, via the parse method
     greedy = Any()[:, ...]
     config(greedy)
     parse_all = getattr(greedy, parse_attr)
     assert_str(str(parse_all(text, **kargs)), parse)

     # fixed repetition counts, via the match method, with the
     # full-first-match check switched off
     for (count, expected) in ((2, match2), (3, match3)):
         repeated = Any()[count, ...]
         repeated.config.no_full_first_match()
         config(repeated)
         match_all = getattr(repeated, match_attr)
         assert_str(str(list(match_all(text, **kargs))), expected)

     # a single Any() through the parse method; compare the error text
     single = Any()
     config(single)
     parse_one = getattr(single, parse_attr)
     try:
         parse_one(text, **kargs)
     except FullFirstMatchException as e:
         assert_str(e, error)
Пример #2
0
 def test_complex(self):
     '''
     Parse 'ab' with `Any() & Any('b')` wrapped in a `Consumer` and check
     that both characters are returned.

     According to the original note, this needs sub-matchers to be
     evaluated via trampolining, so a failure may point to a problem
     with generator_matcher.
     '''
     wrapped = Consumer(Any() & Any('b'))
     parse = wrapped.get_parse()
     outcome = parse('ab')
     assert ['a', 'b'] == outcome, outcome
Пример #3
0
 def test_post_condition(self):
     '''
     Check `PostCondition` around `Any()[3, ...]` on the input 'abc':
     two predicates are expected to make the match fail and two are
     expected to let the literal through.
     '''
     rejecting = (lambda x: False,
                  lambda x: len(x[0]) == 2)
     accepting = (lambda x: True,
                  lambda x: len(x[0]) == 3)
     for predicate in rejecting:
         self.assert_fail('abc', PostCondition(Any()[3, ...], predicate))
     for predicate in accepting:
         self.assert_literal('abc', PostCondition(Any()[3, ...], predicate))
Пример #4
0
def UnsignedReal(decimal='.'):
    '''
    Build a matcher for a run of digits that may contain a decimal point,
    so that both integer and float values are matched.

    `decimal` is passed to `Any` to match the decimal point.  Two
    alternatives are combined with `Or`: digits after a decimal point
    (with optional leading digits), or digits with an optional trailing
    decimal point.
    '''
    fraction_form = Join(Optional(UnsignedInteger()),
                         Any(decimal),
                         UnsignedInteger())
    integer_form = Join(UnsignedInteger(), Optional(Any(decimal)))
    return Or(fraction_form, integer_form)
Пример #5
0
def UnsignedFloat(decimal='.'):
    '''
    Build a matcher for a run of digits that must contain a decimal
    point, i.e. real values that are not integers.

    `decimal` is passed to `Any` to match the decimal point.  Two
    alternatives are combined with `Or`: digits after a decimal point
    (with optional leading digits), or digits followed by a mandatory
    decimal point.
    '''
    fraction_form = Join(Optional(UnsignedInteger()),
                         Any(decimal),
                         UnsignedInteger())
    trailing_point_form = Join(UnsignedInteger(), Any(decimal))
    return Or(fraction_form, trailing_point_form)
Пример #6
0
 def assert_separator(self, stream, start, stop, algorithm, target):
     '''
     Match `stream` with a `Repeat` of `Any('abc')` separated by commas
     and compare the joined results with `target`.

     `start`, `stop` and `algorithm` are passed straight to `Repeat`.
     Each result is the concatenation of the strings in one match.
     '''
     item = Any('abc')
     comma = Any(',')
     repeat = Repeat(item, start, stop,
                     algorithm=algorithm, separator=comma)
     repeat.config.no_full_first_match()
     joined = []
     for (pieces, _remaining) in repeat.match_string(stream):
         joined.append(''.join(pieces))
     assert target == joined, joined
Пример #7
0
 def test_exclude_sequence(self):
     '''
     Check `ExcludeSequence(eq, 'abc')`: `parse_null` must raise a
     `FilterException` with a fixed message, while `parse_string` must
     return the input with the excluded sequence removed.
     '''
     # basicConfig(level=DEBUG)  # enable for debugging
     text = 'ababcdababcabcdbcd'
     exclude = ExcludeSequence(eq, 'abc')
     try:
         exclude(Any()[:, ...]).parse_null(text)
     except FilterException as error:
         assert str(error) == 'Can only filter LocationStream instances.'
     else:
         assert False, 'expected error'
     filtered = exclude(Any()[:, ...]).parse_string(text)
     assert filtered == ['abdabdbcd'], filtered
Пример #8
0
    def test_table(self):
        '''
        Apply a three-column `Columns` matcher, repeated twice, to a
        two-line table and check the text extracted for each column.
        '''
        # basicConfig(level=DEBUG)  # enable for debugging
        table = '0123456789\nabcdefghij\n'
        row = Columns(((0, 3), Any()[3:, ...]),
                      ((0, 4), Any()[4:, ...]),
                      ((5, 8), Any()[3:, ...]))
        expected = [['012', '0123', '567',
                     'abc', 'abcd', 'fgh']]
        self.assert_direct(table, row[2], expected)
Пример #9
0
def SkipString(quote='"', escape='\\', ignore='\n', empty='', join=__add__):
    '''
    Like `String`, matching across multiple lines, but the characters in
    `ignore` (newlines by default) are silently dropped.

    `quote` delimits the string and is dropped from the result.  If
    `escape` is not empty, an escape followed by a quote contributes the
    quote (the escape itself is dropped).  `empty` and `join` are passed
    to `Repeat` as the `reduce` pair.
    '''
    delimiter = Literal(quote)
    # any character that is neither the quote nor an ignored character
    piece = AnyBut(Or(delimiter, Any(ignore)))
    if escape:
        piece = Or(piece, And(Drop(escape), delimiter))
    # ignored characters are consumed, but contribute nothing
    piece = Or(piece, Drop(Any(ignore)))
    body = Repeat(piece, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
Пример #10
0
 def test_add(self):
     '''
     Check that matchers combined with `+` give a single joined result,
     and that nothing is returned when the input is too short.
     '''
     # basicConfig(level=DEBUG)  # enable for debugging
     joined = {'sub_list': False, 'join': ''.join}
     self.assert_list(['1', '2'], Any() + Any(), [['12']], **joined)
     self.assert_list(['1', '2', '3'], Any() + Any(), [['12']], **joined)
     self.assert_list(['1', '2', '3'], Any() + Any() + Any(), [['123']],
                      **joined)
     self.assert_list(['1'], Any() + Any(), [])
Пример #11
0
def UnsignedEFloat(decimal='.', exponent='eE'):
    '''
    As `UnsignedEReal`, but the value must contain a decimal point or an
    exponent, so only real values that are not integers are matched.

    Either an `UnsignedReal` followed by an exponent character (one of
    `exponent`) and a `SignedInteger`, or a plain `UnsignedFloat`.
    '''
    with_exponent = Join(UnsignedReal(decimal),
                         Any(exponent),
                         SignedInteger())
    without_exponent = UnsignedFloat(decimal)
    return Or(with_exponent, without_exponent)
Пример #12
0
def UnsignedEReal(decimal='.', exponent='eE'):
    '''
    Match an `UnsignedReal` followed by an optional exponent (e+02 etc),
    so that both integer and float values are matched.

    `decimal` is passed to `UnsignedReal`; `exponent` lists the
    characters that may introduce the exponent.
    '''
    mantissa = UnsignedReal(decimal)
    exponent_part = And(Any(exponent), SignedInteger())
    return Join(mantissa, Optional(exponent_part))
Пример #13
0
    def __init__(self):
        '''
        Configure the alphabet to run from chr(0) to chr(maxunicode) and
        supply matchers for escaped characters and for the 's' / 'S'
        character classes built from _WHITESPACE.
        '''
        from lepl.matchers.core import Any
        from lepl.matchers.combine import Or
        last_char = chr(maxunicode)

        def mkhex(char, n):
            # Matcher for `char` (dropped) followed by `n` hex digits;
            # the digits are converted to the character with that code.
            from lepl.matchers.derived import Drop
            marker = Drop(Any(char))
            digits = Any('0123456789abcdefABCDEF')[n, ...]
            return marker + digits >> (lambda x: chr(int(x, 16)))

        def mkchr(char, chars, invert=False):
            # Matcher for the literal `char` whose result is replaced by
            # a Character covering `chars` (or the inverse of `chars`).
            from lepl.matchers.core import Literal
            from lepl.matchers.derived import Map
            from lepl.regexp.core import Character
            intervals = lmap(lambda c: (c, c), chars)
            func = lambda _: Character(intervals, self)
            if invert:
                # this delays call to invert until after creation of self
                func = lambda _: Character(self.invert(intervals), self)
            return Map(Literal(char), func)

        whitespace = mkchr('s', _WHITESPACE)
        not_whitespace = mkchr('S', _WHITESPACE, invert=True)
        char_class = Or(whitespace, not_whitespace)
        escaped = Any(ILLEGAL) | mkhex('x', 2) | mkhex('u', 4) | mkhex('U', 8)
        super(UnicodeAlphabet, self).__init__(chr(0),
                                              last_char,
                                              escaped=escaped,
                                              range=char_class)
Пример #14
0
 def test_depth(self):
     '''
     With the configuration cleared, `Any()[:,...]` on 'abc' must give
     the longest match first and then successively shorter ones, ending
     with the empty match.
     '''
     repeat = Any()[:, ...]
     repeat.config.clear()
     match_string = repeat.get_match_string()
     #print(repr(match_string.matcher))
     results = []
     for (matched, _remaining) in match_string('abc'):
         results.append(matched)
     assert results == [['abc'], ['ab'], ['a'], []], results
Пример #15
0
def SignedEFloat(decimal='.', exponent='eE'):
    '''
    Match a `SignedFloat` followed by an optional exponent (e+02 etc).

    `decimal` is passed to `SignedFloat`; `exponent` lists the
    characters that may introduce the exponent, which is then followed
    by a `SignedInteger`.
    '''
    mantissa = SignedFloat(decimal)
    exponent_part = Join(Any(exponent), SignedInteger())
    return Join(mantissa, Optional(exponent_part))
Пример #16
0
 def test_slash(self):
     '''
     Check the `/` and `//` operators between two `Any('ab')` matchers.
     `/` accepts zero or more spaces between the two characters, while
     `//` returns nothing when there is no space at all.
     '''
     ab = Any('ab')
     single_slash = (('ab', [['a', 'b']]),
                     ('a b', [['a', ' ', 'b']]),
                     ('a  b', [['a', '  ', 'b']]))
     double_slash = (('ab', []),
                     ('a b', [['a', ' ', 'b']]),
                     ('a  b', [['a', '  ', 'b']]))
     for (text, expected) in single_slash:
         self.assert_direct(text, ab / ab, expected)
     for (text, expected) in double_slash:
         self.assert_direct(text, ab // ab, expected)
Пример #17
0
 def test_double(self):
     '''
     Apply two `append` transforms, one after the other, to `Any()` and
     check that, with `compose_transforms` enabled, the parser's matcher
     is a single `TransformableWrapper` holding both functions.
     '''
     inner = Any() > append('x')
     outer = inner > append('y')
     outer.config.clear().compose_transforms()
     parse = outer.get_parse()
     first = parse('a')[0]
     assert first == 'axy', first
     assert isinstance(parse.matcher, TransformableWrapper)
     assert len(parse.matcher.wrapper.functions) == 2
Пример #18
0
 def test_loop(self):
     '''
     Define a `Delayed` matcher in terms of itself, with an `append`
     transform, and check that after `compose_transforms` the parser's
     matcher is still a `Delayed` and the transform is applied once.
     '''
     recursive = Delayed()
     recursive += (Any() | recursive) > append('x')
     recursive.config.clear().compose_transforms()
     parse = recursive.get_parse()
     first = parse('a')[0]
     assert first == 'ax', first
     assert isinstance(parse.matcher, Delayed)
Пример #19
0
 def test_liberal(self):
     '''
     Build a left-recursive `Delayed` matcher and check that its first
     alternative is a `Delayed` before the parser is generated with
     `optimize_or(False)` and a `TransformableWrapper` afterwards.
     '''
     recursive = Delayed()
     recursive += recursive | Any()

     def first_alternative():
         # looked up afresh each time, since parser generation changes it
         return recursive.matcher.matchers[0]

     assert isinstance(first_alternative(), Delayed)
     recursive.config.clear().optimize_or(False)
     recursive.get_parse_string()
     # TODO - better test
     assert isinstance(first_alternative(), TransformableWrapper)
Пример #20
0
def AnyBut(exclude=None):
    '''
    Match any single character except those specified (or, if a matcher
    is given as the exclude, any character where that matcher fails).

    `exclude` should be a list of tokens (or a string of suitable
    characters), or a matcher; anything that is not already a matcher is
    converted with `Any`.

    NOTE(review): the original documentation says that all tokens are
    accepted when `exclude` is omitted, but the default is also passed
    through the negated lookahead - confirm the behaviour of
    `AnyBut()` before relying on it.
    '''
    excluded = coerce_(exclude, Any)
    # succeed only where the excluded matcher would fail, then consume
    not_excluded = ~Lookahead(excluded)
    return And(not_excluded, Any())
Пример #21
0
 def test_all(self):
     '''
     Check the order of results for a float with exponent tried before a
     plain float, on the input '1.e3', using the default configuration.

     Notes kept from the original author on other configurations:
       default()              - wrong order
       compile_to_dfa()       - gives 1.e3 only
       compile_to_nfa()       - wrong order
       no_compile_to_regexp() - ok
       clear()                - ok
     '''
     with_exponent = Join(UnsignedFloat(), Any('eE'), SignedInteger())
     plain = UnsignedFloat()
     either = Or(with_exponent, plain)
     either.config.default()
     self.assert_direct('1.e3', either, [['1.e3'], ['1.']])
Пример #22
0
def SingleLineString(quote='"', escape='\\', exclude='\n'):
    '''
    Like `String`, but will not match across multiple lines.

    `quote` delimits the string and is dropped from the result.  The
    characters in `exclude` (newline by default) may not appear in the
    content.  If `escape` is not empty, an escape followed by a quote
    contributes the quote (the escape itself is dropped).
    '''
    delimiter = Literal(quote)
    # any character that is neither the quote nor an excluded character
    piece = AnyBut(Or(delimiter, Any(exclude)))
    if escape:
        piece = Or(piece, And(Drop(escape), delimiter))
    body = Repeat(piece, add_=True)
    return And(Drop(delimiter), body, Drop(delimiter))
Пример #23
0
 def test_separator_mixin(self):
     '''
     Check comma-separated repetition of `Any('abc')` written with the
     slice syntax `[start:stop:algorithm, ',']`.

     Each case is (start, stop, algorithm, input, expected results).
     The expected orders suggest that 'd' gives the longest match first
     and 'b' the shortest first - presumably depth- and breadth-first
     search; confirm against the `Repeat` documentation.
     '''
     # basicConfig(level=DEBUG)  # enable for debugging
     abc = Any('abc')
     cases = (
         (1, 1, 'd', 'a', ['a']),
         (1, 1, 'b', 'a', ['a']),
         (1, 2, 'd', 'a,b', ['a,b', 'a']),
         (1, 2, 'b', 'a,b', ['a', 'a,b']),
         (2, 3, 'd', 'a,b,c,a', ['a,b,c', 'a,b']),
         (2, 3, 'b', 'a,b,c,a', ['a,b', 'a,b,c']),
     )
     for (start, stop, algorithm, text, expected) in cases:
         self.assert_separator_mixin(abc[start:stop:algorithm, ','],
                                     text, expected)
Пример #24
0
    def test_exclude(self):
        '''
        Check that `Exclude(vowel)` hides vowels from the wrapped matcher.

        The same two-line input is matched with several repetitions of
        `Any()`; the checks look at the first characters matched, at the
        next character left in the stream, and at the line number and
        offset of the stream that remains.
        '''
        #basicConfig(level=DEBUG)
        def vowel(x):
            return x in 'aeiou'

        def parser(matcher):
            # the matchers here do not consume all the input, so the
            # full-first-match check is disabled
            matcher.config.no_full_first_match()
            return matcher.get_match_string()

        stream1 = 'abcdef\nghijklm\n'
        (match, _stream) = next(parser(Exclude(vowel)(Any()[:]))(stream1))
        assert match[0:2] == ['b', 'c'], match[0:2]
        (_result, stream) = next(parser(Exclude(vowel)(Any()[0]))(stream1))
        assert stream[0] == 'a', stream[0]
        (_result, stream) = next(parser(Exclude(vowel)(Any()))(stream1))
        assert stream[0] == 'c', stream[0]
        (_result, stream) = next(parser(Exclude(vowel)(Any()[5]))(stream1))
        # on failure report the actual value, not the result of comparing it
        assert stream.line_number == 2, stream.line_number
        assert stream.line_offset == 0, stream.line_offset
        # `match` is still the result of the first (unbounded) match above
        assert len(match) == 12, len(match)
Пример #25
0
def SignedEReal(decimal='.', exponent='eE'):
    '''
    Match a `SignedReal` followed by an optional exponent (e+02 etc),
    so that both integer and float values are matched.

    With the default `decimal` and `exponent` a single `NfaRegexp` is
    returned instead of the composite matcher (a speed hack, according
    to the original comment).
    '''
    if decimal != '.' or exponent != 'eE':
        # general case, built from the other matchers
        exponent_part = Join(Any(exponent), SignedInteger())
        return Join(SignedReal(decimal), Optional(exponent_part))
    # hack to faster direct implementation for now
    return NfaRegexp(
        r'[\+\-]?(?:[0-9]*\.[0-9]+|[0-9]+\.|[0-9]+)(?:[eE][\+\-]?[0-9]+)?')
Пример #26
0
    def __init__(self, clean_html=True):
        '''
        Store the `clean_html` flag, the punctuation and context
        exception sets, and build the LEPL matchers for real numbers
        (including comma-separated thousands).
        '''
        self.clean_html = clean_html

        self._punctuation = '!"#&\'()*+,.;<=>?@[\\]^_`{|}~'
        # the same words are exceptions two tokens to the left and right
        voucher_words = 'discount redeem voucher'.split()
        self._lctx_1_exceptions = set('/ :'.split())
        self._lctx_2_exceptions = set(voucher_words)
        self._rctx_1_exceptions = set('/ : th am pm hour hours %'.split())
        self._rctx_2_exceptions = set(voucher_words)

        # LEPL Real Number Matchers (w/thousands)
        # zero or more groups of a (dropped) comma and three digits
        comma_groups = Join(Drop(','), Add(Digit()[3]))[:]
        groups_with_fraction = Join(comma_groups, Any('.'), UnsignedInteger())
        groups_only = Join(comma_groups, Optional(Any('.')))
        thousands_tail = Or(groups_with_fraction, groups_only)
        number = Or(Join(UnsignedInteger(), thousands_tail),
                    UnsignedReal()) >> float
        # text between numbers, joined into a single string
        filler = Join(Star(AnyBut(number)))
        self._real_partition_matcher = Star(And(filler, number, filler))
        # numbers separated by (dropped) whitespace, commas or hyphens
        gap = Drop(Star(Or(Whitespace(), Any(',-'))))
        self._real_simple_matcher = number[:, gap]
Пример #27
0
def SignedEFloat(decimal='.', exponent='eE'):
    '''
    As `SignedEReal`, but the value must contain a decimal point or an
    exponent, so only real values that are not integers are matched.

    With the default `decimal` and `exponent` a single `NfaRegexp` is
    returned instead of the composite matcher (a speed hack, according
    to the original comment).
    '''
    if decimal != '.' or exponent != 'eE':
        # general case, built from the other matchers
        with_exponent = Join(SignedReal(decimal),
                             Any(exponent),
                             SignedInteger())
        return Or(with_exponent, SignedFloat(decimal))
    # hack to faster direct implementation for now
    return NfaRegexp(
        r'[\+\-]?(?:[0-9]*\.[0-9]+(?:[eE][\+\-]?[0-9]+)?|[0-9]+\.(?:[eE][\+\-]?[0-9]+)?|[0-9]+[eE][\+\-]?[0-9]+)'
    )
Пример #28
0
    def test_context(self):
        #basicConfig(level=DEBUG)
        output = StringIO()
        with TraceVariables(out=output):
            bar = Any()
        bar.config.no_full_first_match()
        repr(bar)
        list(bar.match('abc'))
        text = output.getvalue()
        assert_str(
            text,
            '''         bar = ['a']                            stream = 'bc'
         bar failed                             stream = 'abc'
''')
Пример #29
0
def SingleLineString(quote='"',
                     escape='\\',
                     exclude='\n',
                     empty='',
                     join=__add__):
    '''
    Like `String`, but will not match across multiple lines.

    `quote` delimits the string and is dropped from the result.  The
    characters in `exclude` (newline by default) may not appear in the
    content.  If `escape` is not empty, an escape followed by a quote
    contributes the quote (the escape itself is dropped).  `empty` and
    `join` are passed to `Repeat` as the `reduce` pair.
    '''
    delimiter = Literal(quote)
    # any character that is neither the quote nor an excluded character
    piece = AnyBut(Or(delimiter, Any(exclude)))
    if escape:
        piece = Or(piece, And(Drop(escape), delimiter))
    body = Repeat(piece, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
Пример #30
0
    def test_node(self):
        '''
        Build a small grammar in which a named value is wrapped in a
        `Node` subclass, parse '1' with `compose_transforms` enabled, and
        check the type, contents and string form of the result.
        '''

        class Term(Node):
            pass

        number = Any('1') > 'number'
        term = number > Term
        factor = term | Drop(Optional(term))

        factor.config.clear().compose_transforms()
        parse = factor.get_parse_string()
        ast = parse('1')[0]
        assert type(ast) == Term, type(ast)
        assert ast[0] == '1', ast[0]
        expected = "Term\n `- number '1'"
        assert str26(ast) == expected, ast