def namer(stream_in, matcher):
    '''
    Invoke `matcher` and print a one-line trace of the result (or of the
    failure, when `show_failures` is set) to `out`.

    Free variables (`show_failures`, `name`, `left`, `right`, `out`,
    `fmt`, `s_kargs`, `_adjust`) come from the enclosing scope.
    '''
    try:
        (result, stream_out) = matcher()
    except StopIteration:
        # matcher failed; optionally log the failure, then re-raise
        if show_failures:
            stream = \
                _adjust(fmt('stream = {rest}', **s_kargs(stream_in)), right)
            str_name = _adjust(name, left // 4, True, True)
            match = _adjust(fmt(' {0} failed', str_name), left, True)
            # Python bug #4618
            print(match + ' ' + stream, file=out, end=str('\n'))
        raise StopIteration
    else:
        try:
            try:
                rest = fmt('{rest}', **s_kargs(stream_out))
            except StopIteration:
                # output stream exhausted
                rest = '<EOS>'
            stream = _adjust(fmt('stream = {0}', rest), right)
            str_name = _adjust(name, left // 4, True, True)
            match = _adjust(fmt(' {0} = {1}', str_name, result), left, True)
            # Python bug #4618
            print(match + ' ' + stream, file=out, end=str('\n'))
            return (result, stream_out)
        except Exception as e:
            # tracing must never break matching; report and return anyway
            print('Error in trace', file=out, end=str('\n'))
            print(repr(e), file=out, end=str('\n'))
            return (result, stream_out)
def test_match_7(self):
    '''
    An inverted character class should exclude the line-aware SOL/EOL
    markers as well as the listed character.
    '''
    alphabet = LineAwareAlphabet(UnicodeAlphabet.instance(), make_str_parser)
    expr = Compiler.single(alphabet, '[^(*SOL)(*EOL)a]*').dfa()
    result = list(expr.match([str('1'), EOL]))
    assert result == [[str('label')], [str('1')], [EOL]], \
        result
def __str__(self):
    '''
    Comma-separated summary: matcher class, regexp, use and add_reqd.
    '''
    parts = (self.matcher.__class__, self.regexp, self.use, self.add_reqd)
    return ','.join(str(part) for part in parts)
def TraceVariables(on=True, show_failures=True, width=80, out=stderr):
    '''
    Add this as a context (`with TraceVariables():`) and you will see
    debug logging indicating how variables are bound during matching.
    '''
    if on:
        # snapshot the caller's locals before the body runs
        before = _getframe(2).f_locals.copy()
    yield None
    if on:
        after = _getframe(2).f_locals
        for key in after:
            value = after[key]
            # only new or re-bound names are traced
            if key not in before or value != before[key]:
                try:
                    try:
                        value.wrapper.append(
                            name(key, show_failures, width, out))
                    except AttributeError:
                        # no wrapper: attach the tracer directly
                        value.trace_variables = name(key, show_failures,
                                                     width, out)
                except: # what exception?
                    # Python bug #4618
                    print(
                        'Unfortunately the following matchers cannot '
                        'be tracked:', end=str('\n'))
                    print(fmt(' {0} = {1}', key, value), end=str('\n'))
def _escape_char(self, char):
    '''
    Escape a character if necessary.
    '''
    text = str(char)
    if self.escape is None or text not in self.illegal:
        return text
    return self.escape + text
def _escape_char(self, char):
    '''
    Escape a character if necessary.
    '''
    text = str(char)
    if self.escape is not None and text in self.escaped:
        return self.escape + text
    return text
def test_invert_bug_6(self):
    '''
    Regression test: an inverted class containing the SOL/EOL markers
    must still match plain text in line-aware mode.
    '''
    #basicConfig(level=DEBUG)
    bad = BLine(Token(str('[^(*SOL)(*EOL)a]*')))
    bad.config.default_line_aware(block_policy=2,
                                  parser_factory=make_str_parser)
    bad.config.trace(True)
    parser = bad.get_parse_string()
    result = parser(str('123'))
    assert result == [str('123')], result
def location(self, offset, line, location_state):
    '''
    A tuple containing line number, line offset, character offset,
    the line currently being processed, and a description of the
    source.
    '''
    if line is None:
        # no current line: every position is unknown
        return (-1, -1, -1, None, str(self))
    character_count = location_state
    # the line number is not tracked by this source, hence None
    return (None, offset, character_count + offset, line, str(self))
def test_good_error_msg(self): ''' Better error message with streams. ''' #basicConfig(level=DEBUG) words = Token('[a-z]+')[:] words.config.lexer() parser = words.get_parse_string() try: parser('abc defXghi') assert False, 'expected error' except RuntimeLexerError as err: assert str(err) == 'No lexer for \'Xghi\' at line 1 character 7 ' \ 'of str: \'abc defXghi\'.', str(err)
def test_bad_space(self):
    '''
    An unexpected character fails to match.
    '''
    token = Token('a')
    token.config.clear().lexer(discard='b')
    parser = token.get_parse()
    assert parser('a') == ['a'], parser('a')
    # 'b' is discarded, so nothing matches and None is returned
    assert parser('b') == None, parser('b')
    try:
        parser('c')
        assert False, 'expected failure'
    except RuntimeLexerError as err:
        assert str(err) == "No token for 'c' at line 1, character 1 of 'c'.", str(err)
def test_bad_error_msg(self): ''' An ugly error message (can't we improve this?) ''' #basicConfig(level=DEBUG) words = Token('[a-z]+')[:] words.config.lexer() parser = words.get_parse() try: parser('abc defXghi') assert False, 'expected error' except RuntimeLexerError as err: assert str(err) == "No lexer for 'Xghi' at line 1 " \ "character 7 of str: 'abc defXghi'.", str(err)
def test_bad_space(self):
    '''
    An unexpected character fails to match.
    '''
    token = Token('a')
    token.config.clear().lexer(discard='b')
    parser = token.get_parse()
    assert parser('a') == ['a'], parser('a')
    # 'b' is discarded, so nothing matches and None is returned
    assert parser('b') == None, parser('b')
    try:
        parser('c')
        assert False, 'expected failure'
    except RuntimeLexerError as err:
        assert str(err) == "No lexer for 'c' at line 1 " \
            "character 0 of str: 'c'.", str(err)
def location(self, offset, line, location_state):
    '''
    Correct the location for the initial SOL character.
    '''
    (char_count, line_no) = location_state
    # shift one position left so the SOL marker itself is not counted
    adjusted = offset - 1
    return (line_no, adjusted, char_count + adjusted, line, str(self))
def location(self, offset, line, location_state):
    '''
    Remove the SOL/EOL characters.
    '''
    (char_count, line_no) = location_state
    # re-join the line so that marker characters are stripped
    clean_line = self.join([line])
    return (line_no, offset, char_count + offset, clean_line, str(self))
def __str__(self):
    '''
    Render the automaton as '; '-joined "node(label): edges" entries.

    Example:
    0: 3, 4; 1: 2; 2(Tk1); 3: [{u'\x00'}-`b-{u'\U0010ffff'}]->3, 1;
    4: {$}->5, 7; 5: 6; 6($); 7: {^}->10; 8: 9; 9(^); 10: 11;
    11: [ ]->11, 8

    Node 0 leads to 3 and 4 (both empty);
    Node 1 leads to 2 (empty);
    Node 2 is terminal, labelled with "Tk1";
    Node 3 loops back to 3 for a character in the given range, or to 1 etc.
    '''
    lines = []
    for node in self:
        edges = []
        # labelled transitions: "edge->destination"
        for (dest, edge) in self.transitions(node):
            edges.append(fmt('{0}->{1}', edge, dest))
        # empty (epsilon) transitions: destination only
        for dest in self.empty_transitions(node):
            edges.append(str(dest))
        label = '' if self.terminal(node) is None \
            else fmt('({0})', self.terminal(node))
        if edges:
            lines.append(
                fmt('{0}{1}: {2}', node, label, ', '.join(edges)))
        else:
            lines.append(fmt('{0}{1}', node, label))
    return '; '.join(lines)
def __str__(self):
    '''
    Best-effort string form: the wrapped text, or `repr` if that fails.
    '''
    # pylint: disable-msg=W0702
    # deliberately broad: __str__ must never raise
    try:
        result = str(self.text)
    except:
        result = repr(self)
    return result
def test_escaped_range(self):
    # NOTE(review): the expected graph text appears flattened in this
    # copy of the source; the original literal may have contained
    # newlines between the digraph entries — confirm against upstream.
    self.assert_graphs(parse(str(r'[\x00-`b-oq-y]')),
                       r"""digraph { 0 [label="[\\x00-`b-oq-y]"] 1 [label="Match"] 0 -> 1 }""")
def __str__(self):
    '''
    Render the automaton as '; '-joined "node(label): edges" entries.

    Example:
    0: 3, 4; 1: 2; 2(Tk1); 3: [{u'\x00'}-`b-{u'\U0010ffff'}]->3, 1;
    4: {$}->5, 7; 5: 6; 6($); 7: {^}->10; 8: 9; 9(^); 10: 11;
    11: [ ]->11, 8

    Node 0 leads to 3 and 4 (both empty);
    Node 1 leads to 2 (empty);
    Node 2 is terminal, labelled with "Tk1";
    Node 3 loops back to 3 for a character in the given range, or to 1 etc.
    '''
    lines = []
    for node in self:
        edges = []
        # labelled transitions: "edge->destination"
        for (dest, edge) in self.transitions(node):
            edges.append(format('{0}->{1}', edge, dest))
        # empty (epsilon) transitions: destination only
        for dest in self.empty_transitions(node):
            edges.append(str(dest))
        label = '' if self.terminal(node) is None \
            else format('({0})', self.terminal(node))
        if edges:
            lines.append(
                format('{0}{1}: {2}', node, label, ', '.join(edges)))
        else:
            lines.append(format('{0}{1}', node, label))
    return '; '.join(lines)
def kargs(self, state, prefix='', kargs=None):
    '''
    Generate a dictionary of values that describe the stream.  These
    may be extended by subclasses.  They are provided to
    `syntax_error_kargs`, for example.

    Note: Calculating this can be expensive; use only for error
    messages, not debug messages (that may be discarded).

    Implementation note: Because some values are
    '''
    # NOTE(review): the docstring above is truncated in the source.
    offset = state + self._delta[OFFSET]
    if kargs is None:
        kargs = {}
    add_defaults(kargs, self._kargs, prefix=prefix)
    # whether offset points inside the sequence (False at/after EOS)
    within = -1 < offset < len(self._sequence)
    data = self._fmt(self._sequence, state)
    text = self._fmt(self._sequence, state, index=False)
    # some values below may be already present in self._global_kargs
    defaults = {'data': data,
                'global_data': data,
                'text': text,
                'global_text': text,
                'offset': state,
                'global_offset': offset,
                'rest': self._fmt(self._sequence[offset:], 0, index=False),
                'repr': repr(self._sequence[offset]) if within else '<EOS>',
                'str': str(self._sequence[offset]) if within else '',
                'line_no': 1,
                'char': offset+1}
    add_defaults(kargs, defaults, prefix=prefix)
    add_defaults(kargs, {prefix + 'location': self._location(kargs, prefix)})
    return kargs
def kargs(self, state, prefix='', kargs=None):
    '''
    Generate a dictionary of values that describe the stream.  These
    may be extended by subclasses.  They are provided to
    `syntax_error_kargs`, for example.

    Note: Calculating this can be expensive; use only for error
    messages, not debug messages (that may be discarded).

    Implementation note: Because some values are
    '''
    # NOTE(review): the docstring above is truncated in the source.
    offset = state + self._delta[OFFSET]
    if kargs is None:
        kargs = {}
    add_defaults(kargs, self._kargs, prefix=prefix)
    # whether offset points inside the sequence (False at/after EOS)
    within = offset > -1 and offset < len(self._sequence)
    data = self._fmt(self._sequence, state)
    text = self._fmt(self._sequence, state, index=False)
    # some values below may be already present in self._global_kargs
    defaults = {'data': data,
                'global_data': data,
                'text': text,
                'global_text': text,
                'offset': state,
                'global_offset': offset,
                'rest': self._fmt(self._sequence[offset:], 0, index=False),
                'repr': repr(self._sequence[offset]) if within else '<EOS>',
                'str': str(self._sequence[offset]) if within else '',
                'lineno': 1,
                'char': offset+1}
    add_defaults(kargs, defaults, prefix=prefix)
    add_defaults(kargs, {prefix + 'location': self._location(kargs, prefix)})
    return kargs
def location(self, offset, line, location_state):
    '''
    A tuple containing line number, line offset, character offset,
    the line currently being processed, and a description of the
    source.
    '''
    (char_count, line_no) = location_state
    global_offset = char_count + offset
    return (line_no, offset, global_offset, line, str(self))
def record_success(count, stream_in, result):
    # Log a successful match: the matcher's `name`, how many times it
    # has matched, the value produced, and the in/out streams.
    # (`name`, `out`, `format`, `format_stream` are free variables
    # from the enclosing scope.)
    (value, stream_out) = result
    count_desc = format(' ({0})', count) if count > 1 else ''
    # Python bug #4618
    print(format('{0}{1} = {2}\n "{3}" -> "{4}"',
                 name, count_desc, value,
                 format_stream(stream_in), format_stream(stream_out)),
          file=out, end=str('\n'))
def clean(x):
    '''
    Normalise a value's string form across Python versions: drop
    unicode prefixes, shorten known lepl exception paths, mask object
    addresses and strip long-integer suffixes.
    '''
    text = str(x)
    for old, new in (("u'", "'"),
                     ("lepl.matchers.error.Error", "Error"),
                     ("lepl.stream.maxdepth.FullFirstMatchException",
                      "FullFirstMatchException")):
        text = text.replace(old, new)
    text = sub('<(.+) 0x[0-9a-fA-F]*>', '<\\1 0x...>', text)
    return sub('(\\d+)L', '\\1', text)
def __init__(self, transform, stream):
    '''
    Wrap `stream`, lazily applying `transform` to it.

    :raise FilterException: if `stream` is not a LocationStream.
    '''
    if not isinstance(stream, LocationStream):
        raise FilterException('Can only filter LocationStream instances.')
    # join is unused here, but used by `StreamView`
    super(BaseTransformedSource, self).__init__(str(stream.source),
                                                list_join(stream.source.join))
    self.__length = 0
    self.__iterator = transform(stream)
def fmt_sequence(self, children):
    '''
    Generate a string representation of a sequence.

    This must fully describe the data in the children (it is used
    to hash the data).
    '''
    parts = [str(child) for child in children]
    return self.join(parts)
def fmt_choice(self, children):
    '''
    Generate a string representation of a choice.

    This must fully describe the data in the children (it is used
    to hash the data).
    '''
    alternatives = '|'.join(str(child) for child in children)
    return fmt('(?:{0})', alternatives)
def namer(stream_in, matcher):
    # Invoke `matcher` and log the result (or failure, when
    # `show_failures` is set) against the traced variable `name`.
    # Free variables come from the enclosing scope.
    try:
        (result, stream_out) = matcher()
        stream = _adjust(format('stream = \'{0}\'', stream_out), right)
        str_name = _adjust(name, left // 4, True, True)
        match = _adjust(format(' {0} = {1}', str_name, result), left, True)
        # Python bug #4618
        print(match + ' ' + stream, file=out, end=str('\n'))
        return (result, stream_out)
    except StopIteration:
        # failure: optionally log, then re-raise for the caller
        if show_failures:
            stream = _adjust(format('stream = \'{0}\'', stream_in), right)
            str_name = _adjust(name, left // 4, True, True)
            match = _adjust(format(' {0} failed', str_name), left, True)
            # Python bug #4618
            print(match + ' ' + stream, file=out, end=str('\n'))
        raise StopIteration
def __init__(self, generator, epoch):
    '''
    Track a generator through a weak reference, recording the epoch
    at which it was last seen.
    '''
    # hash and description are cached now, while the generator is alive
    self.__hash = hash(generator)
    self.__wrapper = ref(generator)
    self.__last_known_epoch = epoch
    self.order_epoch = epoch # readable externally
    self.__count = 1 # add with 1 as we test for discard immediately after
    self.gced = False
    self.__describe = str(generator)
def record_success(count, stream_in, result):
    # Log a successful match: the matcher's `name`, how many times it
    # has matched, the value produced, and the in/out streams.
    # (`name`, `out`, `fmt`, `fmt_stream` are free variables from the
    # enclosing scope.)
    (value, stream_out) = result
    count_desc = fmt(' ({0})', count) if count > 1 else ''
    # Python bug #4618
    print(fmt('{0}{1} = {2}\n {3} -> {4}',
              name, count_desc, value,
              fmt_stream(stream_in), fmt_stream(stream_out)),
          file=out, end=str('\n'))
def test_mixed(self):
    '''
    Cannot mix tokens and non-tokens at same level.
    '''
    bad = Token(Any()) & Any()
    try:
        bad.get_parse()
        assert False, 'expected failure'
    except LexerError as err:
        assert str(err) == 'The grammar contains a mix of Tokens and ' \
                           'non-Token matchers at the top level. If ' \
                           'Tokens are used then non-token matchers ' \
                           'that consume input must only appear "inside" ' \
                           'Tokens. The non-Token matchers include: ' \
                           'Any(None).', str(err)
    else:
        # NOTE(review): unreachable — the try body always raises
        # (either LexerError or the AssertionError above); consider
        # removing this clause.
        assert False, 'wrong exception'
def fmt_choice(self, children):
    '''
    Generate a string representation of a choice.

    This must fully describe the data in the children (it is used
    to hash the data).
    '''
    alternatives = '|'.join(str(child) for child in children)
    return format('({0})', alternatives)
def clean(x):
    '''
    Normalise a value's string form across Python versions: drop
    unicode prefixes, shorten known lepl exception paths, mask object
    addresses and strip long-integer suffixes.
    '''
    text = str(x)
    for old, new in (("u'", "'"),
                     ("lepl.matchers.error.Error", "Error"),
                     ("lepl.stream.maxdepth.FullFirstMatchException",
                      "FullFirstMatchException")):
        text = text.replace(old, new)
    text = sub("<(.+) 0x[0-9a-fA-F]*>", "<\\1 0x...>", text)
    return sub("(\\d+)L", "\\1", text)
def fmt_stream(stream):
    '''
    Summarise the stream's current line for display: the quoted text,
    truncated to 20 characters, or '<EOS>' at end of stream.
    '''
    try:
        (line, _) = s_line(stream, False)
        text = str(line)
        if len(text) > 20:
            # keep 17 chars plus an ellipsis, 20 total
            text = text[:17] + '...'
        return repr(text)
    except StopIteration:
        # nothing left to read
        return '<EOS>'
def from_string(self, text, source=None, join=''.join, base=None):
    '''
    Wrap a string.
    '''
    if source is None:
        # default description: a truncated sample of the text itself
        source = sample('str: ', repr(text))
    if base is None:
        base = text
    return self(LineSource(StringIO(str(text)), source, join, base=base))
def TraceVariables(on=True, show_failures=True, width=80, out=stderr):
    '''
    Add this as a context (`with TraceVariables():`) to log how
    variables in the caller's frame are bound during matching.

    :param on: enable tracing (when False this is a no-op).
    :param show_failures: also log failed matches.
    :param width: output line width.
    :param out: stream the trace is printed to.
    '''
    before = _getframe(2).f_locals.copy()
    yield None
    after = _getframe(2).f_locals
    warned = False
    for key in after:
        value = after[key]
        # Fixed precedence bug: `on and A or B` parsed as
        # `(on and A) or B`, so with on=False a newly bound key
        # still evaluated `before[key]` and raised KeyError.
        if on and (key not in before or value != before[key]):
            try:
                value.wrapper.append(name(key, show_failures, width, out))
            except:
                if not warned:
                    # Python bug #4618
                    print('Unfortunately the following matchers cannot '
                          'be tracked:', end=str('\n'))
                    warned = True
                # Python bug #4618
                print(format(' {0} = {1}', key, value), end=str('\n'))
def pretty(c):
    # Printable (or already escaped, multi-char) results pass through;
    # everything else is rendered as a \x / \u / \U escape chosen by
    # code point size.
    x = self._escape_char(c)
    if len(x) > 1 or 32 <= ord(x) <= 127:
        return str(x)
    elif ord(c) < 0x100:
        return fmt('\\x{0:02x}', ord(c))
    elif ord(c) < 0x10000:
        return fmt('\\u{0:04x}', ord(c))
    else:
        return fmt('\\U{0:08x}', ord(c))
def pretty(c):
    # Printable (or already escaped, multi-char) results pass through;
    # everything else is rendered as a \x / \u / \U escape chosen by
    # code point size.
    x = self._escape_char(c)
    if len(x) > 1 or 32 <= ord(x) <= 127:
        return str(x)
    elif ord(c) < 0x100:
        return format('\\x{0:02x}', ord(c))
    elif ord(c) < 0x10000:
        return format('\\u{0:04x}', ord(c))
    else:
        return format('\\U{0:08x}', ord(c))
def __init__(self, tokens, stream):
    '''
    tokens is an iterator over the (terminals, size, stream_before)
    tuples.
    '''
    assert isinstance(stream, LocationStream)
    # join is unused(?) but passed on to ContentStream
    super(TokenSource, self).__init__(str(stream.source),
                                      stream.source.join, stream.source)
    self.__tokens = iter(tokens)
    self.__token_count = 0
def kargs(self, state, prefix='', kargs=None):
    '''
    Add string-specific values (current line, position) before
    delegating to the base implementation.
    '''
    if kargs is None:
        kargs = {}
    (_, line_no, char) = self.delta(state)
    # locate the current line's boundaries around `state`
    start = self._sequence.rfind('\n', 0, state) + 1 # omit \n
    end = self._sequence.find('\n', state) # omit \n
    # all is str() because passed to SyntaxError constructor
    if end < 0:
        rest = repr(self._sequence[state:])
        all = str(self._sequence[start:])
    else:
        rest = repr(self._sequence[state:end])
        all = str(self._sequence[start:end])
    add_defaults(kargs, {
        'type': '<string>',
        'filename': '<string>',
        'rest': rest,
        'all': all,
        'line_no': line_no,
        'char': char}, prefix=prefix)
    return super(StringHelper, self).kargs(state, prefix=prefix, kargs=kargs)
def TraceVariables(on=True, show_failures=True, width=80, out=stderr):
    '''
    Add this as a context (`with TraceVariables():`) to log how
    variables in the caller's frame are bound during matching.

    :param on: enable tracing (when False this is a no-op).
    :param show_failures: also log failed matches.
    :param width: output line width.
    :param out: stream the trace is printed to.
    '''
    before = _getframe(2).f_locals.copy()
    yield None
    after = _getframe(2).f_locals
    warned = False
    for key in after:
        value = after[key]
        # Fixed precedence bug: `on and A or B` parsed as
        # `(on and A) or B`, so with on=False a newly bound key
        # still evaluated `before[key]` and raised KeyError.
        if on and (key not in before or value != before[key]):
            try:
                value.wrapper.append(name(key, show_failures, width, out))
            except:
                if not warned:
                    # Python bug #4618
                    print(
                        'Unfortunately the following matchers cannot '
                        'be tracked:', end=str('\n'))
                    warned = True
                # Python bug #4618
                print(format(' {0} = {1}', key, value), end=str('\n'))
def __init__(self, text, stream):
    '''
    There's just a single line from the token contents.
    '''
    super(ContentSource, self).__init__(str(stream.source),
                                        stream.source.join, base=text)
    self.__line = text
    self.__stream = stream
    # set once the single line has been handed out
    self.__used = False
    self.total_length = len(text)
def __init__(self, transform, stream):
    '''
    Wrap `stream`, lazily applying `transform` to it and caching the
    mapping from character offsets back to the underlying stream.

    :raise FilterException: if `stream` is not a LocationStream.
    '''
    if not isinstance(stream, LocationStream):
        raise FilterException('Can only filter LocationStream instances.')
    # join is unused here, but used by `StreamView`
    super(CachingTransformedSource, self).__init__(str(stream.source),
                                                   list_join(stream.source.join))
    self.__length = 0
    self.__iterator = transform(stream)
    # map from character offset to underlying stream
    self.__lookup = {}
    self.__previous_stream = stream
def kargs(self, state, prefix='', kargs=None):
    '''
    Add string-specific values (current line, position) before
    delegating to the base implementation.
    '''
    if kargs is None:
        kargs = {}
    (_, lineno, char) = self.delta(state)
    # locate the current line's boundaries around `state`
    start = self._sequence.rfind('\n', 0, state) + 1 # omit \n
    end = self._sequence.find('\n', state) # omit \n
    # all is str() because passed to SyntaxError constructor
    if end < 0:
        rest = repr(self._sequence[state:])
        all = str(self._sequence[start:])
    else:
        rest = repr(self._sequence[state:end])
        all = str(self._sequence[start:end])
    add_defaults(kargs, {
        'type': '<string>',
        'filename': '<string>',
        'rest': rest,
        'all': all,
        'lineno': lineno,
        'char': char}, prefix=prefix)
    return super(StringHelper, self).kargs(state, prefix=prefix, kargs=kargs)