def test_string_lines(self):
    f = DEFAULT_STREAM_FACTORY
    s = f.from_string('line 1\nline 2\nline 3\n')
    (l, s) = s_line(s, False)
    assert l == 'line 1\n', l
    (l, _) = s_line(s, False)
    assert l == 'line 2\n', repr(l)
    locn = s_fmt(s, '{location}')
    assert locn == 'line 2, character 1', locn
    sl = s_stream(s, l)
    (_, sl) = s_next(sl, count=2)
    locn = s_fmt(sl, '{location}')
    assert locn == 'line 2, character 3', locn
def _match(self, stream_in):
    '''
    Pull the indent, call the policy to update the global value,
    then evaluate the contents.
    '''
    # detect a nested call
    key = s_key(stream_in)
    if key in self.__streams:
        self._debug('Avoided left recursive call to Block.')
        return
    self.__streams.add(key)
    try:
        ((tokens, token_stream), _) = s_next(stream_in)
        (indent, _) = s_line(token_stream, True)
        if START not in tokens:
            raise StopIteration
        current = self.__monitor.indent
        policy = self.policy(current, indent)
        generator = And(*self.lines)._match(stream_in)
        while True:
            self.__monitor.push_level(policy)
            try:
                results = yield generator
            finally:
                self.__monitor.pop_level()
            yield results
    finally:
        self.__streams.remove(key)
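# Illustrative sketch only (not library code; toy_guarded is a hypothetical
# name): the same "guard with a set of stream keys" trick _match() uses
# above to break left recursion, reduced to a plain function.
def toy_guarded(seen, key, recurse):
    if key in seen:
        return None  # avoided a left-recursive call
    seen.add(key)
    try:
        return recurse()
    finally:
        seen.remove(key)

seen = set()
assert toy_guarded(seen, ('block', 0), lambda: 'result') == 'result'
assert not seen  # the key is removed again on the way out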
def size_match(self, stream):
    '''
    Match against the stream, but return the length of the match.
    '''
    state = 0
    size = 0
    longest = (self.__empty_labels, 0, stream) \
        if self.__empty_labels else None
    (line, _) = s_line(stream, True)
    while size < len(line):
        future = self.__table[state][line[size]]
        if future is None:
            break
        # update state
        (state, terminals) = future
        size += 1
        # match is strictly increasing, so storing the length is enough
        # (no need to make an expensive copy)
        if terminals:
            try:
                (_, next_stream) = s_next(stream, count=size)
                longest = (terminals, size, next_stream)
            except StopIteration:
                pass
    return longest
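# Illustrative sketch only (toy_size_match and TOY_TABLE are hypothetical):
# a transition table in the same shape as self.__table above, keeping only
# the length of the longest terminal match while walking one line.
def toy_size_match(table, line):
    state, size, longest = 0, 0, None
    while size < len(line):
        future = table[state].get(line[size])
        if future is None:
            break
        (state, terminals) = future
        size += 1
        if terminals:
            # the match length is strictly increasing, so just overwrite
            longest = (terminals, size)
    return longest

# a DFA for 'a+': state 0 --a--> state 1 (terminal 'A'), which loops on 'a'
TOY_TABLE = {0: {'a': (1, ['A'])}, 1: {'a': (1, ['A'])}}
assert toy_size_match(TOY_TABLE, 'aaab') == (['A'], 3)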
def fmt_stream(stream):
    try:
        (line, _) = s_line(stream, False)
        text = str(line)
        if len(text) > 20:
            text = text[:17] + '...'
        return repr(text)
    except StopIteration:
        return '<EOS>'
def match(support, stream):
    (line, _) = s_line(stream, True)
    match = pattern.match(line)
    if match:
        eaten = len(match.group())
        if match.groups():
            return (list(match.groups()), s_next(stream, count=eaten)[1])
        else:
            return ([match.group()], s_next(stream, count=eaten)[1])
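# Illustrative sketch only (toy_match is hypothetical): the same "match at
# the start of the line, then drop len(match.group()) characters" pattern
# used by match() above, over a plain string instead of a stream.
import re

def toy_match(pattern, text):
    match = re.compile(pattern).match(text)
    if match:
        eaten = len(match.group())
        groups = list(match.groups()) if match.groups() else [match.group()]
        return (groups, text[eaten:])

assert toy_match(r'(\d+)\s*', '42  apples') == (['42'], 'apples')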
def test_empty(self):
    f = DEFAULT_STREAM_FACTORY
    for (constructor, data) in ((f.from_sequence, ''),
                                (f.from_sequence, []),
                                (f.from_sequence, ()),
                                (f.from_string, ''),
                                (f.from_list, [])):
        s = constructor(data)
        assert s_empty(s)
        try:
            s_next(s)
            assert False, fmt('expected error: {0}', s)
        except StopIteration:
            pass
        try:
            s_line(s, False)
            assert False, fmt('expected error: {0}', s)
        except StopIteration:
            pass
def line(self, state, empty_ok):
    try:
        (cons, line_stream) = state
        if s_empty(line_stream):
            cons = cons.tail
            line_stream = self._next_line(cons, line_stream)
        (value, empty_line_stream) = s_line(line_stream, empty_ok)
        return (value, ((cons, empty_line_stream), self))
    except StopIteration:
        if empty_ok:
            raise TypeError('Iterable stream cannot return an empty line')
        else:
            raise
def test_all(self):
    lines = iter(['first line', 'second line', 'third line'])
    f = DEFAULT_STREAM_FACTORY
    s1 = f(lines)
    # just created
    assert not s_empty(s1)
    # get first line
    (l1, s2) = s_line(s1, False)
    assert 'first line' == l1, l1
    # get first character of next line
    (c21, s21) = s_next(s2)
    assert c21 == 's', c21
    # and test formatting
    locn = s_fmt(s21, '{location}: {rest}')
    assert locn == "line 2, character 2: 'econd line'", locn
    # then get rest of second line
    (c22, s3) = s_next(s21, count=len('econd line'))
    assert c22 == 'econd line', c22
    d = s_debug(s21)
    assert d == "1:'e'", d
    # and move on to third line
    (c31, s31) = s_next(s3)
    assert c31 == 't', c31
    (c32, s32) = s_next(s31)
    assert c32 == 'h', c32
    # now try branching (think tokens) at line 1
    s10 = s_stream(s2, l1)
    (l1, s20) = s_line(s10, False)
    assert l1 == 'first line', l1
    assert not s_empty(s20)
    (c1, s11) = s_next(s10)
    assert c1 == 'f', c1
    d = s_debug(s11)
    assert d == "1:'i'", d
    # finally look at max depth (which was after 'h' in third line)
    m = s_deepest(s1)
    locn = s_fmt(m, '{location}: {rest}')
    assert locn == "line 3, character 3: 'ird line'", locn
def test_single_value(self):
    f = DEFAULT_STREAM_FACTORY
    for (constructor, data) in ((f.from_sequence, 'a'),
                                (f.from_sequence, [1]),
                                (f.from_sequence, (2,)),
                                (f.from_string, 'b'),
                                (f.from_list, ['c'])):
        s = constructor(data)
        assert not s_empty(s)
        (value, n) = s_next(s)
        assert value == data
        assert s_empty(n)
        (line, n) = s_line(s, False)
        assert line == data
        assert s_empty(n)
def line(self, cons, empty_ok):
    '''
    This doesn't have much meaning in terms of tokens, but might be
    used for some debug output, so return something vaguely useful.
    '''
    try:
        # implement in terms of next so that filtering works as expected
        ((_, line_stream), _) = self.next(cons)
        return s_line(line_stream, empty_ok)
    except StopIteration:
        if empty_ok:
            raise TypeError('Token stream cannot return an empty line')
        else:
            raise
def fmt_stream(self):
    '''
    Provide a standard format for the location.
    '''
    try:
        (offset, line_no, char) = s_delta(self.generator.stream)
        locn = fmt('{0}/{1}.{2}', offset, line_no, char)
        try:
            stream = sample('', s_line(self.generator.stream, False)[0], 9)
        except StopIteration:
            stream = '<EOS>'
        return (stream, offset, locn)
    except StopIteration:
        return ('<EOS>', -1, '')
    except TypeError:
        return (self.generator.stream, -1, '')
def test_two_values(self):
    f = DEFAULT_STREAM_FACTORY
    for (constructor, data) in ((f.from_sequence, 'ab'),
                                (f.from_sequence, [1, 2]),
                                (f.from_sequence, (2, 3)),
                                (f.from_string, 'bc'),
                                (f.from_list, ['c', 6])):
        s = constructor(data)
        assert not s_empty(s)
        (value, n) = s_next(s)
        assert value == data[0:1]
        (value, n) = s_next(n)
        assert value == data[1:2]
        assert s_empty(n)
        (line, n) = s_line(s, False)
        assert line == data
        assert s_empty(n)
def _tokens(self, stream, max):
    '''
    Generate tokens, on demand.
    '''
    id_ = s_id(stream)
    try:
        while not s_empty(stream):
            # caches for different tokens with the same contents must differ
            id_ += 1
            (line, next_stream) = s_line(stream, False)
            line_stream = s_stream(stream, line)
            size = 0
            # if we use blocks, match leading space
            if self.blocks:
                try:
                    (_, size, _) = self.s_regexp.size_match(line_stream)
                except TypeError:
                    pass
            # this will be empty (size=0) if blocks unused
            (indent, next_line_stream) = s_next(line_stream, count=size)
            indent = indent.replace('\t', self._tab)
            yield ((START,),
                   s_stream(line_stream, indent, id_=id_, max=max))
            line_stream = next_line_stream
            while not s_empty(line_stream):
                id_ += 1
                try:
                    (terminals, match, next_line_stream) = \
                        self.t_regexp.match(line_stream)
                    yield (terminals,
                           s_stream(line_stream, match, max=max, id_=id_))
                except TypeError:
                    (terminals, _size, next_line_stream) = \
                        self.s_regexp.size_match(line_stream)
                line_stream = next_line_stream
            id_ += 1
            yield ((END,),
                   s_stream(line_stream, '', max=max, id_=id_))
            stream = next_stream
    except TypeError:
        raise RuntimeLexerError(
            s_fmt(stream,
                  'No token for {rest} at {location} of {text}.'))
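# Illustrative sketch only (toy_tokens and its regexps are hypothetical):
# the per-line framing _tokens() produces above - a start marker carrying
# the indent, the line's tokens, then an end marker - with a trivial lexer.
import re

TOY_START, TOY_END = '<start>', '<end>'

def toy_tokens(text):
    word, space = re.compile(r'\S+\s*'), re.compile(r'\s*')
    for line in text.splitlines():
        indent = space.match(line).group()
        yield (TOY_START, indent)
        rest = line[len(indent):]
        while rest:
            match = word.match(rest).group()
            yield ('word', match.strip())
            rest = rest[len(match):]
        yield (TOY_END, '')

assert list(toy_tokens('  ab cd')) == \
    [(TOY_START, '  '), ('word', 'ab'), ('word', 'cd'), (TOY_END, '')]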
def next(self, state, count=1):
    (cons, line_stream) = state
    try:
        (value, next_line_stream) = s_next(line_stream, count=count)
        return (value, ((cons, next_line_stream), self))
    except StopIteration:
        # the general approach here is to take what we can from the
        # current line, create the next, and take the rest from that.
        # of course, that may also not have enough, in which case it
        # will recurse.
        cons = cons.tail
        if s_empty(line_stream):
            next_line_stream = self._next_line(cons, line_stream)
            next_stream = ((cons, next_line_stream), self)
            return s_next(next_stream, count=count)
        else:
            (line, end_line_stream) = s_line(line_stream, False)
            next_line_stream = self._next_line(cons, end_line_stream)
            next_stream = ((cons, next_line_stream), self)
            (extra, final_stream) = s_next(next_stream,
                                           count=count - len(line))
            value = s_join(line_stream, line, extra)
            return (value, final_stream)
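# Illustrative sketch only (toy_next is hypothetical): taking count items
# across a line boundary, recursing into the next line when the current
# one runs short, as next() does above with cons-based streams.
def toy_next(lines, offset, count=1):
    line = lines[0][offset:]
    if count <= len(line):
        return (line[:count], (lines, offset + count))
    if not lines[1:]:
        raise IndexError('out of data')
    (extra, rest) = toy_next(lines[1:], 0, count - len(line))
    return (line + extra, rest)

# 'c' comes from the first line, 'de' from the second
(value, _) = toy_next(['abc', 'def'], 2, count=3)
assert value == 'cde', value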
def match(support, stream):
    # we increment id so that different strings (which might overlap or
    # be contiguous) don't affect each other's memoisation (the hash key
    # is based on offset, and the ('one past the') end of one column can
    # have the same offset as the start of the next).
    id_ = s_id(stream)
    # extract a line
    (line, next_stream) = s_line(stream, False)
    line_stream = s_stream(stream, line)
    results = []
    for ((left, right), matcher) in zip(indices, matchers):
        id_ += 1
        # extract the location in the line
        (_, left_aligned_line_stream) = s_next(line_stream, count=left)
        (word, _) = s_next(left_aligned_line_stream, count=right - left)
        support._debug(fmt('Columns {0}-{1} {2!r}', left, right, word))
        word_stream = s_stream(left_aligned_line_stream, word, id_=id_)
        # do the match
        support._debug(s_fmt(word_stream, 'matching {rest}'))
        (result, _) = yield matcher._match(word_stream)
        results.extend(result)
    support._debug(repr(results))
    yield (results, next_stream)
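# Illustrative sketch only (toy_columns is hypothetical): the column
# slicing the generator above performs with two s_next() calls per
# (left, right) pair, written directly over a plain line.
def toy_columns(line, indices):
    return [line[left:right] for (left, right) in indices]

assert toy_columns('alice   42', [(0, 5), (8, 10)]) == ['alice', '42']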
def _match(self, stream):
    '''
    On matching we first assert that the token type is correct and then
    delegate to the content.
    '''
    if not self.compiled:
        raise LexerError(
            fmt('A {0} token has not been compiled. '
                'You must use the lexer rewriter with Tokens. '
                'This can be done by using matcher.config.lexer().',
                self.__class__.__name__))
    ((tokens, line_stream), next_stream) = s_next(stream)
    if self.id_ in tokens:
        if self.content is None:
            # result contains all data (use s_next not s_line to set max)
            (line, _) = s_line(line_stream, True)
            (line, _) = s_next(line_stream, count=len(line))
            yield ([line], next_stream)
        else:
            generator = self.content._match(line_stream)
            while True:
                (result, next_line_stream) = yield generator
                if s_empty(next_line_stream) or not self.complete:
                    yield (result, next_stream)