def _tokens(self, stream, max):
    '''
    Generate tokens, on demand.

    Repeatedly matches the head of `stream` against the token regexp
    (`self.t_regexp`), yielding a `(terminals, token_stream)` pair for
    each token found.  If no token matches, the result of `match` cannot
    be unpacked and the resulting `TypeError` routes control to the
    space regexp (`self.s_regexp`), whose match is consumed silently
    (no yield).  If neither matches, the `TypeError` escapes to the
    outer handler and becomes a `RuntimeLexerError`.

    :param stream: input stream (project stream protocol; accessed only
        through the `s_*` helper functions).
    :param max: forwarded to `s_stream` - presumably the "deepest match"
        tracker used for error reporting (TODO confirm against s_stream).
    '''
    try:
        id_ = s_id(stream)
        while not s_empty(stream):
            # avoid conflicts between tokens
            id_ += 1
            try:
                # try a real token first; a failed match unpacks to
                # TypeError, which selects the space branch below
                (terminals, match, next_stream) = \
                    self.t_regexp.match(stream)
                self._debug(
                    fmt('Token: {0!r} {1!r} {2!s}',
                        terminals, match, s_debug(stream)))
                yield (terminals, s_stream(stream, match, max=max, id_=id_))
            except TypeError:
                # no token here - skip whitespace without yielding
                (terminals, _size, next_stream) = \
                    self.s_regexp.size_match(stream)
                self._debug(
                    fmt('Space: {0!r} {1!s}',
                        terminals, s_debug(stream)))
            # advance past whichever match succeeded
            stream = next_stream
    except TypeError:
        # neither token nor space matched at the current position
        raise RuntimeLexerError(
            s_fmt(stream,
                  'No token for {rest} at {location} of {text}.'))
def _tokens(self, stream, max):
    '''
    Generate tokens, on demand.

    Walk `stream`, yielding a `(terminals, token_stream)` pair for each
    match of the token regexp (`self.t_regexp`).  Match failure is
    signalled by a `TypeError` from the failed tuple unpack, which
    diverts control to the space regexp (`self.s_regexp`); space matches
    are consumed but not yielded.  When neither regexp matches, the
    `TypeError` reaches the outer handler and a `RuntimeLexerError`
    reports the unlexable input.

    :param stream: input stream (project stream protocol; accessed only
        through the `s_*` helpers).
    :param max: forwarded to `s_stream` - presumably the maximum-depth
        tracker used in error reporting (TODO confirm).
    '''
    try:
        id_ = s_id(stream)
        while not s_empty(stream):
            # avoid conflicts between tokens
            id_ += 1
            try:
                # token attempt; TypeError on unpack means "no token"
                (terminals, match, next_stream) = \
                    self.t_regexp.match(stream)
                self._debug(fmt('Token: {0!r} {1!r} {2!s}',
                                terminals, match, s_debug(stream)))
                yield (terminals, s_stream(stream, match, max=max, id_=id_))
            except TypeError:
                # skip whitespace (nothing yielded)
                (terminals, _size, next_stream) = \
                    self.s_regexp.size_match(stream)
                self._debug(fmt('Space: {0!r} {1!s}',
                                terminals, s_debug(stream)))
            # move past the token or the space, whichever matched
            stream = next_stream
    except TypeError:
        # no token and no space matched - lexing cannot proceed
        raise RuntimeLexerError(
            s_fmt(stream,
                  'No token for {rest} at {location} of {text}.'))
def _tokens(self, stream, max):
    '''
    Generate tokens, on demand.

    Line-aware variant of the lexer: the input is processed one line at
    a time.  For every line it yields a `(START,)` token carrying the
    line's leading indentation (with tabs expanded via `self._tab`),
    then the ordinary tokens found on the line, then an `(END,)` token
    with empty content.  As in the other lexers, a failed regexp match
    unpacks to `TypeError`; inside a line that selects the space branch,
    and at the outer level it becomes a `RuntimeLexerError`.

    :param stream: input stream (project stream protocol).
    :param max: forwarded to `s_stream` - presumably the deepest-match
        tracker used for error reporting (TODO confirm).
    '''
    id_ = s_id(stream)
    try:
        while not s_empty(stream):
            # caches for different tokens with same contents differ
            id_ += 1
            # pull off one complete line and wrap it as a sub-stream
            (line, next_stream) = s_line(stream, False)
            line_stream = s_stream(stream, line)
            size = 0
            # if we use blocks, match leading space
            if self.blocks:
                try:
                    (_, size, _) = self.s_regexp.size_match(line_stream)
                except TypeError:
                    # no leading space matched; keep size == 0
                    pass
            # this will be empty (size=0) if blocks unused
            (indent, next_line_stream) = s_next(line_stream, count=size)
            indent = indent.replace('\t', self._tab)
            yield ((START,), s_stream(line_stream, indent, id_=id_, max=max))
            line_stream = next_line_stream
            # lex the remainder of the line
            while not s_empty(line_stream):
                id_ += 1
                try:
                    (terminals, match, next_line_stream) = \
                        self.t_regexp.match(line_stream)
                    yield (terminals,
                           s_stream(line_stream, match, max=max, id_=id_))
                except TypeError:
                    # no token - silently skip whitespace
                    (terminals, _size, next_line_stream) = \
                        self.s_regexp.size_match(line_stream)
                line_stream = next_line_stream
            # close the line with an empty END token
            id_ += 1
            yield ((END,), s_stream(line_stream, '', max=max, id_=id_))
            stream = next_stream
    except TypeError:
        # neither token nor space matched somewhere above
        raise RuntimeLexerError(
            s_fmt(stream,
                  'No token for {rest} at {location} of {text}.'))
def test_string_lines(self):
    '''
    Read lines from a string stream and check that locations are
    reported correctly, including inside a branched sub-stream.
    '''
    factory = DEFAULT_STREAM_FACTORY
    stream = factory.from_string('line 1\nline 2\nline 3\n')
    # consume the first line
    (line, stream) = s_line(stream, False)
    assert line == 'line 1\n', line
    # peek at the second line without advancing `stream`
    (line, _) = s_line(stream, False)
    assert line == 'line 2\n', repr(line)
    location = s_fmt(stream, '{location}')
    assert location == 'line 2, character 1', location
    # branch a sub-stream over the second line and step into it
    line_stream = s_stream(stream, line)
    (_, line_stream) = s_next(line_stream, count=2)
    location = s_fmt(line_stream, '{location}')
    assert location == 'line 2, character 3', location
def match(support, stream):
    '''
    Trampoline-style generator that matches fixed columns within one
    line.  For each `(left, right)` pair in `indices` (from the
    enclosing scope) the corresponding `matcher` is run against the
    text between those column offsets; all results are accumulated and
    finally yielded together with the stream positioned after the line.

    Note the two roles of `yield` here: `yield matcher._match(...)`
    hands a sub-generator to the trampoline and receives its result,
    while the final `yield (results, next_stream)` returns this
    matcher's own result.
    '''
    # we increment id so that different strings (which might overlap or
    # be contiguous) don't affect each other's memoisation (the hash key
    # is based on offset and ('one past the') end of one column can have
    # the same offset as the start of the next).
    id_ = s_id(stream)
    # extract a line
    (line, next_stream) = s_line(stream, False)
    line_stream = s_stream(stream, line)
    results = []
    for ((left, right), matcher) in zip(indices, matchers):
        id_ += 1
        # extract the location in the line
        (_, left_aligned_line_stream) = s_next(line_stream, count=left)
        (word, _) = s_next(left_aligned_line_stream, count=right-left)
        support._debug(fmt('Columns {0}-{1} {2!r}', left, right, word))
        word_stream = s_stream(left_aligned_line_stream, word, id_=id_)
        # do the match
        support._debug(s_fmt(word_stream, 'matching {rest}'))
        (result, _) = yield matcher._match(word_stream)
        results.extend(result)
    support._debug(repr(results))
    yield (results, next_stream)
def match(support, stream):
    '''
    Match each configured column of the current line.

    Pairs `(begin, end)` from `indices` (enclosing scope) select slices
    of the line; the paired matcher from `matchers` is run over each
    slice via the trampoline (`yield matcher._match(...)`), and the
    combined results are yielded with the stream advanced past the line.
    '''
    # distinct ids keep memoisation of adjacent or overlapping column
    # streams from colliding: hash keys are offset-based, and one
    # column's end can share an offset with the next column's start
    token_id = s_id(stream)
    # take one line and wrap it as its own stream
    (line, rest_of_stream) = s_line(stream, False)
    line_stream = s_stream(stream, line)
    accumulated = []
    for ((begin, end), column_matcher) in zip(indices, matchers):
        token_id += 1
        # slice the column's text out of the line
        (_, shifted_stream) = s_next(line_stream, count=begin)
        (fragment, _) = s_next(shifted_stream, count=end - begin)
        support._debug(fmt('Columns {0}-{1} {2!r}', begin, end, fragment))
        fragment_stream = s_stream(shifted_stream, fragment, id_=token_id)
        # run the column's matcher through the trampoline
        support._debug(s_fmt(fragment_stream, 'matching {rest}'))
        (column_result, _) = yield column_matcher._match(fragment_stream)
        accumulated.extend(column_result)
    support._debug(repr(accumulated))
    yield (accumulated, rest_of_stream)
def lookahead(self, next, equal, forwards, mutates, reads, length):
    '''
    Lookahead/lookbehind support for this (complex) engine.

    `next[0]` is the node to continue with on success; `next[1]` is the
    lookahead expression.  `equal` selects positive vs negative
    assertion, `forwards` lookahead vs lookbehind, and `mutates`/`reads`
    whether the expression writes/reads groups.  `length` computes the
    expression's fixed size, if any, for the current groups.

    On success the continuation is queued on `self._states`; this method
    always raises `Fail` for the current thread - the engine presumably
    resumes from the queued states (NOTE(review): confirm against the
    engine's dispatch loop).
    '''
    # todo - could also cache things that read groups by state
    # discard old values
    if self._lookaheads[0] != self._offset:
        self._lookaheads = (self._offset, {})
    lookaheads = self._lookaheads[1]
    # approach here different from simple engine as not all
    # results can be cached
    match = False
    if next[1] in lookaheads:
        success = lookaheads[next[1]]
    else:
        # we need to match the lookahead
        search = False
        # size is unknowable when the expression both reads and mutates
        size = None if (reads and mutates) else \
            length(self._state.groups(self._parser_state.groups))
        if forwards:
            # lookahead: run from the current offset on the full stream
            stream = self._initial_stream
            offset = self._offset
        else:
            # lookbehind: restrict the stream to text before the cursor
            (text, _) = s_next(self._initial_stream, self._offset)
            stream = s_stream(self._initial_stream, text)
            if size is None:
                # variable size - search for a match ending at the cursor
                offset = 0
                search = True
            else:
                # fixed size - may be negative if too little text behind
                offset = self._offset - size
        if offset >= 0:
            # run the sub-match in a pushed (saved) engine context
            new_state = self._state.clone(next[1], stream=stream)
            self._push()
            try:
                match = self._run_from(new_state, stream, offset, search)
                new_state = self._state
            finally:
                self._pop()
        success = bool(match) == equal
        # only group-independent results can be cached safely
        if not (mutates or reads):
            lookaheads[next[1]] = success
    # if lookahead succeeded, continue
    if success:
        if mutates and match:
            # propagate captures made inside the lookahead
            self._state.merge_groups(new_state)
        self._states.append(self._state.advance(next[0]))
    raise Fail
def lookahead(self, next, equal, forwards, mutates, reads, length):
    '''
    Lookahead/lookbehind support for this (simple) engine.

    `next[0]` is the continuation node, `next[1]` the lookahead
    expression.  Results are cached per expression for the current
    offset (the cache is discarded whenever the offset changes).
    Expressions that read groups are unsupported here and raise
    `UnsupportedOperation` so that a more capable engine can take over
    (NOTE(review): presumably triggers fallback to the complex engine -
    confirm against the caller).

    Returns `next[0]` on success; raises `Fail` otherwise.
    '''
    # discard old values
    if self._lookaheads[0] != self._offset:
        self._lookaheads = (self._offset, {})
    lookaheads = self._lookaheads[1]
    if next[1] not in lookaheads:
        # requires complex engine
        if reads:
            raise UnsupportedOperation('lookahead')
        # note: `reads` is False here, so this always calls length(None)
        size = None if (reads and mutates) else length(None)
        # invoke simple engine and cache
        self._push()
        try:
            if forwards:
                # lookahead runs from the current position
                stream = self._initial_stream
                pos = self._offset
                search = False
            else:
                # lookbehind: truncate the stream at the cursor
                (text, _) = s_next(self._initial_stream, self._offset)
                stream = s_stream(self._initial_stream, text)
                if size is None:
                    # variable size - search for any match ending here
                    pos = 0
                    search = True
                else:
                    # fixed size - negative pos means "cannot fit"
                    pos = self._offset - size
                    search = False
            if pos >= 0:
                result = bool(self._run_from(next[1], stream,
                                             pos, search)) == equal
            else:
                # too little text behind for a fixed-size lookbehind
                result = not equal
        finally:
            self._pop()
        lookaheads[next[1]] = result
    if lookaheads[next[1]]:
        return next[0]
    else:
        raise Fail
def lookahead(self, next, equal, forwards, mutates, reads, length):
    '''
    Lookahead/lookbehind support for this engine.

    `next[0]` is the node to continue with on success; `next[1]`
    (`alternate`) is the lookahead expression.  `equal` selects positive
    vs negative assertion; `forwards` lookahead vs lookbehind;
    `mutates`/`reads` record whether the expression writes/reads groups
    (either disables caching).  `length` computes the expression's fixed
    size, if any, given the current groups.

    Returns `next[0]` on success and raises `Fail` otherwise.
    '''
    self.ticks += 1
    alternate = next[1]
    if alternate not in self.__lookaheads:
        self.__lookaheads[alternate] = {}
    if self.__state._offset in self.__lookaheads[alternate]:
        # BUG FIX: the cache is a dict-of-dicts keyed first by
        # expression then by offset.  The original read was
        # `self.__lookaheads[alternate[self.__state._offset]]`, which
        # subscripts `alternate` itself and can never retrieve the
        # values stored in the `else` branch below.
        success = self.__lookaheads[alternate][self.__state._offset]
    else:
        # size is only knowable when the expression does not both read
        # and mutate groups
        size = None if (reads and mutates) else length(self.__state.groups)
        search = False
        if forwards:
            # lookahead: clone the state on the same stream/position
            clone = State(self.__state._parser_state,
                          self.__state._stream,
                          self.__state.groups.clone())
        else:
            # lookbehind that is provably too long cannot match
            if size is not None and size > self.__state._offset and equal:
                raise Fail
            # restrict the stream to the text before the cursor
            (text, _) = s_next(self.__stream, self.__state._offset)
            stream = s_stream(self.__stream, text)
            if size is None or size > self.__state._offset:
                # unknown (or oversized) length - search for a match
                # ending at the current offset
                search = True
                pos = None
            else:
                pos = self.__state._offset - size
            clone = State(self.__state._parser_state, stream,
                          self.__state.groups.clone(), pos=pos)
        (match, clone) = self.__run(alternate, clone, search=search)
        success = match == equal
        # group-dependent results cannot be cached safely
        if not (reads or mutates):
            self.__lookaheads[alternate][self.__state._offset] = success
    # if lookahead succeeded, continue
    if success:
        if mutates:
            # propagate captures made inside the lookahead
            self.__state = self.__state.clone(groups=clone.groups)
        return next[0]
    else:
        raise Fail
def test_all(self):
    '''
    Walk an iterable (line-based) stream end to end: read lines and
    characters, check reported locations, branch a sub-stream, and
    confirm the deepest position reached.
    '''
    source = iter(['first line', 'second line', 'third line'])
    factory = DEFAULT_STREAM_FACTORY
    stream = factory(source)
    # a freshly created stream is not empty
    assert not s_empty(stream)
    # take the first line off the stream
    (first, after_first) = s_line(stream, False)
    assert 'first line' == first, first
    # read the first character of the second line
    (char, at_line2) = s_next(after_first)
    assert char == 's', char
    # location formatting reflects the position inside line two
    where = s_fmt(at_line2, '{location}: {rest}')
    assert where == "line 2, character 2: 'econd line'", where
    # consume the rest of the second line
    (tail, after_second) = s_next(at_line2, count=len('econd line'))
    assert tail == 'econd line', tail
    shown = s_debug(at_line2)
    assert shown == "1:'e'", shown
    # step into the third line, one character at a time
    (char, at_line3) = s_next(after_second)
    assert char == 't', char
    (char, _) = s_next(at_line3)
    assert char == 'h', char
    # branch a sub-stream (token-style) back at line one
    branch = s_stream(after_first, first)
    (branch_line, branch_rest) = s_line(branch, False)
    assert branch_line == 'first line', branch_line
    assert not s_empty(branch_rest)
    (char, branch_next) = s_next(branch)
    assert char == 'f', char
    shown = s_debug(branch_next)
    assert shown == "1:'i'", shown
    # the deepest point reached was after 'h' in the third line
    deepest = s_deepest(stream)
    where = s_fmt(deepest, '{location}: {rest}')
    assert where == "line 3, character 3: 'ird line'", where