def get_tokens_unprocessed(self, text): row_tokenizer = RowTokenizer() var_tokenizer = VariableTokenizer() index = 0 for row in text.splitlines(): for value, token in row_tokenizer.tokenize(row): for value, token in var_tokenizer.tokenize(value, token): if value: yield index, token, text_type(value) index += len(value)