def test_tokenize_multiline_II():
    # Make sure a multiline string with no newlines has the end marker on
    # the same line.
    fundef = '''""""'''
    token_list = _get_token_list(fundef)
    assert token_list == [PythonToken(ERRORTOKEN, '""""', (1, 0), ''),
                          PythonToken(ENDMARKER, '', (1, 4), '')]

def test_tokenize_multiline_III():
    # Make sure a multiline string with newlines has the end marker on the
    # next line, even if there are several newlines.
    fundef = '''""""\n\n'''
    token_list = _get_token_list(fundef)
    assert token_list == [PythonToken(ERRORTOKEN, '""""\n\n', (1, 0), ''),
                          PythonToken(ENDMARKER, '', (3, 0), '')]

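# The two tests above assume a small helper named _get_token_list that runs
# the tokenizer over a string and collects the resulting tokens into a list.
# A minimal sketch of such a helper is shown below; the import paths and the
# tokenize() signature are assumptions and may differ between parso versions.

from parso.python import tokenize
from parso.utils import parse_version_string


def _get_token_list(string, version=None):
    # Tokenize with the running interpreter's grammar version by default and
    # materialize the generator so tests can compare it against a plain list.
    version_info = parse_version_string(version)
    return list(tokenize.tokenize(string, version_info))
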
def _diff_tokenize(self, lines, until_line, line_offset=0):
    was_newline = False
    indents = self._nodes_tree.indents
    initial_indentation_count = len(indents)

    tokens = self._tokenizer(
        lines,
        start_pos=(line_offset + 1, 0),
        indents=indents,
        is_first_token=line_offset == 0,
    )
    stack = self._active_parser.stack
    self._replace_tos_indent = None
    self._keyword_token_indents = {}
    # print('start', line_offset + 1, indents)
    for token in tokens:
        # print(token, indents)
        typ = token.type
        if typ == DEDENT:
            if len(indents) < initial_indentation_count:
                # We are done here, only thing that can come now is an
                # endmarker or another dedented code block.
                while True:
                    typ, string, start_pos, prefix = token = next(tokens)
                    if typ in (DEDENT, ERROR_DEDENT):
                        if typ == ERROR_DEDENT:
                            # We want to force an error dedent in the next
                            # parser/pass. To make this possible we just
                            # increase the location by one.
                            self._replace_tos_indent = start_pos[1] + 1
                            pass
                    else:
                        break

                if '\n' in prefix or '\r' in prefix:
                    prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
                else:
                    assert start_pos[1] >= len(prefix), repr(prefix)
                    if start_pos[1] - len(prefix) == 0:
                        prefix = ''
                yield PythonToken(ENDMARKER, '', start_pos, prefix)
                break
        elif typ == NEWLINE and token.start_pos[0] >= until_line:
            was_newline = True
        elif was_newline:
            was_newline = False
            if len(indents) == initial_indentation_count:
                # Check if the parser is actually in a valid suite state.
                if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
                    yield PythonToken(ENDMARKER, '', token.start_pos, '')
                    break

        if typ == NAME and token.string in ('class', 'def'):
            self._keyword_token_indents[token.start_pos] = list(indents)

        yield token

def _diff_tokenize(self, lines, until_line, line_offset=0):
    is_first_token = True
    omitted_first_indent = False
    indents = []
    tokens = self._tokenizer(lines, (1, 0))
    stack = self._active_parser.stack
    for typ, string, start_pos, prefix in tokens:
        start_pos = start_pos[0] + line_offset, start_pos[1]
        if typ == PythonTokenTypes.INDENT:
            indents.append(start_pos[1])
            if is_first_token:
                omitted_first_indent = True
                # We want to get rid of indents that are only here because
                # we only parse part of the file. These indents would only
                # get parsed as error leafs, which doesn't make any sense.
                is_first_token = False
                continue

        is_first_token = False

        # In case of omitted_first_indent, it might not be dedented fully.
        # However this is a sign for us that a dedent happened.
        if typ == PythonTokenTypes.DEDENT \
                or typ == PythonTokenTypes.ERROR_DEDENT \
                and omitted_first_indent and len(indents) == 1:
            indents.pop()
            if omitted_first_indent and not indents:
                # We are done here, only thing that can come now is an
                # endmarker or another dedented code block.
                typ, string, start_pos, prefix = next(tokens)
                if '\n' in prefix or '\r' in prefix:
                    prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
                else:
                    assert start_pos[1] >= len(prefix), repr(prefix)
                    if start_pos[1] - len(prefix) == 0:
                        prefix = ''
                yield PythonToken(
                    PythonTokenTypes.ENDMARKER, '',
                    (start_pos[0] + line_offset, 0),
                    prefix
                )
                break
        elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
            yield PythonToken(typ, string, start_pos, prefix)
            # Check if the parser is actually in a valid suite state.
            if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
                start_pos = start_pos[0] + 1, 0
                while len(indents) > int(omitted_first_indent):
                    indents.pop()
                    yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')

                yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                break
            else:
                continue

        yield PythonToken(typ, string, start_pos, prefix)

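# Both _diff_tokenize variants above rely on _suite_or_file_input_is_valid to
# decide whether the token stream may be cut short with an early ENDMARKER.
# That helper is not shown in this section; the sketch below is only an
# approximation of what such a predicate looks like in parso's diff parser
# and may differ from the real implementation, which additionally checks via
# an internal helper that all open flow statements have been finished.

def _suite_or_file_input_is_valid(pgen_grammar, stack):
    # Walk the parser stack from the innermost node outwards.
    for stack_node in reversed(stack):
        if stack_node.nonterminal == 'decorator':
            # A decorator is only valid together with the function it
            # decorates, so we cannot stop parsing here.
            return False
        if stack_node.nonterminal == 'suite':
            # A suite that only contains its NEWLINE is not finished yet.
            return len(stack_node.nodes) > 1
    # Not reaching a suite means we are at file_input level, which is fine.
    return True
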
def tokenize_without_endmarker(code):
    safeword = 'ZZZ_USER_WANTS_TO_COMPLETE_HERE'
    for token in tokenize(code + safeword, (2, 7)):
        if token.string == safeword:
            return
        elif token.string.endswith(safeword):
            yield PythonToken(token.type, token.string[:-len(safeword)],
                              token.start_pos, token.prefix)
            return
        else:
            yield token

def tokenize_without_endmarker(code):
    safeword = 'ZZZ_USER_WANTS_TO_COMPLETE_HERE'
    grammar = load_grammar()
    tokens = grammar._tokenize(code + safeword)
    for token_ in tokens:
        if token_.string == safeword:
            return
        elif token_.string.endswith(safeword):
            yield PythonToken(token_.type, token_.string[:-len(safeword)],
                              token_.start_pos, token_.prefix)
            return
        else:
            yield token_

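# A small usage sketch for tokenize_without_endmarker as defined above: the
# safeword is appended to the (possibly incomplete) user code, and the token
# stream is cut off as soon as the safeword shows up, so neither an ENDMARKER
# nor error tokens produced by the artificial tail ever reach the caller.
# The example input below is made up purely for illustration.

incomplete_code = 'def foo(a, b):\n    return isinstance('
for token in tokenize_without_endmarker(incomplete_code):
    print(token.type, repr(token.string), token.start_pos)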