Exemplo n.º 1
0
def decode(b, errors='strict'):
    """Decode ``b`` and wrap every NUMBER token in a ``blurse(...)`` call.

    Returns a ``(rewritten_source, length)`` pair, where ``length`` is the
    second value produced by ``utf_8.decode``.
    """
    text, length = utf_8.decode(b, errors)

    rewritten = []
    for tok in tokenize_rt.src_to_tokens(text):
        if tok.name != 'NUMBER':
            rewritten.append(tok)
            continue
        # re-tokenize the wrapped literal so the output stays a flat token list
        wrapped_src = "blurse({})".format(tok.src)
        rewritten.extend(tokenize_rt.src_to_tokens(wrapped_src))

    return tokenize_rt.tokens_to_src(rewritten), length
Exemplo n.º 2
0
def _upgrade(source: str) -> str:
    """Return ``source`` re-serialized after ``_mutate_found`` has processed it.

    The AST is walked with ``_FindAssignment``; the visitor and the token list
    are then handed to ``_mutate_found`` (presumably editing the tokens in
    place, since its return value is not used).
    """
    tree = _ast_parse(source)
    finder = _FindAssignment()
    finder.visit(tree)

    token_list = src_to_tokens(source)
    _mutate_found(token_list, finder)
    return tokens_to_src(token_list)
Exemplo n.º 3
0
def _fix_six(contents_text):
    """Rewrite the ``six`` usages recorded by ``FindSixUsage``.

    Source that fails to parse is returned unchanged.  Tokens are walked
    back-to-front so that edits never shift indices still to be visited.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = FindSixUsage()
    finder.visit(tree)

    tokens = src_to_tokens(contents_text)
    for idx, tok in reversed_enumerate(tokens):
        offset = tok.offset
        if offset in finder.simple_names:
            # bare name: swap the single token for its replacement
            name_node = finder.simple_names[offset]
            tokens[idx] = Token('CODE', SIX_SIMPLE_ATTRS[name_node.id])
        elif offset in finder.simple_attrs:
            # `<name>.<attr>`: only rewrite when the three tokens are adjacent
            attr_node = finder.simple_attrs[offset]
            is_dotted_attr = (
                tokens[idx + 1].src == '.' and
                tokens[idx + 2].src == attr_node.attr
            )
            if is_dotted_attr:
                replacement = Token('CODE', SIX_SIMPLE_ATTRS[attr_node.attr])
                tokens[idx:idx + 3] = [replacement]
        elif offset in finder.remove_decorators:
            if tokens[idx - 1].src != '@':
                continue
            # drop everything from the `@` through the end-of-line NEWLINE
            stop = idx + 1
            while tokens[stop].name != 'NEWLINE':
                stop += 1
            del tokens[idx - 1:stop + 1]

    return tokens_to_src(tokens)
Exemplo n.º 4
0
def _has_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """
    Strip a trailing semicolon from a cell and report whether one was found.

    Parameters
    ----------
    src
        Notebook cell source.

    Returns
    -------
    Tuple[str, bool]
        The source with the trailing semicolon removed (``src`` itself when
        there is none), and whether a trailing semicolon was present.
    """
    tokens = tokenize_rt.src_to_tokens(src)
    for idx, token in tokenize_rt.reversed_enumerate(tokens):
        # skip trailing blanks/newlines and comments
        is_blank = not token.src.strip(" \n")
        if is_blank or token.name == "COMMENT":
            continue
        if token.name == "OP" and token.src == ";":
            tokens[idx] = token._replace(src="")
            return tokenize_rt.tokens_to_src(tokens), True
        # the last significant token is not a semicolon
        break
    return src, False
Exemplo n.º 5
0
def test_src_to_tokens_octal_literal_normalization():
    """``0755`` must come back as a single NUMBER token."""
    actual = src_to_tokens('0755\n')
    number = Token('NUMBER', '0755', line=1, utf8_byte_offset=0)
    newline = Token('NEWLINE', '\n', line=1, utf8_byte_offset=4)
    endmarker = Token('ENDMARKER', '', line=2, utf8_byte_offset=0)
    assert actual == [number, newline, endmarker]
def decode(b, errors='strict'):
    """Decode ``b`` and rewrite string runs flagged by ``_is_f``.

    Consecutive STRING tokens (separated only by non-coding tokens) form a
    run; a run is passed through ``_make_fstring`` when ``_is_f`` is truthy
    for at least one of its tokens.  Returns ``(source, length)``.

    Raises ``SyntaxError`` (with the offending line and a caret) when
    ``_make_fstring`` reports a ``TokenSyntaxError``.
    """
    import tokenize_rt  # pip install future-fstrings[rewrite]

    text, length = utf_8.decode(b, errors)
    tokens = tokenize_rt.src_to_tokens(text)

    runs = []
    start = end = seen_f = None

    for idx, tok in enumerate(tokens):
        is_string = tok.name == 'STRING'
        if start is None:
            # not inside a run: only a string can open one
            if is_string:
                start, end, seen_f = idx, idx + 1, _is_f(tok)
            continue
        if is_string:
            end = idx + 1
            seen_f |= _is_f(tok)
            continue
        if tok.name in tokenize_rt.NON_CODING_TOKENS:
            continue
        # a coding token terminates the current run
        if seen_f:
            runs.append((start, end))
        start = end = seen_f = None

    # replace back-to-front so earlier indices stay valid
    for start, end in reversed(runs):
        try:
            tokens[start:end] = _make_fstring(tokens[start:end])
        except TokenSyntaxError as e:
            msg = str(e.e)
            line = text.splitlines()[e.token.line - 1]
            # convert the byte offset into a character column for the caret
            leading_bytes = line.encode('UTF-8')[:e.token.utf8_byte_offset]
            caret = ' ' * len(leading_bytes.decode('UTF-8')) + '^'
            raise SyntaxError(msg + '\n\n' + line + '\n' + caret)
    return tokenize_rt.tokens_to_src(tokens), length
Exemplo n.º 7
0
def _fix_src(contents_text: str, min_version: Tuple[int, ...]) -> str:
    """Run the registered callbacks and brace fixes over ``contents_text``.

    Source that does not parse is returned untouched.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    callbacks = visit(FUNCS, tree, min_version)

    tokens = src_to_tokens(contents_text)
    for idx, tok in _changing_list(tokens):
        # zero-length tokens such as DEDENT carry nothing to fix
        if not tok.src:
            continue

        # `.get()` is deliberately used on this defaultdict: it makes this
        # function's self time almost 50% faster
        for cb in callbacks.get(tok.offset, ()):
            cb(idx, tokens)

        if tok.src in START_BRACES:
            brace_fix = find_simple(idx, tokens)
            fix_brace(tokens, brace_fix, add_comma=False, remove_comma=False)

    return tokens_to_src(tokens)
Exemplo n.º 8
0
def _fix_format_literals(contents_text):
    """Rewrite string literals that are the receiver of a ``.format`` call.

    A run of adjacent STRING tokens followed by ``.`` and ``format`` is joined
    and passed through ``_rewrite_string_literal``.
    """
    tokens = src_to_tokens(contents_text)

    spans = []
    run_start = run_end = None
    dot_seen = False

    for idx, tok in enumerate(tokens):
        if tok.name == 'STRING':
            # open a new run or extend the current one
            if run_start is None:
                run_start = idx
            run_end = idx + 1
        elif run_start is not None and tok.src == '.':
            dot_seen = True
        elif dot_seen and tok.src == 'format':
            spans.append((run_start, run_end))
            run_start, run_end, dot_seen = None, None, False
        elif tok.name not in NON_CODING_TOKENS:
            # any other coding token breaks the pattern
            run_start, run_end, dot_seen = None, None, False

    # apply back-to-front so recorded indices remain valid
    for start, end in reversed(spans):
        joined = tokens_to_src(tokens[start:end])
        tokens[start:end] = [Token('STRING', _rewrite_string_literal(joined))]

    return tokens_to_src(tokens)
Exemplo n.º 9
0
def _fix_fstrings(contents_text):
    """Convert the ``.format(...)`` calls found by ``FindSimpleFormats``.

    Unparseable source is returned unchanged, and calls whose arguments end
    on a different line than the string are skipped.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = FindSimpleFormats()
    finder.visit(tree)

    tokens = src_to_tokens(contents_text)
    for idx, tok in reversed(tuple(enumerate(tokens))):
        node = finder.found.get(Offset(tok.line, tok.utf8_byte_offset))
        if node is None:
            continue

        if _is_bytestring(tok.src):  # pragma: no cover (py2-only)
            continue

        # the string has to be followed directly by `.format(`
        open_paren = idx + 3
        following = tokens_to_src(tokens[idx + 1:open_paren + 1])
        if following != '.format(':
            continue

        # argument positions are irrelevant here, so `node` itself is passed
        last = _victims(tokens, open_paren, node, gen=False).ends[-1]
        # bail when the call spans more than one line
        if tokens[last].line != tok.line:
            continue

        tokens[idx] = tok._replace(src=_to_fstring(tok.src, node))
        del tokens[idx + 1:last + 1]

    return tokens_to_src(tokens)
Exemplo n.º 10
0
def _fix_percent_format(contents_text):
    """Rewrite the ``%``-format expressions found by ``FindPercentFormats``.

    Returns the input unchanged when it does not parse or nothing was found.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = FindPercentFormats()
    finder.visit(tree)
    if not finder.found:
        return contents_text

    tokens = src_to_tokens(contents_text)
    for idx, tok in reversed_enumerate(tokens):
        node = finder.found.get(tok.offset)
        if node is None:
            continue

        # bytestrings have no .format() equivalent in py3; this guard only
        # matters when running under python2
        if _is_bytestring(tokens[idx].src):  # pragma: no cover (py2-only)
            continue

        rhs = node.right
        if isinstance(rhs, ast.Tuple):
            _fix_percent_format_tuple(tokens, idx, node)
        elif isinstance(rhs, ast.Dict):
            _fix_percent_format_dict(tokens, idx, node)

    return tokens_to_src(tokens)
Exemplo n.º 11
0
def _fix_py2_compatible(contents_text):
    """Apply the dict/set/``is``-literal rewrites found by the visitor.

    The input is returned unchanged when it does not parse or when
    ``Py2CompatibleVisitor`` recorded nothing.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    visitor = Py2CompatibleVisitor()
    visitor.visit(tree)
    findings = (
        visitor.dicts,
        visitor.sets,
        visitor.set_empty_literals,
        visitor.is_literal,
    )
    if not any(findings):
        return contents_text

    tokens = src_to_tokens(contents_text)
    # walk backwards so edits do not disturb indices still to be visited
    for idx, tok in reversed_enumerate(tokens):
        offset = tok.offset
        if offset in visitor.dicts:
            _process_dict_comp(tokens, idx, visitor.dicts[offset])
        elif offset in visitor.set_empty_literals:
            _process_set_empty_literal(tokens, idx)
        elif offset in visitor.sets:
            _process_set_literal(tokens, idx, visitor.sets[offset])
        elif offset in visitor.is_literal:
            _process_is_literal(tokens, idx, visitor.is_literal[offset])
    return tokens_to_src(tokens)
Exemplo n.º 12
0
def _fix_plugins(contents_text: str, settings: Settings) -> str:
    """Run every plugin callback registered for a token offset.

    The source is returned unchanged when it fails to parse, when no
    callbacks were produced, or when tokenization raises ``TokenError``.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    callbacks = visit(FUNCS, tree, settings)
    if not callbacks:
        return contents_text

    try:
        tokens = src_to_tokens(contents_text)
    except tokenize.TokenError:  # pragma: no cover (bpo-2180)
        return contents_text

    _fixup_dedent_tokens(tokens)

    for idx, tok in reversed_enumerate(tokens):
        if tok.src:
            # `.get()` is deliberately used on this defaultdict: it makes
            # this function's self time almost 50% faster
            for cb in callbacks.get(tok.offset, ()):
                cb(idx, tokens)

    return tokens_to_src(tokens)
Exemplo n.º 13
0
def remove_trailing_semicolon(src: str) -> Tuple[str, bool]:
    """Strip the trailing semicolon of a Jupyter notebook cell, if any.

    A cell such as

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data);  # plot data

    is turned into

        fig, ax = plt.subplots()
        ax.plot(x_data, y_data)  # plot data

    The logic mirrors `quiet` from `IPython.core.displayhook`, but relies on
    ``tokenize_rt`` so the source round-trips exactly.

    Returns the (possibly rewritten) source and whether a semicolon was
    removed.
    """
    from tokenize_rt import reversed_enumerate, src_to_tokens, tokens_to_src

    tokens = src_to_tokens(src)
    for idx, token in reversed_enumerate(tokens):
        if token.name in TOKENS_TO_IGNORE:
            continue
        if token.name == "OP" and token.src == ";":
            del tokens[idx]
            return tokens_to_src(tokens), True
        # last significant token is something other than `;`
        break
    return src, False
Exemplo n.º 14
0
def fix_file(filename: str, show_diff: bool = False, dry_run: bool = False) -> int:
    """Rewrite the ``noqa`` comments of ``filename`` based on flake8 results.

    flake8 is run on a comment-stripped copy of the file; the results are
    then used to rewrite (or remove) the ``noqa`` comments of the original.

    Args:
        filename: path of the file to process.
        show_diff: print a unified diff of the change.
        dry_run: print the diff but do not write the file.

    Returns:
        1 when the file is not UTF-8 or when it was (or would be) changed,
        0 otherwise (including when the stripped source has a syntax error).
    """
    with open(filename, 'rb') as f:
        contents_bytes = f.read()

    try:
        contents_text = contents_bytes.decode()
    except UnicodeDecodeError:
        print(f'{filename} is non-utf8 (not supported)')
        return 1

    tokens = tokenize_rt.src_to_tokens(contents_text)

    tokens_no_comments = _remove_comments(tokens)
    src_no_comments = tokenize_rt.tokens_to_src(tokens_no_comments)

    # no comments at all means there is nothing to rewrite
    if src_no_comments == contents_text:
        return 0

    # A NamedTemporaryFile cannot be reopened by name while it is still open
    # on Windows, so flake8 could not read it there.  Create the file with
    # mkstemp, close it before running flake8, and remove it ourselves.
    fd, path = tempfile.mkstemp(
        dir=os.path.dirname(filename),
        prefix=os.path.basename(filename),
        suffix='.py',
    )
    try:
        with open(fd, 'wb') as tmpfile:
            tmpfile.write(src_no_comments.encode())
        flake8_results = _run_flake8(path)
    finally:
        os.remove(path)

    if any('E999' in v for v in flake8_results.values()):
        print(f'{filename}: syntax error (skipping)')
        return 0

    # walk backwards so deletions do not shift indices still to be visited
    for i, token in tokenize_rt.reversed_enumerate(tokens):
        if token.name != 'COMMENT':
            continue

        if NOQA_RE.search(token.src):
            _rewrite_noqa_comment(tokens, i, flake8_results)
        elif NOQA_FILE_RE.match(token.src) and not flake8_results:
            if i == 0 or tokens[i - 1].name == 'NEWLINE':
                # comment on a line of its own: drop it and the next token
                del tokens[i: i + 2]
            else:
                _remove_comment(tokens, i)

    newsrc = tokenize_rt.tokens_to_src(tokens)
    if newsrc == contents_text:
        return 0

    if show_diff or dry_run:
        diff = difflib.unified_diff(
            contents_text.splitlines(keepends=True),
            newsrc.splitlines(keepends=True),
            fromfile=filename,
            tofile=filename,
        )
        print(''.join(diff), end='')
    if not dry_run:
        print(f'Rewriting {filename}')
        with open(filename, 'wb') as f:
            f.write(newsrc.encode())
    return 1
Exemplo n.º 15
0
def _fix_tokens(contents_text: str, min_version: Version) -> str:
    """Apply the token-level rewrites and return the left-stripped result.

    The input is returned unchanged when it cannot be tokenized.
    """
    py3_plus = min_version >= (3, )
    remove_u = py3_plus or _imports_future(contents_text, 'unicode_literals')

    try:
        tokens = src_to_tokens(contents_text)
    except tokenize.TokenError:
        return contents_text

    for idx, tok in reversed_enumerate(tokens):
        if tok.name == 'NUMBER':
            tokens[idx] = tok._replace(src=_fix_long(_fix_octal(tok.src)))
        elif tok.name == 'STRING':
            fixed = _fix_ur_literals(tokens[idx])
            if remove_u:
                fixed = _remove_u_prefix(fixed)
            tokens[idx] = _fix_escape_sequences(fixed)
        elif tok.src == '(':
            _fix_extraneous_parens(tokens, idx)
        elif tok.src == 'format' and idx > 0 and tokens[idx - 1].src == '.':
            _fix_format_literal(tokens, idx - 2)
        elif tok.src == 'encode' and idx > 0 and tokens[idx - 1].src == '.':
            _fix_encode_to_binary(tokens, idx)
        elif (
                py3_plus and
                tok.utf8_byte_offset == 0 and
                tok.line < 3 and
                tok.name == 'COMMENT' and
                tokenize.cookie_re.match(tok.src)
        ):
            # encoding cookie on one of the first two lines: remove the
            # comment together with the NL token that follows it
            del tokens[idx]
            assert tokens[idx].name == 'NL', tokens[idx].name
            del tokens[idx]
        elif tok.src == 'from' and tok.utf8_byte_offset == 0:
            _fix_import_removals(tokens, idx, min_version)

    return tokens_to_src(tokens).lstrip()
Exemplo n.º 16
0
def test_src_to_tokens_string_prefix_normalization(prefix):
    """A prefixed string literal is kept as a single STRING token."""
    literal = f"{prefix}'foo'"
    actual = src_to_tokens(f"{prefix}'foo'\n")
    newline_offset = 5 + len(prefix)
    expected = [
        Token('STRING', literal, line=1, utf8_byte_offset=0),
        Token('NEWLINE', '\n', line=1, utf8_byte_offset=newline_offset),
        Token('ENDMARKER', '', line=2, utf8_byte_offset=0),
    ]
    assert actual == expected
Exemplo n.º 17
0
def test_src_to_tokens_long_literal_normalization(postfix):
    """A number with a long-literal postfix is kept as one NUMBER token."""
    literal = f'123{postfix}'
    actual = src_to_tokens(f'123{postfix}\n')
    expected = [
        Token('NUMBER', literal, line=1, utf8_byte_offset=0),
        Token('NEWLINE', '\n', line=1, utf8_byte_offset=4),
        Token('ENDMARKER', '', line=2, utf8_byte_offset=0),
    ]
    assert actual == expected
Exemplo n.º 18
0
Arquivo: t.py Projeto: asottile/t
def _fix_calls(contents_text: str) -> str:
    """Collapse each call recorded by ``Visitor`` around its bracketed argument.

    For every recorded call, the tokens between the call's ``(`` and the
    bracket opened directly inside it, and between that bracket's closer and
    the call's ``)``, are removed; indentation inside the call is reduced by
    four characters.

    The source is returned unchanged when it does not parse, when the visitor
    recorded no calls, or when tokenization raises ``TokenError``.
    """
    try:
        ast_obj = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    visitor = Visitor()
    visitor.visit(ast_obj)

    if not visitor.calls:
        return contents_text

    try:
        tokens = src_to_tokens(contents_text)
    except tokenize.TokenError:  # pragma: no cover (bpo-2180)
        return contents_text

    # iterate back-to-front so deletions never shift indices yet to be visited
    for i, token in reversed_enumerate(tokens):
        if token.offset in visitor.calls:
            # each recorded offset is handled once
            visitor.calls.discard(token.offset)

            # search forward for the opening brace
            while tokens[i].src != '(':
                i += 1

            call_start = i
            i += 1
            brace_depth = 1
            # `start` / `end`: indices of the (last) bracket opened at depth 1
            # and of the (last) closer that returns the depth to 1
            start = -1
            end = -1

            # scan until the call's own closing paren brings the depth to 0
            while brace_depth:
                if tokens[i].src in {'(', '{', '['}:
                    if brace_depth == 1:
                        start = i
                    brace_depth += 1
                elif tokens[i].src in {')', '}', ']'}:
                    brace_depth -= 1
                    if brace_depth == 1:
                        end = i
                i += 1

            # the call is expected to contain at least one nested bracket pair
            assert start != -1
            assert end != -1
            # `i` is one past the call's closing paren at this point
            call_end = i - 1

            # dedent everything inside the brackets
            # (only whitespace directly after an NL token, i.e. line
            # indentation, loses its first four characters)
            for i in range(call_start, call_end):
                if (tokens[i - 1].name == 'NL'
                        and tokens[i].name == UNIMPORTANT_WS):
                    tokens[i] = tokens[i]._replace(src=tokens[i].src[4:])

            # delete the tail first so the indices of the head stay valid
            del tokens[end + 1:call_end]
            del tokens[call_start + 1:start]

    return tokens_to_src(tokens)
Exemplo n.º 19
0
def decode(b, errors="strict"):
    """Decode ``b`` and rewrite the token ranges selected below via ``_make_fstring``.

    Returns a ``(source, length)`` pair, where ``length`` is the second value
    produced by ``utf_8.decode``.

    Raises ``SyntaxError`` (showing the offending line and a caret) when
    ``_make_fstring`` raises ``TokenSyntaxError``.

    NOTE(review): the exact range selection depends on the helpers ``fstr``,
    ``peek_is_fstr`` and ``peek_is_str``, which are not visible here — confirm
    their semantics before changing the loop.
    """

    # token kinds that may sit between the pieces of a multi-part string
    non_coding_tokens = frozenset(
        ("COMMENT", tokenize_rt.ESCAPED_NL, "NL", tokenize_rt.UNIMPORTANT_WS))

    u, length = utf_8.decode(b, errors)
    tokens = tokenize_rt.src_to_tokens(u)

    # (start, end) index pairs of the token ranges to rewrite
    to_replace = []

    # -1 means "no range currently open"
    started = -1
    end = -1

    # run one step past the last token (token is None) so that a range still
    # open at the end of the file gets recorded
    for i in range(0, 1 + len(tokens)):
        if i < len(tokens):
            token = tokens[i]
        else:
            token = None

        if token:
            if fstr(token):
                # open a range at the first token accepted by fstr()
                if started < 0:
                    started = i
                continue
            end = i

        if started >= 0:
            # keep the range open while the look-ahead helpers report more
            # string material
            if peek_is_fstr(tokens, i + 1):
                continue
            if peek_is_str(tokens, i + 1):
                continue

            if token is None:
                pass
            elif token.name in non_coding_tokens or peek_is_str(tokens, i):
                # multiline f-string + str
                continue

            to_replace.append((started, end))
            started = -1

    # replace back-to-front so earlier indices stay valid
    for start, end in reversed(to_replace):
        if end - start > 1:
            # move ending line away from format of multiline fstrings
            if tokens[end - 1].name in non_coding_tokens:
                end -= 1
        try:
            tokens[start:end] = _make_fstring(tokens[start:end])
        except TokenSyntaxError as e:
            msg = str(e.e)
            line = u.splitlines()[e.token.line - 1]
            # convert the byte offset into a character column for the caret
            bts = line.encode("UTF-8")[:e.token.utf8_byte_offset]
            indent = len(bts.decode("UTF-8"))
            raise SyntaxError(msg + "\n\n" + line + "\n" + " " * indent + "^")
    return tokenize_rt.tokens_to_src(tokens), length
Exemplo n.º 20
0
def fix_file(filename: str) -> int:
    """Rewrite the ``noqa`` comments of ``filename`` based on flake8 results.

    Returns 1 when the file is not UTF-8 or was rewritten, 0 otherwise
    (including when the comment-stripped source has a syntax error).
    """
    with open(filename, 'rb') as src_file:
        raw = src_file.read()

    try:
        contents_text = raw.decode()
    except UnicodeDecodeError:
        print(f'{filename} is non-utf8 (not supported)')
        return 1

    tokens = tokenize_rt.src_to_tokens(contents_text)
    stripped_src = tokenize_rt.tokens_to_src(_remove_comments(tokens))

    # a file without comments has nothing to rewrite
    if stripped_src == contents_text:
        return 0

    # flake8 runs on a sibling temp file that is removed afterwards
    fd, tmp_path = tempfile.mkstemp(
        dir=os.path.dirname(filename),
        prefix=os.path.basename(filename),
        suffix='.py',
    )
    try:
        with open(fd, 'wb') as tmp_file:
            tmp_file.write(stripped_src.encode())
        flake8_results = _run_flake8(tmp_path)
    finally:
        os.remove(tmp_path)

    if any('E999' in codes for codes in flake8_results.values()):
        print(f'{filename}: syntax error (skipping)')
        return 0

    # walk backwards so deletions do not shift indices still to be visited
    for idx, tok in tokenize_rt.reversed_enumerate(tokens):
        if tok.name != 'COMMENT':
            continue

        if NOQA_RE.search(tok.src):
            _rewrite_noqa_comment(tokens, idx, flake8_results)
        elif NOQA_FILE_RE.match(tok.src) and not flake8_results:
            on_own_line = idx == 0 or tokens[idx - 1].name == 'NEWLINE'
            if on_own_line:
                del tokens[idx:idx + 2]
            else:
                _remove_comment(tokens, idx)

    newsrc = tokenize_rt.tokens_to_src(tokens)
    if newsrc == contents_text:
        return 0

    print(f'Rewriting {filename}')
    with open(filename, 'wb') as out_file:
        out_file.write(newsrc.encode())
    return 1
Exemplo n.º 21
0
def test_src_to_tokens_simple():
    """A simple assignment tokenizes into names, ops and whitespace tokens."""
    actual = src_to_tokens('x = 5\n')
    expected = [
        Token('NAME', 'x', line=1, utf8_byte_offset=0),
        Token(UNIMPORTANT_WS, ' ', line=None, utf8_byte_offset=None),
        Token('OP', '=', line=1, utf8_byte_offset=2),
        Token(UNIMPORTANT_WS, ' ', line=None, utf8_byte_offset=None),
        Token('NUMBER', '5', line=1, utf8_byte_offset=4),
        Token('NEWLINE', '\n', line=1, utf8_byte_offset=5),
        Token('ENDMARKER', '', line=2, utf8_byte_offset=0),
    ]
    assert actual == expected
Exemplo n.º 22
0
def _fix_octal_literals(contents_text):
    """Rewrite zero-prefixed all-digit literals (``0755``) as ``0o755``."""

    def _fix_octal(s):
        # a literal made only of zeros is left as-is
        is_old_octal = (
            s.startswith('0') and s.isdigit() and s != len(s) * '0'
        )
        if is_old_octal:  # pragma: no cover (py2 only)
            return '0o' + s[1:]
        return s

    tokens = src_to_tokens(contents_text)
    for idx, tok in enumerate(tokens):
        if tok.name != 'NUMBER':
            continue
        tokens[idx] = tok._replace(src=_fix_octal(tok.src))
    return tokens_to_src(tokens)
Exemplo n.º 23
0
def test_src_to_tokens_escaped_nl_no_left_ws():
    """An escaped newline directly after an operator gets its own token."""
    source = 'x =\\\n    5\n'
    actual = src_to_tokens(source)
    expected = [
        Token('NAME', 'x', line=1, utf8_byte_offset=0),
        Token(UNIMPORTANT_WS, ' ', line=None, utf8_byte_offset=None),
        Token('OP', '=', line=1, utf8_byte_offset=2),
        Token(ESCAPED_NL, '\\\n', line=None, utf8_byte_offset=None),
        Token(UNIMPORTANT_WS, '    ', line=None, utf8_byte_offset=None),
        Token('NUMBER', '5', line=2, utf8_byte_offset=4),
        Token('NEWLINE', '\n', line=2, utf8_byte_offset=5),
        Token('ENDMARKER', '', line=3, utf8_byte_offset=0),
    ]
    assert actual == expected
Exemplo n.º 24
0
def _fix_unicode_literals(contents_text, py3_plus):
    """Drop the ``u`` / ``U`` prefix from every string literal.

    Nothing is changed unless ``py3_plus`` is set or
    ``_imports_unicode_literals`` is truthy for the source.
    """
    if not (py3_plus or _imports_unicode_literals(contents_text)):
        return contents_text

    tokens = src_to_tokens(contents_text)
    for idx, tok in enumerate(tokens):
        if tok.name == 'STRING':
            prefix, rest = STRING_PREFIXES_RE.match(tok.src).group(1, 2)
            stripped_prefix = prefix.replace('u', '').replace('U', '')
            tokens[idx] = Token('STRING', stripped_prefix + rest)
    return tokens_to_src(tokens)
Exemplo n.º 25
0
def test_reversed_enumerate():
    """``reversed_enumerate`` lazily yields ``(index, token)`` from the end."""
    tokens = src_to_tokens('x = 5\n')
    it = reversed_enumerate(tokens)

    first = next(it)
    assert first == (6, Token('ENDMARKER', '', line=2, utf8_byte_offset=0))

    remaining = list(it)
    expected = [
        (5, Token(name='NEWLINE', src='\n', line=1, utf8_byte_offset=5)),
        (4, Token('NUMBER', '5', line=1, utf8_byte_offset=4)),
        (3, Token(UNIMPORTANT_WS, ' ')),
        (2, Token('OP', '=', line=1, utf8_byte_offset=2)),
        (1, Token(UNIMPORTANT_WS, ' ')),
        (0, Token('NAME', 'x', line=1, utf8_byte_offset=0)),
    ]
    assert remaining == expected
Exemplo n.º 26
0
def _fix_dictcomps(contents_text):
    """Rewrite the dict constructs recorded by ``FindDictsVisitor``.

    The input is returned unchanged when it does not parse or when the
    visitor found nothing.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = FindDictsVisitor()
    finder.visit(tree)
    if not finder.dicts:
        return contents_text

    tokens = src_to_tokens(contents_text)
    # walk backwards so edits do not disturb indices still to be visited
    for idx, tok in reversed_enumerate(tokens):
        offset = tok.offset
        if offset not in finder.dicts:
            continue
        _process_dict_comp(tokens, idx, finder.dicts[offset])
    return tokens_to_src(tokens)
Exemplo n.º 27
0
def _fix_dictcomps(contents_text):
    """Rewrite the dict constructs recorded by ``FindDictsVisitor``.

    Recorded nodes are looked up by ``(line, utf8_byte_offset)``.  The input
    is returned unchanged when it does not parse or nothing was found.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = FindDictsVisitor()
    finder.visit(tree)
    if not finder.dicts:
        return contents_text

    tokens = src_to_tokens(contents_text)
    # snapshot the pairs first, then walk them back-to-front
    for idx, tok in reversed(tuple(enumerate(tokens))):
        position = (tok.line, tok.utf8_byte_offset)
        if position not in finder.dicts:
            continue
        _process_dict_comp(tokens, idx, finder.dicts[position])
    return tokens_to_src(tokens)
Exemplo n.º 28
0
def _fix_sets(contents_text):
    """Rewrite the set constructs recorded by ``FindSetsVisitor``.

    The input is returned unchanged when it does not parse or when the
    visitor found neither sets nor empty-set literals.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    finder = FindSetsVisitor()
    finder.visit(tree)
    if not (finder.sets or finder.set_empty_literals):
        return contents_text

    tokens = src_to_tokens(contents_text)
    # walk backwards so edits do not disturb indices still to be visited
    for idx, tok in reversed_enumerate(tokens):
        offset = tok.offset
        if offset in finder.set_empty_literals:
            _process_set_empty_literal(tokens, idx)
        elif offset in finder.sets:
            _process_set_literal(tokens, idx, finder.sets[offset])
    return tokens_to_src(tokens)
Exemplo n.º 29
0
def _fix_src(contents_text, py35_plus, py36_plus):
    """Apply brace fixes to calls, functions, literals, braces and tuples.

    ``py35_plus`` / ``py36_plus`` decide whether a comma may be added to
    star-arg calls / star-arg function definitions respectively.
    Unparseable source is returned unchanged.
    """
    try:
        tree = ast_parse(contents_text)
    except SyntaxError:
        return contents_text

    found = FindNodes()
    found.visit(tree)

    tokens = src_to_tokens(contents_text)
    for idx, tok in _changing_list(tokens):
        # zero-length tokens (e.g. DEDENT) are skipped
        if not tok.src:
            continue
        key = Offset(tok.line, tok.utf8_byte_offset)

        # collect every (add_comma, fix_data) pair before applying any fix
        if key in found.calls:
            # star-arg calls are only given a comma when asked to
            pending = [
                (not call.star_args or py35_plus, _find_call(call, idx, tokens))
                for call in found.calls[key]
            ]
        elif key in found.funcs:
            func = found.funcs[key]
            # a function definition can be treated like a call
            pending = [
                (not func.star_args or py36_plus, _find_call(func, idx, tokens)),
            ]
        elif key in found.literals:
            pending = [(True, _find_simple(idx, tokens))]
        elif tok.src in START_BRACES:
            # parenthesized things, unhugging of tuples, and comprehensions
            pending = [(False, _find_simple(idx, tokens))]
        else:
            pending = []

        for add_comma, fix_data in pending:
            if fix_data is not None:
                _fix_brace(fix_data, add_comma, tokens)

        # tuples are handled last: they report their first element as their
        # start, and that element may itself be one of the things above
        if key in found.tuples:
            tuple_fix = _find_tuple(idx, tokens)
            if tuple_fix is not None:
                _fix_brace(tuple_fix, True, tokens)

    return tokens_to_src(tokens)
Exemplo n.º 30
0
def _fix_escape_sequences(contents_text):
    """Fix invalid escape sequences in non-raw string literals.

    A string containing only invalid escapes (and no ``u`` prefix) gets an
    ``r`` prefix; otherwise each invalid escape has its backslash doubled.
    """
    tokens = src_to_tokens(contents_text)
    preceding_name = None
    for idx, tok in enumerate(tokens):
        if tok.name == 'NAME':
            preceding_name = tok
            continue
        if tok.name != 'STRING':
            preceding_name = None
            continue

        parts = STRING_PREFIXES_RE.match(tok.src)
        prefix, body = parts.group(1), parts.group(2)

        # a NAME token directly before the string is treated as part of its
        # prefix
        if preceding_name is not None:  # pragma: no cover (py2 bug)
            effective_prefix = (preceding_name.src + prefix).lower()
        else:  # pragma: no cover (py3 only)
            effective_prefix = prefix.lower()

        # raw strings and strings without backslashes need no work
        if 'r' in effective_prefix or '\\' not in body:
            continue

        if 'b' in effective_prefix:
            allowed = ESCAPE_STARTS_BYTES
        else:
            allowed = ESCAPE_STARTS

        seen = {m[1] for m in ESCAPE_RE.findall(body)}
        valid_seen = seen & allowed
        invalid_seen = seen - allowed
        if not invalid_seen:
            continue

        def _double_invalid(found):
            text = found.group()
            return text if text[1] in allowed else r'\{}'.format(text)

        if valid_seen or 'u' in effective_prefix:
            new_body = ESCAPE_RE.sub(_double_invalid, body)
            tokens[idx] = tok._replace(src=prefix + new_body)
        else:
            tokens[idx] = tok._replace(src=prefix + 'r' + body)

    return tokens_to_src(tokens)