def normalize_invisible_parens(node: Node, parens_after: Set[str], *, preview: bool) -> None:
    """Make existing optional parentheses invisible or create new ones.

    `parens_after` is a set of string leaf values immediately after which parens
    should be put.

    Standardizes on visible parentheses for single-element tuples, and keeps
    existing visible parentheses for other tuples and generator expressions.
    """
    # If this statement is governed by `# fmt: off`, leave its parens alone.
    for pc in list_comments(node.prefix, is_endmarker=False, preview=preview):
        if pc.value in FMT_OFF:
            # This `node` has a prefix with `# fmt: off`, don't mess with parens.
            return
    check_lpar = False
    # Iterate over a snapshot of the children: the loop body inserts/removes
    # children of `node` while walking.
    for index, child in enumerate(list(node.children)):
        # Fixes a bug where invisible parens are not properly stripped from
        # assignment statements that contain type annotations.
        if isinstance(child, Node) and child.type == syms.annassign:
            normalize_invisible_parens(child, parens_after=parens_after, preview=preview)

        # Add parentheses around long tuple unpacking in assignments.
        if (index == 0 and isinstance(child, Node) and child.type == syms.testlist_star_expr):
            check_lpar = True

        if check_lpar:
            if child.type == syms.atom:
                # The child already has (possibly visible) parens; try to make
                # them invisible, and re-wrap if that succeeded.
                if maybe_make_parens_invisible_in_atom(
                    child,
                    parent=node,
                    preview=preview,
                ):
                    wrap_in_parentheses(node, child, visible=False)
            elif is_one_tuple(child):
                # Single-element tuples always get visible parens.
                wrap_in_parentheses(node, child, visible=True)
            elif node.type == syms.import_from:
                # "import from" nodes store parentheses directly as part of
                # the statement
                if is_lpar_token(child):
                    assert is_rpar_token(node.children[-1])
                    # make parentheses invisible
                    child.value = ""
                    node.children[-1].value = ""
                elif child.type != token.STAR:
                    # insert invisible parentheses
                    node.insert_child(index, Leaf(token.LPAR, ""))
                    node.append_child(Leaf(token.RPAR, ""))
                # Import-from parens are handled once for the whole statement.
                break
            elif not (isinstance(child, Leaf) and is_multiline_string(child)):
                # Wrap everything else in invisible parens, except multiline
                # strings which must keep their exact prefix/position.
                wrap_in_parentheses(node, child, visible=False)

        # Only the child immediately following one of `parens_after` gets the
        # paren treatment on the next iteration.
        check_lpar = isinstance(child, Leaf) and child.value in parens_after
def visit_factor(self, node: Node) -> Iterator[Line]:
    """Force parentheses between a unary op and a binary power:

    -2 ** 8 -> -(2 ** 8)
    """
    _op, operand = node.children
    operand_is_power = (
        operand.type == syms.power
        and len(operand.children) == 3
        and operand.children[1].type == token.DOUBLESTAR
    )
    if operand_is_power:
        # Detach the power expression and re-insert it wrapped in an atom
        # with explicit parentheses at the same position.
        slot = operand.remove() or 0
        wrapped = Node(
            syms.atom,
            [Leaf(token.LPAR, "("), operand, Leaf(token.RPAR, ")")],
        )
        node.insert_child(slot, wrapped)
    yield from self.visit_default(node)
def lib2to3_parse(
        src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node."""
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    # Try each candidate grammar in turn; the first one that parses wins.
    for grammar in get_grammars(set(target_versions)):
        parser = driver.Driver(grammar, pytree.convert)
        try:
            result = parser.parse_string(src_txt, True)
        except ParseError as parse_error:
            lineno, column = parse_error.context[1]
            all_lines = src_txt.splitlines()
            try:
                faulty_line = all_lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            # Remember the failure; a later grammar may still succeed.
            exc = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}")
        else:
            break
    else:
        # No grammar parsed the source; surface the last recorded error.
        raise exc from None

    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
    """Replace `child` under `parent` with an atom wrapping it in parentheses.

    The child's prefix is moved onto the new atom so surrounding whitespace
    and comments stay put.  When `visible` is False, the paren leaves carry
    empty values and therefore render as nothing.
    """
    open_paren = Leaf(token.LPAR, "(" if visible else "")
    close_paren = Leaf(token.RPAR, ")" if visible else "")
    # Move the prefix from the child to the wrapping atom.
    moved_prefix, child.prefix = child.prefix, ""
    slot = child.remove() or 0
    atom = Node(syms.atom, [open_paren, child, close_paren])
    atom.prefix = moved_prefix
    parent.insert_child(slot, atom)
def convert_one_fmt_off_pair(node: Node) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Returns True if a pair was converted.
    """
    for leaf in node.leaves():
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            if comment.value not in FMT_PASS:
                previous_consumed = comment.consumed
                continue
            # We only want standalone comments. If there's no previous leaf or
            # the previous leaf is indentation, it's a standalone comment in
            # disguise.
            if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
                prev = preceding_leaf(leaf)
                if prev:
                    if comment.value in FMT_OFF and prev.type not in WHITESPACE:
                        continue
                    if comment.value in FMT_SKIP and prev.type in WHITESPACE:
                        continue

            ignored_nodes = list(generate_ignored_nodes(leaf, comment))
            if not ignored_nodes:
                continue

            first = ignored_nodes[0]  # Can be a container node with the `leaf`.
            parent = first.parent
            prefix = first.prefix
            # Strip the comment itself off the first node's prefix; it is
            # re-attached below as part of the standalone comment.
            first.prefix = prefix[comment.consumed:]
            # The whole ignored region becomes the literal text of one
            # STANDALONE_COMMENT leaf so it is emitted verbatim.
            hidden_value = "".join(str(n) for n in ignored_nodes)
            if comment.value in FMT_OFF:
                hidden_value = comment.value + "\n" + hidden_value
            if comment.value in FMT_SKIP:
                hidden_value += " " + comment.value
            if hidden_value.endswith("\n"):
                # That happens when one of the `ignored_nodes` ended with a NEWLINE
                # leaf (possibly followed by a DEDENT).
                hidden_value = hidden_value[:-1]
            first_idx: Optional[int] = None
            # Remove all ignored nodes, remembering where the first one sat.
            for ignored in ignored_nodes:
                index = ignored.remove()
                if first_idx is None:
                    first_idx = index
            assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
            assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
            parent.insert_child(
                first_idx,
                Leaf(
                    STANDALONE_COMMENT,
                    hidden_value,
                    prefix=prefix[:previous_consumed] + "\n" * comment.newlines,
                ),
            )
            return True
    return False
def get_features_used(node: Node) -> Set[Feature]:
    """Return a set of (relatively) new Python features used in this file.

    Currently looking for:
    - f-strings;
    - underscores in numeric literals;
    - trailing commas after * or ** in function signatures and calls;
    - positional only arguments in function signatures and lambdas;
    - assignment expression;
    - relaxed decorator syntax;
    """
    found: Set[Feature] = set()
    for current in node.pre_order():
        current_type = current.type
        if current_type == token.STRING:
            if current.value[:2] in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:  # type: ignore
                found.add(Feature.F_STRINGS)
        elif current_type == token.NUMBER:
            if "_" in current.value:  # type: ignore
                found.add(Feature.NUMERIC_UNDERSCORES)
        elif current_type == token.SLASH:
            parent = current.parent
            if parent is not None and parent.type in {syms.typedargslist, syms.arglist}:
                found.add(Feature.POS_ONLY_ARGUMENTS)
        elif current_type == token.COLONEQUAL:
            found.add(Feature.ASSIGNMENT_EXPRESSIONS)
        elif current_type == syms.decorator:
            kids = current.children
            if len(kids) > 1 and not is_simple_decorator_expression(kids[1]):
                found.add(Feature.RELAXED_DECORATORS)
        elif (
            current_type in {syms.typedargslist, syms.arglist}
            and current.children
            and current.children[-1].type == token.COMMA
        ):
            # A trailing comma after */** is only legal on newer Pythons.
            feature = (
                Feature.TRAILING_COMMA_IN_DEF
                if current_type == syms.typedargslist
                else Feature.TRAILING_COMMA_IN_CALL
            )
            for child in current.children:
                if child.type in STARS:
                    found.add(feature)
                elif child.type == syms.argument:
                    for grandchild in child.children:
                        if grandchild.type in STARS:
                            found.add(feature)
    return found
def lib2to3_parse(
        src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node."""
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    # Map grammar version -> the InvalidInput produced under that grammar.
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}")

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}")

    else:
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        exc = errors[max(errors)]

        # If the source parses under a Python 2 grammar, append a hint that
        # the file looks like Python 2 code.
        if matches_grammar(src_txt, pygram.python_grammar) or matches_grammar(
            src_txt, pygram.python_grammar_no_print_statement):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY2_HINT}"
            raise InvalidInput(msg) from None

        raise exc from None

    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
def is_python36(node: Node) -> bool:
    """Returns True if the current file is using Python 3.6+ features.

    Currently looking for:
    - f-strings; and
    - trailing commas after * or ** in function signatures.
    """
    fstring_heads = {'f"', 'F"', "f'", "F'", 'rf', 'fr', 'RF', 'FR'}
    for candidate in node.pre_order():
        if candidate.type == token.STRING:
            # The first two characters are enough to recognize an f-string prefix.
            if candidate.value[:2] in fstring_heads:  # type: ignore
                return True
        elif candidate.type == syms.typedargslist:
            kids = candidate.children
            if kids and kids[-1].type == token.COMMA:
                # Trailing comma after * or ** is 3.6+ only.
                if any(k.type in (token.STAR, token.DOUBLESTAR) for k in kids):
                    return True
    return False
def lib2to3_parse(src_txt: str) -> Node:
    """Given a string with source, return the lib2to3 Node.

    Raises ValueError when the source cannot be parsed.
    """
    grammar = pygram.python_grammar_no_print_statement
    drv = driver.Driver(grammar, pytree.convert)
    # Bug fix: the previous check `src_txt[-1] != '\n'` raised IndexError on
    # empty input; `endswith` handles "" safely and is equivalent otherwise.
    if not src_txt.endswith('\n'):
        # Match the dominant newline style seen near the top of the source.
        nl = '\r\n' if '\r\n' in src_txt[:1024] else '\n'
        src_txt += nl
    try:
        result = drv.parse_string(src_txt, True)
    except ParseError as pe:
        lineno, column = pe.context[1]
        lines = src_txt.splitlines()
        try:
            faulty_line = lines[lineno - 1]
        except IndexError:
            faulty_line = "<line number missing in source>"
        raise ValueError(f"Cannot parse: {lineno}:{column}: {faulty_line}") from None

    if isinstance(result, Leaf):
        # Normalize: callers always get a file_input Node, never a bare Leaf.
        result = Node(syms.file_input, [result])
    return result
def remove_with_parens(node: Node, parent: Node) -> None: """Recursively hide optional parens in `with` statements.""" # Removing all unnecessary parentheses in with statements in one pass is a tad # complex as different variations of bracketed statements result in pretty # different parse trees: # # with (open("file")) as f: # this is an asexpr_test # ... # # with (open("file") as f): # this is an atom containing an # ... # asexpr_test # # with (open("file")) as f, (open("file")) as f: # this is asexpr_test, COMMA, # ... # asexpr_test # # with (open("file") as f, open("file") as f): # an atom containing a # ... # testlist_gexp which then # # contains multiple asexpr_test(s) if node.type == syms.atom: if maybe_make_parens_invisible_in_atom( node, parent=parent, remove_brackets_around_comma=True, ): wrap_in_parentheses(parent, node, visible=False) if isinstance(node.children[1], Node): remove_with_parens(node.children[1], node) elif node.type == syms.testlist_gexp: for child in node.children: if isinstance(child, Node): remove_with_parens(child, node) elif node.type == syms.asexpr_test and not any( leaf.type == token.COLONEQUAL for leaf in node.leaves() ): if maybe_make_parens_invisible_in_atom( node.children[0], parent=node, remove_brackets_around_comma=True, ): wrap_in_parentheses(node, node.children[0], visible=False)
def get_features_used(node: Node) -> Set[Feature]:  # noqa: C901
    """Return a set of (relatively) new Python features used in this file.

    Currently looking for:
    - f-strings;
    - underscores in numeric literals;
    - trailing commas after * or ** in function signatures and calls;
    - positional only arguments in function signatures and lambdas;
    - assignment expression;
    - relaxed decorator syntax;
    - print / exec statements;
    """
    features: Set[Feature] = set()
    for n in node.pre_order():
        if n.type == token.STRING:
            value_head = n.value[:2]  # type: ignore
            if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
                features.add(Feature.F_STRINGS)

        elif n.type == token.NUMBER:
            assert isinstance(n, Leaf)
            if "_" in n.value:
                features.add(Feature.NUMERIC_UNDERSCORES)
            elif n.value.endswith(("L", "l")):
                # Python 2: 10L
                features.add(Feature.LONG_INT_LITERAL)
            elif len(n.value) >= 2 and n.value[0] == "0" and n.value[1].isdigit():
                # Python 2: 0123; 00123; ...
                if not all(char == "0" for char in n.value):
                    # although we don't want to match 0000 or similar
                    features.add(Feature.OCTAL_INT_LITERAL)

        elif n.type == token.SLASH:
            if n.parent and n.parent.type in {
                syms.typedargslist,
                syms.arglist,
                syms.varargslist,
            }:
                features.add(Feature.POS_ONLY_ARGUMENTS)

        elif n.type == token.COLONEQUAL:
            features.add(Feature.ASSIGNMENT_EXPRESSIONS)

        elif n.type == syms.decorator:
            if len(n.children) > 1 and not is_simple_decorator_expression(
                n.children[1]):
                features.add(Feature.RELAXED_DECORATORS)

        elif (n.type in {syms.typedargslist, syms.arglist}
              and n.children
              and n.children[-1].type == token.COMMA):
            if n.type == syms.typedargslist:
                feature = Feature.TRAILING_COMMA_IN_DEF
            else:
                feature = Feature.TRAILING_COMMA_IN_CALL

            # The trailing comma only matters when a * / ** entry is present,
            # either directly or nested inside an argument node.
            for ch in n.children:
                if ch.type in STARS:
                    features.add(feature)

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            features.add(feature)

        # Python 2 only features (for its deprecation) except for integers, see above
        elif n.type == syms.print_stmt:
            features.add(Feature.PRINT_STMT)
        elif n.type == syms.exec_stmt:
            features.add(Feature.EXEC_STMT)
        elif n.type == syms.tfpdef:
            # def set_position((x, y), value):
            #     ...
            features.add(Feature.AUTOMATIC_PARAMETER_UNPACKING)
        elif n.type == syms.except_clause:
            # try:
            #     ...
            # except Exception, err:
            #     ...
            if len(n.children) >= 4:
                if n.children[-2].type == token.COMMA:
                    features.add(Feature.COMMA_STYLE_EXCEPT)
        elif n.type == syms.raise_stmt:
            # raise Exception, "msg"
            if len(n.children) >= 4:
                if n.children[-2].type == token.COMMA:
                    features.add(Feature.COMMA_STYLE_RAISE)
        elif n.type == token.BACKQUOTE:
            # `i'm surprised this ever existed`
            features.add(Feature.BACKQUOTE_REPR)

    return features
def get_features_used(  # noqa: C901
        node: Node, *, future_imports: Optional[Set[str]] = None) -> Set[Feature]:
    """Return a set of (relatively) new Python features used in this file.

    Currently looking for:
    - f-strings;
    - underscores in numeric literals;
    - trailing commas after * or ** in function signatures and calls;
    - positional only arguments in function signatures and lambdas;
    - assignment expression;
    - relaxed decorator syntax;
    - usage of __future__ flags (annotations);
    - print / exec statements;
    """
    features: Set[Feature] = set()
    # Features can also be activated by __future__ imports, not just syntax.
    if future_imports:
        features |= {
            FUTURE_FLAG_TO_FEATURE[future_import]
            for future_import in future_imports
            if future_import in FUTURE_FLAG_TO_FEATURE
        }

    for n in node.pre_order():
        if is_string_token(n):
            value_head = n.value[:2]
            if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
                features.add(Feature.F_STRINGS)

        elif n.type == token.NUMBER:
            assert isinstance(n, Leaf)
            if "_" in n.value:
                features.add(Feature.NUMERIC_UNDERSCORES)

        elif n.type == token.SLASH:
            if n.parent and n.parent.type in {
                syms.typedargslist,
                syms.arglist,
                syms.varargslist,
            }:
                features.add(Feature.POS_ONLY_ARGUMENTS)

        elif n.type == token.COLONEQUAL:
            features.add(Feature.ASSIGNMENT_EXPRESSIONS)

        elif n.type == syms.decorator:
            if len(n.children) > 1 and not is_simple_decorator_expression(
                n.children[1]):
                features.add(Feature.RELAXED_DECORATORS)

        elif (n.type in {syms.typedargslist, syms.arglist}
              and n.children
              and n.children[-1].type == token.COMMA):
            if n.type == syms.typedargslist:
                feature = Feature.TRAILING_COMMA_IN_DEF
            else:
                feature = Feature.TRAILING_COMMA_IN_CALL

            # A * / ** entry may appear directly or nested in an argument node.
            for ch in n.children:
                if ch.type in STARS:
                    features.add(feature)

                if ch.type == syms.argument:
                    for argch in ch.children:
                        if argch.type in STARS:
                            features.add(feature)

        elif (n.type in {syms.return_stmt, syms.yield_expr}
              and len(n.children) >= 2
              and n.children[1].type == syms.testlist_star_expr
              and any(child.type == syms.star_expr for child in n.children[1].children)):
            # e.g. `return a, *b` / `yield a, *b`
            features.add(Feature.UNPACKING_ON_FLOW)

        elif (n.type == syms.annassign
              and len(n.children) >= 4
              and n.children[3].type == syms.testlist_star_expr):
            # e.g. `x: tuple = a, b`
            features.add(Feature.ANN_ASSIGN_EXTENDED_RHS)

        elif (n.type == syms.except_clause
              and len(n.children) >= 2
              and n.children[1].type == token.STAR):
            # e.g. `except *ValueError:`
            features.add(Feature.EXCEPT_STAR)

    return features
def lam_sub(grammar: Grammar, node: RawNode) -> NL:
    """Build a `Node` from a raw (type, value, context, children) tuple.

    The raw node must carry a non-None children list.
    """
    children = node[3]
    assert children is not None
    return Node(type=node[0], children=children, context=node[2])