Пример #1
0
def _parse_lambda(lam):
  """Returns the AST and source code of given lambda function.

  The entire source file holding the lambda is parsed, and the lambda nodes
  whose line span covers the lambda's first line are collected. If more than
  one such node exists, they are filtered by comparing their signature with
  that of `lam`.

  Args:
    lam: types.LambdaType, Python function/method/class

  Returns:
    gast.AST, Text: the parsed AST node; the source code that was parsed to
    generate the AST (including any prefixes that this function may have added).

  Raises:
    errors.UnsupportedLanguageElementError: if no lambda node spans the
      definition line, or if the selection cannot be narrowed down to a single
      node.
  """
  # TODO(mdan): Use a fast path if the definition is not multi-line.
  # We could detect that the lambda is in a multi-line expression by looking
  # at the surrounding code - a surrounding set of parentheses indicates a
  # potential multi-line definition.

  mod = inspect.getmodule(lam)
  source_file = inspect.getsourcefile(lam)
  def_line = lam.__code__.co_firstlineno

  # This method is more robust than just calling inspect.getsource(mod), as it
  # works in interactive shells, where getsource would fail. This is the
  # same procedure followed by inspect for non-modules:
  # https://github.com/python/cpython/blob/3.8/Lib/inspect.py#L772
  # Like inspect, tolerate a module that could not be determined: getmodule
  # returns None in that case, and linecache accepts module_globals=None.
  module_globals = mod.__dict__ if mod is not None else None
  lines = linecache.getlines(source_file, module_globals)
  source = ''.join(lines)

  # Narrow down to the last node starting before our definition node.
  all_nodes = parse(source, preamble_len=0, single_node=False)
  search_nodes = []
  for node in all_nodes:
    # Also include nodes without a line number, for safety. This is defensive -
    # we don't know whether such nodes might exist, and if they do, whether
    # they are not safe to skip.
    # TODO(mdan): Replace this check with an assertion or skip such nodes.
    if getattr(node, 'lineno', def_line) <= def_line:
      search_nodes.append(node)
    else:
      # Found a node starting past our lambda - can stop the search.
      break

  # Extract all lambda nodes from the shortlist.
  lambda_nodes = []
  for node in search_nodes:
    lambda_nodes.extend(
        n for n in gast.walk(node) if isinstance(n, gast.Lambda))

  # Filter down to lambda nodes which span our actual lambda.
  candidates = []
  for lambda_node in lambda_nodes:
    minl, maxl = MAX_SIZE, 0
    for n in gast.walk(lambda_node):
      minl = min(minl, getattr(n, 'lineno', minl))
      lineno = getattr(n, 'lineno', maxl)
      end_lineno = getattr(n, 'end_lineno', None)
      if end_lineno is not None:
        # end_lineno is more precise, but lineno should almost always work too.
        lineno = end_lineno
      maxl = max(maxl, lineno)
    if minl <= def_line <= maxl:
      candidates.append((lambda_node, minl, maxl))

  if not candidates:
    # Omit the encoding marker so the listing contains only the lambda code,
    # consistent with the listing produced for the multiple-match error below.
    lambda_codes = '\n'.join(
        unparse(n, include_encoding_marker=False) for n in lambda_nodes)
    raise errors.UnsupportedLanguageElementError(
        f'could not parse the source code of {lam}:'
        f' no matching AST found among candidates:\n{lambda_codes}')

  # Happy path: exactly one node found.
  if len(candidates) == 1:
    (node, minl, maxl), = candidates  # pylint:disable=unbalanced-tuple-unpacking
    return _without_context(node, lines, minl, maxl)

  # Attempt to narrow down selection by signature if multiple nodes are found.
  matches = [v for v in candidates if _node_matches_argspec(v[0], lam)]
  if len(matches) == 1:
    (node, minl, maxl), = matches
    return _without_context(node, lines, minl, maxl)

  # Give up if could not narrow down to a single node. If the signature filter
  # rejected every candidate, list the candidates themselves so that the error
  # still shows which definitions were found at the location.
  reported = matches or candidates
  matches_str = '\n'.join(
      'Match {}:\n{}\n'.format(i, unparse(node, include_encoding_marker=False))
      for i, (node, _, _) in enumerate(reported))
  raise errors.UnsupportedLanguageElementError(
      f'could not parse the source code of {lam}: found multiple definitions'
      ' with identical signatures at the location. This error'
      ' may be avoided by defining each lambda on a single line and with'
      f' unique argument names. The matching definitions were:\n{matches_str}')
Пример #2
0
def dedent_block(code_string):
  """Strips the indentation of the first block from every line of the code.

  The code is tokenized to find the first INDENT token. Its width is then
  removed from all INDENT tokens, and the resulting line structure is used to
  decide how much leading whitespace to drop from each original line.

  Args:
    code_string: Text, the code to dedent.

  Returns:
    Text, the dedented code. The (continuation-unfolded) code is returned as is
    when no leading indentation is found.

  Raises:
    errors.UnsupportedLanguageElementError: if the indentation mixes tabs and
      spaces.
  """
  code_string = _unfold_continuations(code_string)

  token_gen = tokenize.generate_tokens(six.StringIO(code_string).readline)

  tokens = []
  try:
    for token in token_gen:
      tokens.append(token)
  except tokenize.TokenError:
    # Resolution of lambda functions may yield incomplete code, which can
    # in turn generate this error. We silently ignore this error because the
    # parser may still be able to deal with it. The tokens read so far are kept.
    pass

  # The first token that is not a blank line, string or comment decides whether
  # the code is indented at all.
  ignorable = (
      tokenize.NL, tokenize.NEWLINE, tokenize.STRING, tokenize.COMMENT)
  block_indentation = None
  for tok_type, tok_string, _, _, _ in tokens:
    if tok_type == tokenize.INDENT:
      block_indentation = tok_string
      break
    if tok_type not in ignorable:
      block_indentation = ''
      break

  if not block_indentation:
    return code_string

  block_level = len(block_indentation)
  first_indent_uses_tabs = '\t' in block_indentation
  for i, (tok_type, tok_string, _, _, _) in enumerate(tokens):
    if tok_type != tokenize.INDENT:
      continue
    # The character that must not appear is the one the first indent lacks.
    if first_indent_uses_tabs:
      is_mixed = ' ' in tok_string
    else:
      is_mixed = '\t' in tok_string
    if is_mixed:
      # TODO(mdan): We could attempt to convert tabs to spaces by unix rule.
      # See:
      # https://docs.python.org/3/reference/lexical_analysis.html#indentation
      raise errors.UnsupportedLanguageElementError(
          'code mixing tabs and spaces for indentation is not allowed')
    if len(tok_string) >= block_level:
      tok_string = tok_string[block_level:]
    tokens[i] = (tok_type, tok_string)

  # Note: untokenize respects the line structure, but not the whitespace within
  # lines. For example, `def foo()` may be untokenized as `def foo ()`
  # So instead of using the output of untokenize, we only compare the leading
  # whitespace of each line against the original.
  original_lines = code_string.split('\n')
  untokenized_lines = tokenize.untokenize(tokens).split('\n')

  dedented_code = []
  for line, new_line in zip(original_lines, untokenized_lines):
    original_width = len(re.match(_LEADING_WHITESPACE, line).group())
    new_width = len(re.match(_LEADING_WHITESPACE, new_line).group())
    excess = original_width - new_width
    dedented_code.append(line[excess:] if excess > 0 else line)

  return '\n'.join(dedented_code)
Пример #3
0
 def visit_YieldFrom(self, node):
   """Rejects `yield from` expressions; raises for every visited node."""
   reason = 'generators are not supported by AutoGraph'
   raise errors.UnsupportedLanguageElementError(reason)
Пример #4
0
 def visit_Attribute(self, node):
   """Rejects name-mangled attributes, then keeps checking the child nodes.

   An attribute name is treated as mangled when it starts with a double
   underscore but does not end with one (so dunder names are allowed).

   Args:
     node: the attribute node being visited.

   Raises:
     errors.UnsupportedLanguageElementError: if the attribute name is mangled.
   """
   attr = node.attr
   if (attr is not None
       and attr.startswith('__') and not attr.endswith('__')):
     raise errors.UnsupportedLanguageElementError(
         'mangled names are not yet supported by AutoGraph')
   # Defining a visit_* method replaces the default traversal for that node
   # type, so recurse explicitly; otherwise the value expression (for example
   # the `a.__b` in `a.__b.c`) would never be checked.
   # NOTE(review): assumes the enclosing class is a NodeVisitor providing
   # generic_visit - confirm.
   self.generic_visit(node)
Пример #5
0
 def fail():
   """Raises the error reporting that `original_source` could not be parsed."""
   message = (
       'could not parse the source code:'
       '\n\n{}\n'
       'This error may be avoided by creating the lambda in a standalone'
       ' statement.\n').format(original_source)
   raise errors.UnsupportedLanguageElementError(message)
 def visit_Yield(self, node):
   """Rejects `yield` expressions; raises for every visited node."""
   reason = 'generators are not supported'
   raise errors.UnsupportedLanguageElementError(reason)
 def visit_While(self, node):
   """Rejects `while` loops that have an `else` clause.

   Loops without an `else` clause are traversed further via generic_visit.
   """
   if not node.orelse:
     self.generic_visit(node)
     return
   raise errors.UnsupportedLanguageElementError(
       'while/else statement not yet supported')