Exemplo n.º 1
0
    def is_async_fn(node):
        """Tell whether the code object found under ``node[0]`` is async.

        The children of ``node[0]`` are scanned for the first one whose
        ``attr`` is a code object (per ``iscode``); if none qualifies, the
        first child is used as the fallback candidate.

        :param node: parse-tree node whose first child holds the candidates.
        :return: a falsy value when the candidate carries no code object,
                 otherwise the result of ``co_flags_is_async`` applied to
                 that code object's ``co_flags``.
        """
        children = node[0]
        candidate = children[0]
        for child in children:
            if hasattr(child, "attr") and iscode(child.attr):
                candidate = child
                break

        # Guard clauses mirror the short-circuit evaluation of
        # ``hasattr(...) and iscode(...) and co_flags_is_async(...)``.
        if not hasattr(candidate, "attr"):
            return False
        holds_code = iscode(candidate.attr)
        if not holds_code:
            return holds_code
        return co_flags_is_async(candidate.attr.co_flags)
Exemplo n.º 2
0
def disco_loop(disasm, queue, real_out):
    """Disassemble every code object in ``queue``, including nested ones.

    :param disasm:   callable taking a code object and returning a
                     ``(tokens, customize)`` pair.
    :param queue:    deque-like work list of code objects; nested code
                     objects discovered in the tokens are appended to it.
    :param real_out: object with a ``write`` method receiving the output.
    """
    while queue:
        code = queue.popleft()
        if code.co_name != "<module>":
            # Every non-module code object gets a header line.
            header = "\n# %s line %d of %s\n" % (
                code.co_name,
                code.co_firstlineno,
                code.co_filename,
            )
            real_out.write(header)

        tokens, _customize = disasm(code)
        for token in tokens:
            # Prefer ``pattr``; only look at ``attr`` when ``pattr`` is not
            # a code object.
            operand = token.pattr
            found_code = iscode(operand)
            if not found_code:
                operand = token.attr
                found_code = iscode(operand)
            if found_code:
                queue.append(operand)
            real_out.write(token)
Exemplo n.º 3
0
def python_parser(
    version: str,
    co,
    out=sys.stdout,
    showasm=False,
    parser_debug=PARSER_DEFAULT_DEBUG,
    is_pypy=False,
    is_lambda=False,
):
    """
    Tokenize a code object and parse the tokens into a syntax tree.

    :param version:         Python version the code object comes from; it is
                            handed to both ``get_scanner`` and
                            ``get_python_parser``.
    :param co:              The code object to parse.
    :param out:             File-like object; not used by this function's
                            body.
    :param showasm:         Forwarded to ``maybe_show_asm`` together with the
                            ingested tokens.
    :param parser_debug:    Debug flags forwarded to ``get_python_parser``.
    :param is_pypy:         Forwarded to ``get_scanner``.
    :param is_lambda:       Forwarded to ``parse``.

    :return: Whatever ``parse`` returns for the token stream.
    """

    assert iscode(co)
    from decompyle3.scanner import get_scanner

    token_stream, custom_rules = get_scanner(version, is_pypy).ingest(co)
    maybe_show_asm(showasm, token_stream)

    # For heavy grammar debugging, pass something like:
    # parser_debug = {'rules': True, 'transition': True, 'reduce' : True,
    #                 'showstack': 'full'}
    grammar_parser = get_python_parser(version, parser_debug)
    return parse(grammar_parser, token_stream, custom_rules, is_lambda)
Exemplo n.º 4
0
def find_code_node(node, start):
    """Search ``node`` backwards for a ``LOAD_CODE`` child and return it.

    :param node:  indexable sequence of children exposing ``kind``/``attr``.
    :param start: index to begin at; its negation is used as the first
                  distance from the end, and the distance then grows up to
                  ``len(node)``.
    :return: the first child found whose ``kind`` is ``"LOAD_CODE"``.
    :raises AssertionError: if that child's ``attr`` is not a code object,
                            or if no such child is found.
    """
    for distance in range(-start, len(node) + 1):
        candidate = node[-distance]
        if candidate.kind != "LOAD_CODE":
            continue
        assert iscode(candidate.attr)
        return candidate
    assert False, "did not find code node starting at %d in %s" % (start, node)
Exemplo n.º 5
0
 def _recursively_extract_all_code_objects(co) -> List[bytes]:
     """Co is a code object, with potentially nested code objects."""
     co_code_objects: List[bytes] = [co.co_code]
     search_list: List[Union[Any]] = list(co.co_consts)
     co_obj: Any
     for co_obj in search_list:
         if iscode(co_obj):
             if co_obj not in co_code_objects:
                 co_code_objects.append(co_obj.co_code)
                 search_list.extend(co_obj.co_consts)
     return co_code_objects
Exemplo n.º 6
0
def number_loop(queue, mappings, opc):
    """Pair up line numbers of original and uncompiled code objects.

    ``queue`` is consumed two entries at a time: an original code object
    followed by its counterpart. For each pair, ``[line, mapped]`` entries
    are added to ``mappings`` in place, where ``mapped`` is the result of
    ``offset2line`` for the original's offset against the counterpart's line
    starts. Nested code objects found in the instructions of both sides are
    queued as further pairs, once per ``co_name``.

    :param queue:    deque-like work list holding code objects in pairs.
    :param mappings: list extended in place with the line pairs.
    :param opc:      opcode argument forwarded to ``Bytecode``.
    :raises AssertionError: if paired code objects differ in ``co_name``.
    """
    while len(queue) > 0:
        original = queue.popleft()
        counterpart = queue.popleft()
        assert original.co_name == counterpart.co_name

        original_linestarts = findlinestarts(original)
        counterpart_linestarts = list(findlinestarts(counterpart))
        line_pairs = [
            [line, offset2line(offset, counterpart_linestarts)]
            for offset, line in original_linestarts
        ]
        mappings += line_pairs

        original_bytecode = Bytecode(original, opc)
        counterpart_bytecode = Bytecode(counterpart, opc)
        # One shared iterator: each search for a nested code object on the
        # counterpart side resumes where the previous one stopped.
        counterpart_instrs = counterpart_bytecode.get_instructions(counterpart)
        visited_names = {original.co_name}

        for instr in original_bytecode.get_instructions(original):
            if not iscode(instr.argval):
                continue
            child = instr.argval
            if not child:
                continue

            # Advance the counterpart stream to its next nested code object.
            child_counterpart = None
            while not child_counterpart:
                try:
                    other = next(counterpart_instrs)
                except StopIteration:
                    break
                else:
                    if iscode(other.argval):
                        child_counterpart = other.argval
            if not child_counterpart:
                continue

            assert child.co_name == child_counterpart.co_name
            if child.co_name in visited_names:
                continue
            visited_names.add(child.co_name)
            queue.append(child)
            queue.append(child_counterpart)
Exemplo n.º 7
0
def disco(version, co, out=None, is_pypy=False):
    """
    Disassemble the code object ``co`` and everything nested inside it.

    A short header (Python version and, when ``co.co_filename`` is set, the
    embedded file name) is printed first; the actual work is delegated to
    ``disco_loop`` using the ``ingest`` method of the scanner for ``version``.

    :param version: Python version, passed to ``get_scanner``.
    :param co:      code object to disassemble.
    :param out:     output stream; ``sys.stdout`` is used when it is falsy.
    :param is_pypy: passed to ``get_scanner``.
    """

    assert iscode(co)

    # Resolve the destination stream once; everything below writes to it.
    stream = out if out else sys.stdout
    print("# Python %s" % version, file=stream)
    embedded_name = co.co_filename
    if embedded_name:
        print("# Embedded file name: %s" % embedded_name, file=stream)

    ingest = get_scanner(version, is_pypy=is_pypy).ingest
    pending = deque([co])
    disco_loop(ingest, pending, stream)
Exemplo n.º 8
0
 def _build_opcode_index(co_code_objects,
                         HAVE_ARGUMENT=90,
                         version: str = None) -> List[int]:
     """Build a list of opcodes contained within the list of co_code objects."""
     # Helpful for learning about opcode + arg length:
     # https://laike9m.com/blog/demystifying-extended_arg,124/
     if iscode(co_code_objects):
         co_code_objects: List[bytes] = [co_code_objects]
     opcode_index: List[int] = []
     co_code: bytes
     for co_code in co_code_objects:
         i: int = 0
         while i < len(co_code):
             incrementer: int = 1
             opcode: int = co_code[i]
             if opcode >= HAVE_ARGUMENT:
                 incrementer = 3
             opcode_index.append(opcode)
             if version and float(version[:3]) >= 3.6:
                 # After 3.6 all opcodes are two bytes, and the second byte
                 # is empty if the opcode doesn't take an argument.
                 incrementer = 2
             i += incrementer
     return opcode_index
Exemplo n.º 9
0
def code_deparse_align(
    co,
    out=sys.stderr,
    version=None,
    is_pypy=None,
    debug_opts=DEFAULT_DEBUG_OPTS,
    code_objects=None,
    compile_mode="exec",
):
    """
    Ingest and deparse the code block ``co`` with an ``AligningWalker``.

    :param co:           code object to deparse.
    :param out:          stream handed to ``AligningWalker``.
    :param version:      Python version; derived from ``sys.version`` when
                         ``None``.
    :param is_pypy:      PyPy flag; defaults to ``IS_PYPY`` when ``None``.
    :param debug_opts:   mapping read for the keys ``"asm"``, ``"grammar"``
                         and ``"ast"``.
    :param code_objects: mapping forwarded to ``scanner.ingest``; a fresh
                         dict is used for each call when omitted.
    :param compile_mode: forwarded to ``AligningWalker``.

    :return: the ``AligningWalker`` instance after source generation.
    :raises SourceWalkerError: if the walker reports an error.
    :raises AssertionError: if ``co`` is not a code object or the parse tree
                            does not start with ``stmts``.
    """

    assert iscode(co)

    if version is None:
        # NOTE(review): the three-character slice reads "3.10" as 3.1;
        # left as is because a float is what is passed on — confirm what
        # get_scanner expects before changing.
        version = float(sys.version[0:3])
    if is_pypy is None:
        is_pypy = IS_PYPY
    if code_objects is None:
        # Avoid sharing one dict between unrelated calls.
        code_objects = {}

    scanner = get_scanner(version, is_pypy=is_pypy)

    tokens, customize = scanner.ingest(co, code_objects=code_objects)
    show_asm = debug_opts.get("asm", None)
    maybe_show_asm(show_asm, tokens)

    # Work on a copy so the module-level defaults are not modified.
    debug_parser = dict(PARSER_DEFAULT_DEBUG)
    show_grammar = debug_opts.get("grammar", None)
    if show_grammar:
        debug_parser["reduce"] = show_grammar
        debug_parser["errorstack"] = True

    #  Build a parse tree from tokenized and massaged disassembly.
    show_ast = debug_opts.get("ast", None)
    deparsed = AligningWalker(
        version,
        scanner,
        out,
        showast=show_ast,
        debug_parser=debug_parser,
        compile_mode=compile_mode,
        is_pypy=is_pypy,
    )

    is_top_level = co.co_name == "<module>"
    deparsed.ast = deparsed.build_ast(tokens, customize, isTopLevel=is_top_level)

    assert deparsed.ast == "stmts", "Should have parsed grammar start"

    del tokens  # save memory

    deparsed.mod_globs = find_globals(deparsed.ast, set())

    # convert leading '__doc__ = "..." into doc string
    # Best effort: any failure while probing the tree (e.g. an empty
    # co_consts) simply leaves the tree untouched.
    try:
        if deparsed.ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
            deparsed.print_docstring("", co.co_consts[0])
            del deparsed.ast[0]
        if deparsed.ast[-1] == RETURN_NONE:
            deparsed.ast.pop()  # remove last node
            # todo: if empty, add 'pass'
    except Exception:
        pass

    # What we've been waiting for: Generate Python source from the parse tree!
    deparsed.gen_source(deparsed.ast, co.co_name, customize)

    for g in sorted(deparsed.mod_globs):
        deparsed.write("# global %s ## Warning: Unused global\n" % g)

    if deparsed.ERROR:
        raise SourceWalkerError("Deparsing stopped due to parse error")
    return deparsed
Exemplo n.º 10
0
def diff_opcode(code_standard: CodeType,
                code_remapped: CodeType,
                version: str = None) -> Dict[int, Dict[int, int]]:
    """Calculate remapped opcodes from two Code objects of the same sourcecode.

    Parameters
    ----------
    code_standard : Code (xdis.CodeX or types.CodeType)
        The standard-opcode Code object
    code_remapped : Code (xdis.CodeX or types.CodeType)
        The remapped-opcode Code object
    version : str, optional
        The Python version that marshaled the former two arguments. Used for
        figuring out what operations push arguments to the stack.

    Returns
    -------
    Dict[int, Dict[int, int]]
        A dictionary of original_opcode to
        Dict[replacement_opcode:replacement_count]. replacement_opcode is an
        opcode that was seen in place of original_opcode, and the
        replacement_count is the amount of times it was seen replacing the
        original_opcode throughout all the bytecode that was analyzed.

    Raises
    ------
    RuntimeError
        Args aren't correct type or differ in total opcode count.
    """
    def _recursively_extract_all_code_objects(co) -> List[bytes]:
        """Co is a code object, with potentially nested code objects.

        Returns the distinct ``co_code`` byte strings of ``co`` and of every
        code object reachable through ``co_consts``, breadth-first.
        """
        co_code_objects: List[bytes] = [co.co_code]
        # Work list that grows while we iterate, so constants of nested code
        # objects are visited as well.
        search_list: List[Any] = list(co.co_consts)
        co_obj: Any
        for co_obj in search_list:
            if not iscode(co_obj):
                continue
            # The list holds bytecode strings, so test membership with
            # ``co_code``; the code object itself would never match.
            if co_obj.co_code in co_code_objects:
                continue
            co_code_objects.append(co_obj.co_code)
            search_list.extend(co_obj.co_consts)
        return co_code_objects

    def _build_opcode_index(co_code_objects,
                            HAVE_ARGUMENT=90,
                            version: str = None) -> List[int]:
        """Build a list of opcodes contained within the list of co_code objects."""
        import re

        # Helpful for learning about opcode + arg length:
        # https://laike9m.com/blog/demystifying-extended_arg,124/
        if (not isinstance(co_code_objects, (list, tuple))
                and iscode(co_code_objects)):
            # Scan the bytecode string of a lone code object.
            co_code_objects = [co_code_objects.co_code]

        # After 3.6 all opcodes are two bytes, and the second byte is empty
        # if the opcode doesn't take an argument. Compare (major, minor) as
        # integers: a float parse of "3.10" would read as 3.1.
        fixed_width = False
        if version:
            found = re.search(r"(\d+)\.(\d+)", str(version))
            if found:
                fixed_width = (int(found.group(1)),
                               int(found.group(2))) >= (3, 6)

        opcode_index: List[int] = []
        for co_code in co_code_objects:
            i = 0
            while i < len(co_code):
                opcode = co_code[i]
                opcode_index.append(opcode)
                if fixed_width:
                    i += 2
                elif opcode >= HAVE_ARGUMENT:
                    # Pre-3.6: opcode byte plus a two-byte argument.
                    i += 3
                else:
                    i += 1
        return opcode_index

    if not iscode(code_standard) or not iscode(code_remapped):
        raise RuntimeError(
            "diff_opcode requires two Code objects as arguments")

    # Fallback threshold used when xdis cannot supply one for ``version``.
    HAVE_ARGUMENT: int = 90
    if version:
        try:
            xdis_opcode: ModuleType = xdis.main.get_opcode(
                version, is_pypy=("pypy" in version))
        except TypeError:
            logger.warning(
                "[!] Couldn't retrieve version %s's opcodes from xdis.",
                version)
        else:
            HAVE_ARGUMENT = xdis_opcode.HAVE_ARGUMENT

    standard_code_objects: List[bytes] = _recursively_extract_all_code_objects(
        code_standard)
    remapped_code_objects: List[bytes] = _recursively_extract_all_code_objects(
        code_remapped)
    standard_opcodes_list: List[int] = _build_opcode_index(
        standard_code_objects, HAVE_ARGUMENT, version=version)
    remapped_opcodes_list: List[int] = _build_opcode_index(
        remapped_code_objects, HAVE_ARGUMENT, version=version)

    if len(standard_opcodes_list) != len(remapped_opcodes_list):
        # This is to prevent cases where files are being compared that don't
        # share source code
        raise RuntimeError(
            "The two co_code objects differ in length and therefore cannot do a comparison of the opcodes."
        )

    # Both lists have equal length here, so positions pair up one to one.
    remappings: Dict[int, Dict[int, int]] = {}
    for standard_opcode, remapped_opcode in zip(standard_opcodes_list,
                                                remapped_opcodes_list):
        replacement_counts = remappings.setdefault(standard_opcode, {})
        replacement_counts[remapped_opcode] = (
            replacement_counts.get(remapped_opcode, 0) + 1)
    return remappings
Exemplo n.º 11
0
def cmp_code_objects(version, is_pypy, code_obj1, code_obj2, verify, name=""):
    """
    Compare two code-objects.

    This is the main part of this module.

    Each ``co_*`` member of ``code_obj1`` is compared with the same member
    of ``code_obj2``; ``co_code`` is compared token by token with a number
    of tolerated differences, ``co_consts`` by recursing into nested code
    objects, ``co_flags`` after masking some bits, everything else by
    equality.

    :param version:   Python version, passed to ``get_scanner``.
    :param is_pypy:   PyPy flag, passed to ``get_scanner`` and used when
                      comparing ``co_flags``.
    :param code_obj1: first code object.
    :param code_obj2: second code object.
    :param verify:    verification mode string; compared against
                      ``"verify"`` and ``"strong"`` below.
    :param name:      dotted-name prefix used in error reports.

    :raises AssertionError: if an argument is not a code object or the two
                            objects expose different attribute names.
    :raises CmpErrorCodeLen: token streams differ in length.
    :raises CmpErrorCode: token streams differ at some instruction.
    :raises CmpErrorMember: some other member differs.
    """
    # print code_obj1, type(code_obj2)
    assert iscode(
        code_obj1
    ), "cmp_code_object first object type is %s, not code" % type(code_obj1)
    assert iscode(
        code_obj2
    ), "cmp_code_object second object type is %s, not code" % type(code_obj2)
    # print dir(code_obj1)
    # NOTE(review): isinstance(x, object) holds for every value, so the
    # "old style" branches below are not taken.
    if isinstance(code_obj1, object):
        # new style classes (Python 2.2)
        # assume _both_ code objects to be new style classes
        assert dir(code_obj1) == dir(code_obj2)
    else:
        # old style classes
        assert dir(code_obj1) == code_obj1.__members__
        assert dir(code_obj2) == code_obj2.__members__
        assert code_obj1.__members__ == code_obj2.__members__

    # Build the dotted name used in error reports.
    if name == "__main__":
        name = code_obj1.co_name
    else:
        name = "%s.%s" % (name, code_obj1.co_name)
        if name == ".?":
            name = "__main__"

    # NOTE(review): the result of code_equal() is not acted upon (the early
    # return is commented out), so the member-by-member comparison below
    # always runs.
    if isinstance(code_obj1, object) and code_equal(code_obj1, code_obj2):
        # use the new style code-classes' __cmp__ method, which
        # should be faster and more sophisticated
        # if this compare fails, we use the old routine to
        # find out, what exactly is not equal
        # if this compare succeeds, simply return
        # return
        pass

    if isinstance(code_obj1, object):
        members = [x for x in dir(code_obj1) if x.startswith("co_")]
    else:
        members = dir(code_obj1)
    members.sort()  # ; members.reverse()

    tokens1 = None
    for member in members:
        # Ignored members are skipped; so is every member whenever
        # verify is anything other than "verify".
        if member in __IGNORE_CODE_MEMBERS__ or verify != "verify":
            pass
        elif member == "co_code":
            # NOTE(review): this branch is only reached when
            # verify == "verify", so the test below is always true and the
            # token comparison that follows is skipped — confirm intent.
            if verify != "strong":
                continue
            scanner = get_scanner(version, is_pypy, show_asm=False)

            global JUMP_OPS
            JUMP_OPS = list(scan.JUMP_OPS) + ["JUMP_BACK"]

            # use changed Token class
            # We (re)set this here to save exception handling,
            # which would get confusing.
            scanner.setTokenClass(Token)
            try:
                # ingest both code-objects
                tokens1, customize = scanner.ingest(code_obj1)
                del customize  # save memory
                tokens2, customize = scanner.ingest(code_obj2)
                del customize  # save memory
            finally:
                scanner.resetTokenClass()  # restore Token class

            # Jump-target offsets of the first code object; COME_FROM
            # tokens are dropped from both streams before comparing.
            targets1 = dis.findlabels(code_obj1.co_code)
            tokens1 = [t for t in tokens1 if t.kind != "COME_FROM"]
            tokens2 = [t for t in tokens2 if t.kind != "COME_FROM"]

            # i1/i2 walk the two token lists in step. offset_map records
            # which offset in stream 2 corresponds to each offset visited in
            # stream 1; check_jumps holds forward jumps to verify once their
            # destination offset in stream 1 is reached.
            i1 = 0
            i2 = 0
            offset_map = {}
            check_jumps = {}
            while i1 < len(tokens1):
                if i2 >= len(tokens2):
                    # Stream 2 ran out: tolerated only when stream 1 merely
                    # ends with an extra "LOAD_CONST None; RETURN_VALUE"
                    # after a RETURN_VALUE.
                    if (len(tokens1) == len(tokens2) + 2
                            and tokens1[-1].kind == "RETURN_VALUE"
                            and tokens1[-2].kind == "LOAD_CONST"
                            and tokens1[-2].pattr is None
                            and tokens1[-3].kind == "RETURN_VALUE"):
                        break
                    else:
                        raise CmpErrorCodeLen(name, tokens1, tokens2)

                offset_map[tokens1[i1].offset] = tokens2[i2].offset

                # Verify forward jumps that were recorded as targeting the
                # current offset of stream 1.
                for idx1, idx2, offset2 in check_jumps.get(
                        tokens1[i1].offset, []):
                    if offset2 != tokens2[i2].offset:
                        raise CmpErrorCode(
                            name,
                            tokens1[idx1].offset,
                            tokens1[idx1],
                            tokens2[idx2],
                            tokens1,
                            tokens2,
                        )

                if tokens1[i1].kind != tokens2[i2].kind:
                    # Token kinds differ: accept a set of known-equivalent
                    # instruction sequences, otherwise report a mismatch.
                    # NOTE(review): inside this `!=` branch both kinds
                    # cannot equal "LOAD_CONST" at once, so the chained
                    # comparison below looks unsatisfiable — confirm.
                    if tokens1[i1].kind == "LOAD_CONST" == tokens2[i2].kind:
                        i = 1
                        while tokens1[i1 + i].kind == "LOAD_CONST":
                            i += 1
                        if tokens1[i1 + i].kind.startswith(
                            ("BUILD_TUPLE", "BUILD_LIST")) and i == int(
                                tokens1[i1 + i].kind.split("_")[-1]):
                            t = tuple(
                                [elem.pattr for elem in tokens1[i1:i1 + i]])
                            if t != tokens2[i2].pattr:
                                raise CmpErrorCode(
                                    name,
                                    tokens1[i1].offset,
                                    tokens1[i1],
                                    tokens2[i2],
                                    tokens1,
                                    tokens2,
                                )
                            i1 += i + 1
                            i2 += 1
                            continue
                        elif (i == 2 and tokens1[i1 + i].kind == "ROT_TWO"
                              and tokens2[i2 + 1].kind == "UNPACK_SEQUENCE_2"):
                            i1 += 3
                            i2 += 2
                            continue
                        elif i == 2 and tokens1[i1 + i].kind in BIN_OP_FUNCS:
                            f = BIN_OP_FUNCS[tokens1[i1 + i].kind]
                            if (f(tokens1[i1].pattr,
                                  tokens1[i1 + 1].pattr) == tokens2[i2].pattr):
                                i1 += 3
                                i2 += 1
                                continue
                    elif tokens1[i1].kind == "UNARY_NOT":
                        # "UNARY_NOT; POP_JUMP_IF_x" in stream 1 versus the
                        # opposite POP_JUMP_IF_y in stream 2.
                        if tokens2[i2].kind == "POP_JUMP_IF_TRUE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_FALSE":
                                i1 += 2
                                i2 += 1
                                continue
                        elif tokens2[i2].kind == "POP_JUMP_IF_FALSE":
                            if tokens1[i1 + 1].kind == "POP_JUMP_IF_TRUE":
                                i1 += 2
                                i2 += 1
                                continue
                    elif (tokens1[i1].kind in ("JUMP_FORWARD", "JUMP_BACK")
                          and tokens1[i1 - 1].kind == "RETURN_VALUE"
                          and tokens2[i2 - 1].kind
                          in ("RETURN_VALUE", "RETURN_END_IF")
                          and int(tokens1[i1].offset) not in targets1):
                        # A jump right after a return that nothing jumps
                        # to: skip it in stream 1 only.
                        i1 += 1
                        continue
                    elif (tokens1[i1].kind == "JUMP_BACK"
                          and tokens2[i2].kind == "CONTINUE"):
                        # FIXME: should make sure that offset is inside loop, not outside of it
                        i1 += 2
                        i2 += 2
                        continue
                    elif (tokens1[i1].kind == "JUMP_FORWARD"
                          and tokens2[i2].kind == "JUMP_BACK"
                          and tokens1[i1 + 1].kind == "JUMP_BACK"
                          and tokens2[i2 + 1].kind == "JUMP_BACK"
                          and int(tokens1[i1].pattr)
                          == int(tokens1[i1].offset) + 3):
                        if int(tokens1[i1].pattr) == int(tokens1[i1 +
                                                                 1].offset):
                            i1 += 2
                            i2 += 2
                            continue
                    # The remaining branches accept single-token differences
                    # and fall through to the common increment below.
                    elif (tokens1[i1].kind == "LOAD_NAME"
                          and tokens2[i2].kind == "LOAD_CONST"
                          and tokens1[i1].pattr == "None"
                          and tokens2[i2].pattr is None):
                        pass
                    elif (tokens1[i1].kind == "LOAD_GLOBAL"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "LOAD_ASSERT"
                          and tokens2[i2].kind == "LOAD_NAME"
                          and tokens1[i1].pattr == tokens2[i2].pattr):
                        pass
                    elif (tokens1[i1].kind == "RETURN_VALUE"
                          and tokens2[i2].kind == "RETURN_END_IF"):
                        pass
                    elif (tokens1[i1].kind == "BUILD_TUPLE_0"
                          and tokens2[i2].pattr == ()):
                        pass
                    else:
                        raise CmpErrorCode(
                            name,
                            tokens1[i1].offset,
                            tokens1[i1],
                            tokens2[i2],
                            tokens1,
                            tokens2,
                        )
                elif (tokens1[i1].kind in JUMP_OPS
                      and tokens1[i1].pattr != tokens2[i2].pattr):
                    # Same jump instruction, different printed target.
                    if tokens1[i1].kind == "JUMP_BACK":
                        # Backward jump: the destination has already been
                        # visited, so it can be checked via offset_map.
                        dest1 = int(tokens1[i1].pattr)
                        dest2 = int(tokens2[i2].pattr)
                        if offset_map[dest1] != dest2:
                            raise CmpErrorCode(
                                name,
                                tokens1[i1].offset,
                                tokens1[i1],
                                tokens2[i2],
                                tokens1,
                                tokens2,
                            )
                    else:
                        # import pdb; pdb.set_trace()
                        # Other jumps are queued in check_jumps for later.
                        # NOTE(review): dest2 is not assigned in this
                        # branch; it is either left over from an earlier
                        # JUMP_BACK or undefined, in which case the
                        # NameError is swallowed by the bare except.
                        try:
                            dest1 = int(tokens1[i1].pattr)
                            if dest1 in check_jumps:
                                check_jumps[dest1].append((i1, i2, dest2))
                            else:
                                check_jumps[dest1] = [(i1, i2, dest2)]
                        except:
                            pass

                i1 += 1
                i2 += 1
            del tokens1, tokens2  # save memory
        elif member == "co_consts":
            # partial optimization can make the co_consts look different,
            #   so we'll just compare the code consts
            codes1 = (c for c in code_obj1.co_consts
                      if hasattr(c, "co_consts"))
            codes2 = (c for c in code_obj2.co_consts
                      if hasattr(c, "co_consts"))

            # zip() stops at the shorter sequence, so surplus nested code
            # objects on either side are not compared.
            for c1, c2 in zip(codes1, codes2):
                cmp_code_objects(version, is_pypy, c1, c2, verify, name=name)
        elif member == "co_flags":
            flags1 = code_obj1.co_flags
            flags2 = code_obj2.co_flags
            if is_pypy:
                # For PYPY for now we don't care about PYPY_SOURCE_IS_UTF8:
                flags2 &= ~0x0100  # PYPY_SOURCE_IS_UTF8
            # We also don't care about COROUTINE or GENERATOR for now
            flags1 &= ~0x000000A0
            flags2 &= ~0x000000A0
            if flags1 != flags2:
                raise CmpErrorMember(
                    name,
                    "co_flags",
                    pretty_code_flags(flags1),
                    pretty_code_flags(flags2),
                )
        else:
            # all other members must be equal
            if getattr(code_obj1, member) != getattr(code_obj2, member):
                raise CmpErrorMember(name, member, getattr(code_obj1, member),
                                     getattr(code_obj2, member))
Exemplo n.º 12
0
def make_function2(self, node, is_lambda, nested=1, code_node=None):
    """
    Dump function definition, doc string, and function body.
    This code is specialized for Python 2.

    The visible part extracts the default-parameter nodes and the code
    object from ``node``/``code_node``, wraps the code object in ``Code``
    and builds its parse tree. If parsing fails, the error text is written
    out and the function returns ``None``.

    :param node:      parse-tree node whose last child is a ``MAKE_...``
                      instruction.
    :param is_lambda: forwarded to ``build_ast``.
    :param nested:    not used in the portion shown here.
    :param code_node: node whose ``attr`` holds the code object.
    """
    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters

        Returns ``name`` alone, or ``name=value`` when ``default`` is truthy.
        """
        # if formal parameter is a tuple, the parameter name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith("."):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)
            pass

        if default:
            value = self.traverse(default, indent="")
            maybe_show_tree_param_default(self.showast, name, value)
            result = "%s=%s" % (name, value)
            if result[-2:] == "= ":  # default was 'LOAD_CONST None'
                result += "None"
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        defparams = node[1:args_node.attr[0] + 1]
        pos_args, kw_args, annotate_argc = args_node.attr
    else:
        # attr is a plain count: the first attr children are the defaults
        defparams = node[:args_node.attr]
        kw_args = 0
        pass

    # NOTE(review): lambda_index is always None here, so the first branch
    # below is never taken and code_node must be supplied by the caller
    # (the default None would fail on ``code_node.attr``).
    lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    # Parse the function body; on failure, emit the error text instead of
    # source and record the error unless errors are tolerated.
    try:
        ast = self.build_ast(
            code._tokens,
            code._customize,
            code,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    # Python 2 only exception syntax (``except E, name``).
    except ParserError, p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return
Exemplo n.º 13
0
def make_function3(self, node, is_lambda, nested=1, code_node=None):
    """Dump function definition, doc string, and function body in
    Python version 3.0 and above.

    :param node:      parse-tree node for the function; its last child must
                      be a MAKE_FUNCTION_... or MAKE_CLOSURE_... token.
    :param is_lambda: true when a ``lambda`` expression rather than a ``def``
                      parameter list is being written.
    :param nested:    not used in this routine.
    :param code_node: node whose ``attr`` holds the code object; consulted
                      when the code is not taken from a LOAD_LAMBDA child.

    Output is emitted through ``self.write()`` / ``self.println()``; nothing
    is returned. On a parser error the message is written, ``self.ERROR`` is
    set unless ``self.tolerate_errors`` is true, and the routine returns early.
    """

    # For Python 3.3, the evaluation stack in MAKE_FUNCTION is:

    # * default argument objects in positional order
    # * pairs of name and default argument, with the name just below
    #   the object on the stack, for keyword-only parameters
    # * parameter annotation objects
    # * a tuple listing the parameter names for the annotations
    #   (only if there are any annotation objects)
    # * the code associated with the function (at TOS1)
    # * the qualified name of the function (at TOS)

    # For Python 3.0 .. 3.2 the evaluation stack is:
    # The function object is defined to have argc default parameters,
    # which are found below TOS.
    # * first come positional args in the order they are given in the source,
    # * next come the keyword args in the order they given in the source,
    # * finally is the code associated with the function (at TOS)
    #
    # Note: There is no qualified name at TOS

    # MAKE_CLOSURE adds an additional closure slot

    # In Python 3.6 stack entries change again. I understand
    # 3.7 changes some of those changes. Yes, it is hard to follow
    # and I am sure I haven't been able to keep up.

    # Thank you, Python.

    def build_param(ast, name, default, annotation=None):
        """Return the text ``name[: annotation]=default`` for one parameter.

        ``default`` is a tree node rendered with ``self.traverse()``.
        ``ast`` is accepted but not used here.
        """
        value = self.traverse(default, indent="")
        maybe_show_tree_param_default(self.showast, name, value)
        if annotation:
            result = "%s: %s=%s" % (name, annotation, value)
        else:
            result = "%s=%s" % (name, value)

        # The below can probably be removed. This is probably
        # a holdover from days when LOAD_CONST erroneously
        # didn't handle LOAD_CONST None properly
        if result[-2:] == "= ":  # default was 'LOAD_CONST None'
            result += "None"

        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Python 3.3+ adds a qualified name at TOS (-1)
    # moving down the LOAD_LAMBDA instruction.
    # This is computed once; nothing below modifies it.
    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif self.version > 3.2:
        lambda_index = -3
    else:
        lambda_index = None

    args_node = node[-1]

    # Maps parameter name (or "return") to its annotation source text.
    annotate_dict = {}

    # Get a list of tree nodes that constitute the values for the "default
    # parameters"; these are default values that appear before any *, and are
    # not to be confused with keyword parameters which may appear after *.
    args_attr = args_node.attr

    if isinstance(args_attr, tuple):
        if len(args_attr) == 3:
            pos_args, kw_args, annotate_argc = args_attr
        else:
            pos_args, kw_args, annotate_argc, closure = args_attr

            # "i" walks down the stack entries below the code object.
            i = -4
            kw_pairs = 0
            if closure:
                # FIXME: fill in
                i -= 1
            if annotate_argc:
                # Turn into subroutine and DRY with other use
                annotate_node = node[i]
                if annotate_node == "expr":
                    annotate_node = annotate_node[0]
                    annotate_name_node = annotate_node[-1]
                    if annotate_node == "dict" and annotate_name_node.kind.startswith(
                            "BUILD_CONST_KEY_MAP"):
                        types = [
                            self.traverse(n, indent="")
                            for n in annotate_node[:-2]
                        ]
                        names = annotate_node[-2].attr
                        assert len(types) == len(names)
                        # Pair names with types without an index loop, so the
                        # stack index "i" is not overwritten.
                        annotate_dict.update(zip(names, types))
                        pass
                    pass
                i -= 1
            if kw_args:
                kw_node = node[i]
                if kw_node == "expr":
                    kw_node = kw_node[0]
                if kw_node == "dict":
                    kw_pairs = kw_node[-1].attr

        # FIXME: there is probably a better way to classify this.
        have_kwargs = node[0].kind.startswith(
            "kwarg") or node[0] == "no_kwargs"
        if len(node) >= 4:
            lc_index = -4
        else:
            lc_index = -3
            pass

        if len(node) > 2 and (have_kwargs
                              or node[lc_index].kind != "load_closure"):

            # Find the index in "node" where the first default
            # parameter value is located. Note this is in contrast to
            # key-word arguments, pairs of (name, value), which appear after "*".
            # "default_values_start" is this location.
            default_values_start = 0
            if node[0] == "no_kwargs":
                default_values_start += 1

            # If in a lambda named args are a sequence of kwarg, not bundled.
            # If not in a lambda, named args are after kwargs; kwargs are bundled as one node.
            if node[default_values_start] == "kwarg":
                assert node[lambda_index] == "LOAD_LAMBDA"
                i = default_values_start
                defparams = []
                while node[i] == "kwarg":
                    defparams.append(node[i][1])
                    i += 1
            else:
                if node[default_values_start] == "kwargs":
                    default_values_start += 1
                defparams = node[default_values_start:default_values_start +
                                 args_node.attr[0]]
        else:
            defparams = node[:args_node.attr[0]]
            kw_args = 0
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        pass

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    scanner_code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(scanner_code.co_varnames[:argc])
    if kwonlyargcount > 0:
        if is_lambda:
            kwargs = []
            for i in range(kwonlyargcount):
                paramnames.append(scanner_code.co_varnames[argc + i])
            pass
        else:
            kwargs = list(scanner_code.co_varnames[argc:argc + kwonlyargcount])

    # defaults are for last n parameters when not in a lambda, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(
            scanner_code._tokens,
            scanner_code._customize,
            scanner_code,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except (ParserError, ParserError2) as p:
        # Report the parse failure in the output and stop.
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    i = len(paramnames) - len(defparams)

    # build parameters
    params = []
    if defparams:
        for i, defparam in enumerate(defparams):
            params.append(
                build_param(ast, paramnames[i], defparam,
                            annotate_dict.get(paramnames[i])))

        # Remaining parameters (those without a default value).
        for param in paramnames[i + 1:]:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)
    else:
        for param in paramnames:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)

    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_arg in annotate_dict:
            params.append("*%s: %s" % (star_arg, annotate_dict[star_arg]))
        else:
            params.append("*%s" % star_arg)
            pass
        if is_lambda:
            params.reverse()
        else:
            argc += 1
        pass
    elif is_lambda and kwonlyargcount > 0:
        params.insert(0, "*")
        kwonlyargcount = 0

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda ", ", ".join(params))
        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        if (len(ast) > 1 and self.traverse(ast[-1]) == "None"
                and self.traverse(ast[-2]).strip().startswith("yield")):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != "expr":
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
            pass
    else:
        # FIXME: add annotations here
        self.write("(", ", ".join(params))
    # self.println(indent, '#flags:\t', int(code.co_flags))

    # FIXME: Could we remove ends_in_comma and its tests if we just
    # created a parameter list and at the very end did a join on that?
    # Unless careful, We might lose line breaks though.
    ends_in_comma = False
    if kwonlyargcount > 0:
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
            ends_in_comma = True
        else:
            if argc > 0 and node[0] != "kwarg":
                self.write(", ")
                ends_in_comma = True

        kw_args = [None] * kwonlyargcount
        if self.version <= 3.3:
            kw_nodes = node[0]
        else:
            kw_nodes = node[args_node.attr[0]]
        if kw_nodes == "kwargs":
            for n in kw_nodes:
                # NOTE: eval() is applied to text taken from the bytecode
                # being decompiled; review before using on untrusted input.
                name = eval(n[0].pattr)
                default = self.traverse(n[1], indent="")
                idx = kwargs.index(name)
                kw_args[idx] = "%s=%s" % (name, default)
                pass
            pass

        # FIXME: something weird is going on and the below
        # might not be right. On 3.4 kw_nodes != "kwarg"
        # because of some sort of type mismatch. I think
        # the test is for versions earlier than 3.3
        # on 3.5 if we have "kwarg" we still want to do this.
        # Perhaps we should be testing that kw_nodes is iterable?
        if kw_nodes != "kwarg" or self.version == 3.5:
            other_kw = [c is None for c in kw_args]

            # NOTE(review): "del kw_args[i]" shifts later entries while the
            # indices come from other_kw -- confirm this is intended.
            for i, flag in enumerate(other_kw):
                if flag:
                    if i < len(kwargs):
                        kw_args[i] = "%s" % kwargs[i]
                    else:
                        del kw_args[i]
                    pass

            self.write(", ".join(kw_args))
            ends_in_comma = False
            pass

        pass
    else:
        if argc == 0:
            ends_in_comma = True

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(", ")
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_star_arg in annotate_dict:
            self.write("**%s: %s" %
                       (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write("**%s" % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(")")
        if annotate_dict and "return" in annotate_dict:
            self.write(" -> %s" % annotate_dict["return"])
        self.println(":")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not is_lambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    assert ast == "stmts"

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(ast, set(), set(), code,
                                                    self.version)

    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, "global ", g)

    for nl in sorted(nonlocals):
        self.println(self.indent, "nonlocal ", nl)

    self.mod_globs -= all_globals
    has_none = "None" in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    scanner_code._customize,
                    is_lambda=is_lambda,
                    returnNone=rn)

    # In obscure cases, a function may be a generator but the "yield"
    # was optimized away. Here, we need to put in unreachable code to
    # add in "yield" just so that the compiler will mark
    # the GENERATOR bit of the function. See for example
    # Python 3.x's test_generator.py test program.
    if not is_lambda and code.co_flags & CO_GENERATOR:
        need_bogus_yield = True
        for token in scanner_code._tokens:
            if token in ("YIELD_VALUE", "YIELD_FROM"):
                need_bogus_yield = False
                break
            pass
        if need_bogus_yield:
            self.template_engine(("%|if False:\n%+%|yield None%-", ), node)

    scanner_code._tokens = None  # save memory
    scanner_code._customize = None  # save memory
Exemplo n.º 14
0
def make_function36(self, node, is_lambda, nested=1, code_node=None):
    """Dump function definition, doc string, and function body in
    Python version 3.6 and above.

    :param node:      parse-tree node for the function; its last child must
                      be a MAKE_FUNCTION_... or MAKE_CLOSURE_... token whose
                      ``attr`` is a 3- or 4-element tuple of flags.
    :param is_lambda: true when a ``lambda`` expression rather than a ``def``
                      parameter list is being written.
    :param nested:    not used in this routine.
    :param code_node: node whose ``attr`` holds the code object; consulted
                      when the code is not taken from a LOAD_LAMBDA child.

    Output is emitted through ``self.write()`` / ``self.println()``; nothing
    is returned. On a parser error the message is written, ``self.ERROR`` is
    set unless ``self.tolerate_errors`` is true, and the routine returns early.
    """
    # MAKE_CLOSURE adds an additional closure slot

    # In Python 3.6 and above stack change again. I understand
    # 3.7 changes some of those changes, although I don't
    # see it in this code yet. Yes, it is hard to follow
    # and I am sure I haven't been able to keep up.

    # Thank you, Python.

    def build_param(ast, name, default, annotation=None):
        """Return the text ``name[: annotation]=default`` for one parameter.

        ``default`` is used as given (it is already source text here).
        ``ast`` is accepted but not used.
        """
        value = default
        maybe_show_tree_param_default(self.showast, name, value)
        if annotation:
            result = "%s: %s=%s" % (name, annotation, value)
        else:
            result = "%s=%s" % (name, value)

        # The below can probably be removed. This is probably
        # a holdover from days when LOAD_CONST erroneously
        # didn't handle LOAD_CONST None properly
        if result[-2:] == "= ":  # default was 'LOAD_CONST None'
            result += "None"

        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Python 3.3+ adds a qualified name at TOS (-1)
    # moving down the LOAD_LAMBDA instruction
    lambda_index = -3

    args_node = node[-1]

    # Maps parameter name (or "return") to its annotation source text.
    annotate_dict = {}

    # Get a list of tree nodes that constitute the values for the "default
    # parameters"; these are default values that appear before any *, and are
    # not to be confused with keyword parameters which may appear after *.
    args_attr = args_node.attr

    if len(args_attr) == 3:
        pos_args, kw_args, annotate_argc = args_attr
        # The 3-element form carries no closure flag. Without this default
        # the "if closure:" tests below would raise NameError.
        closure = 0
    else:
        pos_args, kw_args, annotate_argc, closure = args_attr

    i = -4 if node[-2] != "docstring" else -5
    kw_pairs = 0
    # NOTE(review): annotations are read here before the closure slot is
    # skipped, while the keyword-only section further down skips the closure
    # slot first -- confirm which order matches the tree layout.
    if annotate_argc:
        # Turn into subroutine and DRY with other use
        annotate_node = node[i]
        if annotate_node == "expr":
            annotate_node = annotate_node[0]
            annotate_name_node = annotate_node[-1]
            if annotate_node == "dict" and annotate_name_node.kind.startswith(
                "BUILD_CONST_KEY_MAP"
            ):
                types = [
                    self.traverse(n, indent="") for n in annotate_node[:-2]
                ]
                names = annotate_node[-2].attr
                assert len(types) == len(names)
                # Pair names with types without an index loop, so the
                # stack index "i" is not overwritten.
                annotate_dict.update(zip(names, types))
                pass
            pass
        i -= 1

    if closure:
        # FIXME: fill in
        # annotate = node[i]
        i -= 1

    if kw_args:
        kw_node = node[pos_args]
        if kw_node == "expr":
            kw_node = kw_node[0]
        if kw_node == "dict":
            kw_pairs = kw_node[-1].attr

    # Source text for each positional default value.
    defparams = []
    # FIXME: DRY with code below
    if pos_args:
        expr_node = node[0]
        if node[0] == "pos_arg":
            expr_node = expr_node[0]
        assert expr_node == "expr", "expecting mkfunc default node to be an expr"
        if expr_node[0] == "LOAD_CONST" and isinstance(expr_node[0].attr, tuple):
            defparams = [repr(a) for a in expr_node[0].attr]
        elif expr_node[0] in frozenset(("list", "tuple", "dict", "set")):
            defparams = [self.traverse(n, indent="") for n in expr_node[0][:-1]]

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    scanner_code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(scanner_code.co_varnames[:argc])
    kwargs = list(scanner_code.co_varnames[argc : argc + kwonlyargcount])

    # defaults are for the last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(
            scanner_code._tokens,
            scanner_code._customize,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except (ParserError, ParserError2) as p:
        # Report the parse failure in the output and stop.
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    i = len(paramnames) - len(defparams)

    # build parameters
    params = []
    if defparams:
        for i, defparam in enumerate(defparams):
            params.append(
                build_param(
                    ast, paramnames[i], defparam, annotate_dict.get(paramnames[i])
                )
            )

        # Remaining parameters (those without a default value).
        for param in paramnames[i + 1 :]:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)
    else:
        for param in paramnames:
            if param in annotate_dict:
                params.append("%s: %s" % (param, annotate_dict[param]))
            else:
                params.append(param)

    params.reverse()  # back to correct order

    if code_has_star_arg(code):
        star_arg = code.co_varnames[argc + kwonlyargcount]
        if star_arg in annotate_dict:
            params.append("*%s: %s" % (star_arg, annotate_dict[star_arg]))
        else:
            params.append("*%s" % star_arg)

        argc += 1

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda")
        if params:
            self.write(" ", ", ".join(params))
        elif kwonlyargcount > 0 and not (4 & code.co_flags):
            assert argc == 0
            self.write(" ")

        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        if (
            len(ast) > 1
            and self.traverse(ast[-1]) == "None"
            and self.traverse(ast[-2]).strip().startswith("yield")
        ):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != "expr":
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
            pass
    else:
        self.write("(", ", ".join(params))
    # self.println(indent, '#flags:\t', int(code.co_flags))

    ends_in_comma = False
    if kwonlyargcount > 0:
        if not (4 & code.co_flags):
            if argc > 0:
                self.write(", *, ")
            else:
                self.write("*, ")
            pass
            ends_in_comma = True
        else:
            if argc > 0:
                self.write(", ")
                ends_in_comma = True

        ann_dict = kw_dict = default_tup = None
        # Skip over:
        #  MAKE_FUNCTION,
        #  optional docstring
        #  LOAD_CONST qualified name,
        #  LOAD_CONST code object
        index = -5 if node[-2] == "docstring" else -4
        # The flags unpacked from args_attr above are used rather than
        # negative indexing into the tuple, so that a 3-element tuple
        # is handled too.
        if closure:
            index -= 1
        if annotate_argc:
            ann_dict = node[index]
            index -= 1
        if kw_args:
            kw_dict = node[index]
            index -= 1
        if pos_args:
            default_tup = node[index]

        if kw_dict == "expr":
            kw_dict = kw_dict[0]

        # One slot per keyword-only parameter, filled in below.
        kw_args = [None] * kwonlyargcount

        # FIXME: handle free_tup, ann_dict, and default_tup
        if kw_dict:
            assert kw_dict == "dict"
            defaults = [self.traverse(n, indent="") for n in kw_dict[:-2]]
            # NOTE: eval() is applied to text recovered from the bytecode
            # being decompiled; review before using on untrusted input.
            names = eval(self.traverse(kw_dict[-2]))
            assert len(defaults) == len(names)
            # FIXME: possibly handle line breaks
            for i, n in enumerate(names):
                idx = kwargs.index(n)
                if annotate_dict and n in annotate_dict:
                    t = "%s: %s=%s" % (n, annotate_dict[n], defaults[i])
                else:
                    t = "%s=%s" % (n, defaults[i])
                kw_args[idx] = t
                pass
            pass
        # handle others: keyword-only parameters that have no default
        other_kw = [c is None for c in kw_args]

        for i, flag in enumerate(other_kw):
            if flag:
                n = kwargs[i]
                if n in annotate_dict:
                    kw_args[i] = "%s: %s" % (n, annotate_dict[n])
                else:
                    kw_args[i] = "%s" % n

        self.write(", ".join(kw_args))
        ends_in_comma = False
        pass
    else:
        if argc == 0:
            ends_in_comma = True

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(", ")
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if annotate_dict and star_star_arg in annotate_dict:
            self.write("**%s: %s" % (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write("**%s" % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(")")
        if annotate_dict and "return" in annotate_dict:
            self.write(" -> %s" % annotate_dict["return"])
        self.println(":")

    if (
        node[-2] == "docstring" and not is_lambda
    ):
        # docstring exists, dump it
        self.println(self.traverse(node[-2]))

    assert ast in ("stmts", "lambda_start")

    all_globals = find_all_globals(ast, set())
    globals, nonlocals = find_globals_and_nonlocals(
        ast, set(), set(), code, self.version
    )

    for g in sorted((all_globals & self.mod_globs) | globals):
        self.println(self.indent, "global ", g)

    for nl in sorted(nonlocals):
        self.println(self.indent, "nonlocal ", nl)

    self.mod_globs -= all_globals
    has_none = "None" in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(
        ast, code.co_name, scanner_code._customize, is_lambda=is_lambda, returnNone=rn
    )

    # In obscure cases, a function may be a generator but the "yield"
    # was optimized away. Here, we need to put in unreachable code to
    # add in "yield" just so that the compiler will mark
    # the GENERATOR bit of the function. See for example
    # Python 3.x's test_connection.py and test_contexlib_async test programs.
    if not is_lambda and code.co_flags & (CO_GENERATOR | CO_ASYNC_GENERATOR):
        need_bogus_yield = True
        for token in scanner_code._tokens:
            if token == "YIELD_VALUE":
                need_bogus_yield = False
                break
            pass
        if need_bogus_yield:
            self.template_engine(("%|if False:\n%+%|yield None%-",), node)

    scanner_code._tokens = None # save memory
    scanner_code._customize = None  # save memory
Exemplo n.º 15
0
    def __init__(
        self,
        name,
        code,
        globs,
        argdefs,
        closure=None,
        vm=None,
        kwdefaults=None,
        annotations=None,
        doc=None,
        qualname=None,
    ):
        """Set up an emulated function object.

        :param name:        function name, or None to use ``code.co_name``.
        :param code:        code object; must satisfy ``iscode()``.
        :param globs:       dict of globals for the function.
        :param argdefs:     iterable of positional default values, or a
                            false value for none.
        :param closure:     None or a tuple.
        :param vm:          the interpreter object; required. Its ``version``
                            and ``frame.f_locals`` are read here.
        :param kwdefaults:  keyword-only defaults; None means an empty dict.
        :param annotations: annotations; None means an empty dict.
        :param doc:         stored in ``__doc__`` first, but overwritten
                            below from ``code.co_consts``.
        :param qualname:    qualified name; used when ``vm.version >= 3.4``.

        :raises TypeError: if ``vm`` is missing or ``name``, ``code``,
                           ``globs`` or ``closure`` has the wrong type.
        """
        # Check vm before it is dereferenced, so that a missing vm gives the
        # intended TypeError instead of an AttributeError on vm.version.
        if not vm:
            raise TypeError("Function() argument 6 (vm) must be passed")

        # Fresh dicts per call: a mutable default would be shared by every
        # instance created without these arguments.
        if kwdefaults is None:
            kwdefaults = {}
        if annotations is None:
            annotations = {}

        self._vm = vm
        self.version = vm.version
        self.__doc__ = doc

        if name is not None and not isinstance(name, str):
            raise TypeError(
                "Function() argument 1 (name) must None or string, not %s" %
                type(name))

        if not iscode(code):
            raise TypeError(
                "Function() argument 2 (code) must be code, not %s" %
                type(code))

        if not isinstance(globs, dict):
            raise TypeError(
                "Function() argument 3 (globs) must be dict, not %s" %
                type(globs))

        if closure is not None and not isinstance(closure, tuple):
            raise TypeError(
                "Function() argument 5 (closure) must None or tuple, not %s" %
                type(closure))

        # Function field names below change between Python 2.7 and 3.x.
        # We create attributes for both names. Other code in this file assumes
        # 2.7ish names, while bytecode for 3.x will use 3.x names.
        # TODO: be more stringent based on vm version.
        self.func_code = self.__code__ = code
        self.func_name = self.__name__ = name or code.co_name
        self.func_defaults = self.__defaults__ = tuple(
            argdefs) if argdefs else tuple()
        self.func_closure = self.__closure__ = closure

        self.func_globals = globs
        self.func_locals = vm.frame.f_locals
        # NOTE(review): this replaces the instance dict; whether attributes
        # assigned above survive depends on how the class stores them
        # (e.g. __slots__) -- confirm against the class definition.
        self.__dict__ = {
            "version": vm.version,
            "_vm": vm,
        }

        # This overwrites the "doc" argument stored earlier.
        self.__doc__ = (code.co_consts[0] if hasattr(code, "co_consts")
                        and code.co_consts else None)

        if vm.version >= 3.0:
            self.__annotations__ = annotations
            self.__kwdefaults__ = kwdefaults
            if vm.version >= 3.4:
                self.__qualname__ = qualname if qualname else self.__name__
            else:
                assert qualname is None
        else:
            assert annotations == {}
            assert kwdefaults == {}

        # In Python 3.x various generators and list comprehensions have a .0 arg
        # but inspect doesn't show that. In the various MAKE_FUNCTION routines,
        # we will detect this and store True in this field when appropriate.
        if not argdefs and self.__name__.split(
                ".")[-1] in COMPREHENSION_FN_NAMES:
            self.has_dot_zero = True
        else:
            self.has_dot_zero = False

        # From byterun.py:
        #   Sometimes, we need a real Python function. This is for that.
        #
        #
        # An elaboration of the above pithy comment may be helpful.
        # Until this project emulates more functions, we rely heavily
        # on some built-in, or standard library
        # functions. `__build_class__` is an example of a builtin;
        # `import` is another example.  Many of Python's standard
        # library inspect routines require native functions, not our
        # emulated classes and types.
        #
        # For the `inspect` module, we've started providing equivalent
        # alternatives, but overall more of this needs to be done.
        #
        # The intent in providing native functions is for use in type
        # testing, mostly. The functions should not be run, since that defeats our
        # ability to trace functions.
        kw = {
            "argdefs": self.func_defaults,
        }
        if closure:
            # Placeholder cells, one per closure entry.
            kw["closure"] = tuple(make_cell(0) for _ in closure)

        if not isinstance(code, types.CodeType) and hasattr(code, "to_native"):
            # Best effort: keep the non-native code object if conversion fails.
            try:
                code = code.to_native()
            except Exception:
                pass

        if isinstance(code, types.CodeType):
            # Best effort: fall back to no native function on any failure.
            try:
                self._func = types.FunctionType(code, globs, **kw)
                if vm.version >= 3.0:
                    # Above, types.FunctionType() above doesn't allow passing
                    # in the following attributes, so we set them as
                    # assignments below.
                    self._func.__kwdefaults__ = kwdefaults
                    self._func.__annotations__ = annotations
                    pass
            except Exception:
                self._func = None
        else:
            # cross version interpreting... FIXME: fix this up
            self._func = None
Exemplo n.º 16
0
def make_function2(self, node, is_lambda, nested=1, code_node=None):
    """
    Dump function definition, doc string, and function body.
    This code is specialized for Python 2.

    :param node:      parse-tree node whose last child is a MAKE_FUNCTION_...
                      or MAKE_CLOSURE_... token.
    :param is_lambda: true when the function is to be written as a
                      ``lambda`` expression instead of a ``def``-style
                      parameter list.
    :param nested:    not used by this function.
    :param code_node: node whose ``attr`` holds the function's code object.
                      Although it defaults to ``None``, it is always
                      dereferenced, so it must be supplied.

    Output is produced through ``self.write``/``self.println``; nothing is
    returned. If parsing the function body fails, the parser error text is
    written out and, unless ``self.tolerate_errors`` is set, stored in
    ``self.ERROR``.
    """

    def build_param(ast, name, default):
        """Return the source text for one formal parameter.

        - handle defaults
        - handle formal tuple parameters
        """
        # if the formal parameter is a tuple, the parameter name
        # starts with a dot (eg. '.1', '.2')
        if name.startswith("."):
            # replace the name with the tuple-string
            name = self.get_tuple_parameter(ast, name)

        if not default:
            return name

        value = self.traverse(default, indent="")
        maybe_show_tree_param_default(self.showast, name, value)
        result = "%s=%s" % (name, value)
        if result[-2:] == "= ":  # default was 'LOAD_CONST None'
            result += "None"
        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are after kwargs
        pos_args, kw_args, _annotate_argc = args_node.attr
        defparams = node[1:pos_args + 1]
    else:
        defparams = node[:args_node.attr]
        kw_args = 0

    # The code object always comes from code_node here: the former
    # LOAD_LAMBDA lookup was guarded by an index that was always None
    # and so could never run.
    code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    paramnames = list(code.co_varnames[:argc])

    # defaults are for last n parameters, thus reverse
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(
            code._tokens,
            code._customize,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    # Remember the indentation in effect now; it is the one the
    # docstring is printed with further down.
    indent = self.indent

    # build parameters
    params = [
        build_param(ast, name, default)
        for name, default in zip_longest(paramnames, defparams, fillvalue=None)
    ]
    params.reverse()  # back to correct order

    has_star_arg = code_has_star_arg(code)
    if has_star_arg:
        params.append("*%s" % code.co_varnames[argc])
        argc += 1

    # dump parameter list (with default values)
    if is_lambda:
        self.write("lambda ", ", ".join(params))
        # If the last statement is None (which is the
        # same thing as "return None" in a lambda) and the
        # next to last statement is a "yield". Then we want to
        # drop the (return) None since that was just put there
        # to have something to after the yield finishes.
        # FIXME: this is a bit hoaky and not general
        if (len(ast) > 1 and self.traverse(ast[-1]) == "None"
                and self.traverse(ast[-2]).strip().startswith("yield")):
            del ast[-1]
            # Now pick out the expr part of the last statement
            ast_expr = ast[-1]
            while ast_expr.kind != "expr":
                ast_expr = ast_expr[0]
            ast[-1] = ast_expr
    else:
        self.write("(", ", ".join(params))

    if kw_args > 0:
        if has_star_arg:
            self.write(", ")
        elif argc > 0:
            # No *args in the signature, so a bare "*" separates the
            # keyword parameters from the positional ones.
            self.write(", *, ")
        else:
            self.write("*, ")

        # Emit the first child of node that is not a "pos_arg", then stop.
        for n in node:
            if n == "pos_arg":
                continue
            self.preorder(n)
            break

    if code_has_star_star_arg(code):
        if argc > 0:
            self.write(", ")
        self.write("**%s" % code.co_varnames[argc])

    if is_lambda:
        self.write(": ")
    else:
        self.println("):")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not is_lambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, indent, code.co_consts[0])

    if not is_lambda:
        assert ast == "stmts"

    all_globals = find_all_globals(ast, set())

    global_names, nonlocals = find_globals_and_nonlocals(
        ast, set(), set(), code, self.version)

    # Python 2 doesn't support the "nonlocal" statement
    assert self.version >= 3.0 or not nonlocals

    for g in sorted((all_globals & self.mod_globs) | global_names):
        self.println(self.indent, "global ", g)
    self.mod_globs -= all_globals
    has_none = "None" in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    code._customize,
                    is_lambda=is_lambda,
                    returnNone=rn)

    code._tokens = None  # save memory
    code._customize = None  # save memory
Exemplo n.º 17
0
    def n_classdef3(node):
        """Handle the "classdef" nonterminal for 3.0 <= version < 3.6.

        Writes the ``class Name(bases):`` header and the class body through
        ``self``, then prunes further traversal of ``node`` via
        ``self.prune()``.

        Raises RuntimeError when the class body code or the superclass
        information cannot be located in the closure form of the tree.
        """

        assert (3, 0) <= self.version < (3, 6)

        # class definition ('class X(A,B,C):')
        cclass = self.currentclass

        # Pick out various needed bits of information
        # * class_name - the name of the class
        # * subclass_info - the parameters to the class  e.g.
        #      class Foo(bar, baz)
        #               ----------
        # * subclass_code - the code for the subclass body
        subclass_info = None
        if node == "classdefdeco2":
            if self.version < (3, 4):
                class_name = node[2][0].attr
            else:
                class_name = node[1][2].attr
            build_class = node
        else:
            build_class = node[0]
            class_name = node[1][0].attr

        assert "mkfunc" == build_class[1]
        mkfunc = build_class[1]
        if mkfunc[0] in ("kwargs", "no_kwargs"):
            if (3, 0) <= self.version < (3, 3):
                for n in mkfunc:
                    if hasattr(n, "attr") and iscode(n.attr):
                        subclass_code = n.attr
                        break
                    elif n == "expr":
                        # Provisional value; the scan continues in case a
                        # later child carries a real code object.
                        subclass_code = n[0].attr
            else:
                for n in mkfunc:
                    if hasattr(n, "attr") and iscode(n.attr):
                        subclass_code = n.attr
                        break
            if node == "classdefdeco2":
                subclass_info = node
            else:
                subclass_info = node[0]
        elif build_class[1][0] == "load_closure":
            # Python 3 with closures not functions
            load_closure = build_class[1]
            if hasattr(load_closure[-3], "attr"):
                # Python 3.3 classes with closures work like this.
                # Note have to test before 3.2 case because
                # index -2 also has an attr.
                subclass_code = find_code_node(load_closure, -3).attr
            elif hasattr(load_closure[-2], "attr"):
                # Python 3.2 works like this
                subclass_code = find_code_node(load_closure, -2).attr
            else:
                # Raising a plain string is itself a TypeError in Python 3,
                # which would hide this message; use a real exception.
                raise RuntimeError(
                    "Internal Error n_classdef: cannot find class body")
            if hasattr(build_class[3], "__len__"):
                if not subclass_info:
                    subclass_info = build_class[3]
            elif hasattr(build_class[2], "__len__"):
                subclass_info = build_class[2]
            else:
                raise RuntimeError(
                    "Internal Error n_classdef: cannot find superclass name")
        elif not subclass_info:
            # mkfunc[0] is neither "kwargs" nor "no_kwargs" here (the first
            # branch above handles those), so the code is its first child.
            subclass_code = mkfunc[0].attr
            if node == "classdefdeco2":
                subclass_info = node
            else:
                subclass_info = node[0]

        if node == "classdefdeco2":
            self.write("\n")
        else:
            self.write("\n\n")

        self.currentclass = str(class_name)
        self.write(self.indent, "class ", self.currentclass)

        self.print_super_classes3(subclass_info)
        self.println(":")

        # class body
        self.indent_more()
        self.build_class(subclass_code)
        self.indent_less()

        # Restore the enclosing class name saved on entry.
        self.currentclass = cclass
        if len(self.param_stack) > 1:
            self.write("\n\n")
        else:
            self.write("\n\n\n")

        self.prune()
Exemplo n.º 18
0
def make_function36(self, node, is_lambda, nested=1, code_node=None):
    """Dump function definition, doc string, and function body in
    Python version 3.6 and above.

    :param node:      parse-tree node whose last child is a MAKE_FUNCTION_...
                      or MAKE_CLOSURE_... token; that token's ``attr`` is a
                      3- or 4-element sequence
                      (pos_args, kw_args, annotate_argc[, closure]).
    :param is_lambda: true when the function is a lambda; the code object is
                      then taken from the LOAD_LAMBDA child if it carries one.
    :param nested:    not used in the code shown here.
    :param code_node: node whose ``attr`` holds the code object; used when
                      the LOAD_LAMBDA path does not apply.

    As written here, the function gathers defaults, annotations and
    parameter names and builds the parse tree of the body. On a parser
    error it writes the error text, records it in ``self.ERROR`` unless
    ``self.tolerate_errors`` is set, and returns. No value is returned on
    either path.
    """
    # MAKE_CLOSURE adds an additional closure slot

    # In Python 3.6 and above stack change again. I understand
    # 3.7 changes some of those changes, although I don't
    # see it in this code yet. Yes, it is hard to follow
    # and I am sure I haven't been able to keep up.

    # Thank you, Python.

    def build_param(ast, name, default, annotation=None):
        """build parameters:
            - handle defaults
            - handle format tuple parameters
        """
        value = default
        maybe_show_tree_param_default(self.showast, name, value)
        if annotation:
            result = "%s: %s=%s" % (name, annotation, value)
        else:
            result = "%s=%s" % (name, value)

        # The below can probably be removed. This is probably
        # a holdover from days when LOAD_CONST erroneously
        # didn't handle LOAD_CONST None properly
        if result[-2:] == "= ":  # default was 'LOAD_CONST None'
            result += "None"

        return result

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Python 3.3+ adds a qualified name at TOS (-1)
    # moving down the LOAD_LAMBDA instruction
    lambda_index = -3

    args_node = node[-1]

    annotate_dict = {}

    # Get a list of tree nodes that constitute the values for the "default
    # parameters"; these are default values that appear before any *, and are
    # not to be confused with keyword parameters which may appear after *.
    args_attr = args_node.attr

    if len(args_attr) == 3:
        pos_args, kw_args, annotate_argc = args_attr
        # No closure slot in the 3-element form. Bind it explicitly so the
        # "if closure" test below cannot raise NameError.
        closure = 0
    else:
        pos_args, kw_args, annotate_argc, closure = args_attr

    if node[-2] != "docstring":
        i = -4
    else:
        i = -5

    kw_pairs = 0
    if annotate_argc:
        # Turn into subroutine and DRY with other use
        annotate_node = node[i]
        if annotate_node == "expr":
            annotate_node = annotate_node[0]
            annotate_name_node = annotate_node[-1]
            if annotate_node == "dict" and annotate_name_node.kind.startswith(
                "BUILD_CONST_KEY_MAP"
            ):
                annotation_types = [
                    self.traverse(n, indent="") for n in annotate_node[:-2]
                ]
                names = annotate_node[-2].attr
                assert len(annotation_types) == len(names)
                # Pair names with their rendered types without reusing
                # "i", which is the running stack index adjusted below.
                annotate_dict.update(zip(names, annotation_types))
        i -= 1

    if closure:
        # FIXME: fill in
        # annotate = node[i]
        i -= 1

    if kw_args:
        kw_node = node[pos_args]
        if kw_node == "expr":
            kw_node = kw_node[0]
        if kw_node == "dict":
            kw_pairs = kw_node[-1].attr

    defparams = []
    # FIXME: DRY with code below
    default, kw_args, annotate_argc = args_node.attr[0:3]
    if default:
        expr_node = node[0]
        if node[0] == "pos_arg":
            expr_node = expr_node[0]
        assert expr_node == "expr", "expecting mkfunc default node to be an expr"
        if expr_node[0] == "LOAD_CONST" and isinstance(expr_node[0].attr, tuple):
            defparams = [repr(a) for a in expr_node[0].attr]
        elif expr_node[0] in frozenset(("list", "tuple", "dict", "set")):
            defparams = [self.traverse(n, indent="") for n in expr_node[0][:-1]]

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    scanner_code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(scanner_code.co_varnames[:argc])
    kwargs = list(scanner_code.co_varnames[argc : argc + kwonlyargcount])

    # defaults are for the last n parameters, thus reverse both lists
    paramnames.reverse()
    defparams.reverse()

    try:
        ast = self.build_ast(
            scanner_code._tokens,
            scanner_code._customize,
            scanner_code,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except ParserError as p:
        # "except ParserError, p" is Python 2 only syntax; this function
        # targets 3.6+ bytecode and must itself parse under Python 3.
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return
Exemplo n.º 19
0
    def listcomp_closure3(node):
        """List comprehensions in Python 3 when handled as a closure.
        See if we can combine code.

        The comprehension's code object is taken from ``node[1].attr`` and
        parsed into its own tree. The body expression is emitted first, then
        one `` for <store> in <collection>`` clause per store target that has
        a matching collection, each optionally followed by its condition.

        ``self.prec`` is saved on entry and restored on normal exit.
        """
        p = self.prec
        self.prec = 27

        code_obj = node[1].attr
        assert iscode(code_obj)
        code = Code(code_obj, self.scanner, self.currentclass)
        ast = self.build_ast(code._tokens, code._customize)
        self.customize(code._customize)

        # skip over: sstmt, stmt, return, ret_expr
        # and other singleton derivations
        while len(ast) == 1 or (ast in ("sstmt", "return") and ast[-1]
                                in ("RETURN_LAST", "RETURN_VALUE")):
            self.prec = 100
            ast = ast[0]

        n = ast[1]

        # collections is the name of the expression(s) we are iterating over
        collections = [node[-3]]
        # Condition nodes collected during the walk; indexed in parallel
        # with "stores" when the clauses are written out below.
        list_ifs = []

        if self.version == 3.0 and n != "list_iter":
            # FIXME 3.0 is a snowflake here. We need
            # special code for this. Not sure if this is totally
            # correct.
            stores = [ast[3]]
            assert ast[4] == "comp_iter"
            n = ast[4]
            # Find the list comprehension body. It is the inner-most
            # node that is not comp_.. .
            while n == "comp_iter":
                if n[0] == "comp_for":
                    n = n[0]
                    stores.append(n[2])
                    n = n[3]
                elif n[0] in ("comp_if", "comp_if_not"):
                    n = n[0]
                    # FIXME: just a guess
                    if n[0].kind == "expr":
                        list_ifs.append(n)
                    else:
                        list_ifs.append([1])
                    n = n[2]
                    pass
                else:
                    break
                pass

            # Skip over n[0] which is something like: _[1]
            self.preorder(n[1])

        else:
            assert n == "list_iter"
            stores = []
            # Find the list comprehension body. It is the inner-most
            # node that is not list_.. .
            while n == "list_iter":

                # recurse one step
                n = n[0]

                if n == "list_for":
                    stores.append(n[2])
                    n = n[3]
                    if n[0] == "list_for":
                        # Dog-paddle down largely singleton reductions
                        # to find the collection (expr)
                        c = n[0][0]
                        if c == "expr":
                            c = c[0]
                        # FIXME: grammar is wonky here? Is this really an attribute?
                        if c == "attribute":
                            c = c[0]
                        collections.append(c)
                        pass
                elif n in ("list_if", "list_if_not"):
                    # FIXME: just a guess
                    if n[0].kind == "expr":
                        list_ifs.append(n)
                    else:
                        list_ifs.append([1])
                    n = n[2]
                    pass
                elif n == "list_if37":
                    list_ifs.append(n)
                    n = n[-1]
                    pass
                elif n == "list_afor":
                    collections.append(n[0][0])
                    n = n[1]
                    stores.append(n[1][0])
                    n = n[3]
                pass

            # The walk must have ended on the comprehension body; the parsed
            # tree is attached as the assertion message for debugging.
            assert n == "lc_body", ast

            self.preorder(n[0])

        # FIXME: add indentation around "for"'s and "in"'s
        n_colls = len(collections)
        for i, store in enumerate(stores):
            # Stop once there are more store targets than collections.
            if i >= n_colls:
                break
            # " async" is emitted only when the collection is a LOAD_DEREF
            # and the comprehension's code object has an async flag set.
            if collections[i] == "LOAD_DEREF" and co_flags_is_async(
                    code_obj.co_flags):
                self.write(" async")
                pass
            self.write(" for ")
            self.preorder(store)
            self.write(" in ")
            self.preorder(collections[i])
            if i < len(list_ifs):
                self.preorder(list_ifs[i])
                pass
            pass
        self.prec = p
Exemplo n.º 20
0
def decompile(
    bytecode_version: str,
    co,
    out=None,
    showasm=None,
    showast=None,
    timestamp=None,
    showgrammar=False,
    source_encoding=None,
    code_objects=None,
    source_size=None,
    is_pypy=None,
    magic_int=None,
    mapstream=None,
    do_fragments=False,
    compile_mode="exec",
) -> Any:
    """
    ingests and deparses a given code block 'co'

    if `bytecode_version` is None, use the current Python interpreter
    version.

    A comment header (tool version, bytecode version, running interpreter
    and, when available, file name, timestamp and source size) is written to
    `out`, or to sys.stdout when `out` is not given, before deparsing starts.

    `showast` and `code_objects` default to a fresh empty dict per call.

    When `mapstream` is given (a stream, or a string that is turned into one
    with `_get_outstream`), a line-number map is written to it after
    deparsing.

    Returns the object produced by the deparse routine that was used.
    Raises `pysource.SourceWalkerError` when deparsing fails; the original
    error is chained as its cause.

    Caller is responsible for closing `out` and `mapstream`
    """
    # Use None as the default and build the dicts here, so that calls do not
    # share (and possibly mutate) a single module-lifetime default object.
    if showast is None:
        showast = {}
    if code_objects is None:
        code_objects = {}

    if bytecode_version is None:
        bytecode_version = sysinfo2float()

    # store final output stream for case of error
    real_out = out or sys.stdout

    def write(s):
        s += "\n"
        real_out.write(s)

    assert iscode(co)

    co_pypy_str = "PyPy " if is_pypy else ""
    run_pypy_str = "PyPy " if IS_PYPY else ""
    sys_version_lines = sys.version.split("\n")
    if source_encoding:
        write("# -*- coding: %s -*-" % source_encoding)
    write("# decompyle3 version %s\n"
          "# %sPython bytecode %s%s\n# Decompiled from: %sPython %s" % (
              VERSION,
              co_pypy_str,
              bytecode_version,
              " (%s)" % str(magic_int) if magic_int else "",
              run_pypy_str,
              "\n# ".join(sys_version_lines),
          ))
    if co.co_filename:
        write("# Embedded file name: %s" % co.co_filename)
    if timestamp:
        write("# Compiled at: %s" % datetime.datetime.fromtimestamp(timestamp))
    if source_size:
        write("# Size of source mod 2**32: %d bytes" % source_size)

    debug_opts = {"asm": showasm, "ast": showast, "grammar": showgrammar}

    try:
        if mapstream:
            if isinstance(mapstream, str):
                mapstream = _get_outstream(mapstream)

            deparsed = deparse_code_with_map(
                bytecode_version,
                co,
                out,
                showasm,
                showast,
                showgrammar,
                code_objects=code_objects,
                is_pypy=is_pypy,
            )
            # Shift deparsed line numbers by the header lines written above.
            header_count = 3 + len(sys_version_lines)
            linemap = [(line_no,
                        deparsed.source_linemap[line_no] + header_count)
                       for line_no in sorted(deparsed.source_linemap.keys())]
            mapstream.write("\n\n# %s\n" % linemap)
        else:
            if do_fragments:
                deparse_fn = code_deparse_fragments
            else:
                deparse_fn = code_deparse
            deparsed = deparse_fn(
                co,
                out,
                bytecode_version,
                debug_opts=debug_opts,
                is_pypy=is_pypy,
                compile_mode=compile_mode,
            )
        return deparsed
    except pysource.SourceWalkerError as e:
        # deparsing failed; keep the original traceback attached as the cause
        raise pysource.SourceWalkerError(str(e)) from e
Exemplo n.º 21
0
    def ingest(self, co, classname=None, code_objects={}, show_asm=None):
        """
        Pick out tokens from an decompyle3 code object, and transform them,
        returning a list of decompyle3 Token's.

        The transformations are made to assist the deparsing grammar.
        Specifically:
           -  various types of LOAD_CONST's are categorized in terms of what they load
           -  COME_FROM instructions are added to assist parsing control structures
           -  MAKE_FUNCTION and FUNCTION_CALLS append the number of positional arguments
           -  some EXTENDED_ARGS instructions are removed

        Also, when we encounter certain tokens, we add them to a set which will cause custom
        grammar rules. Specifically, variable arg tokens like MAKE_FUNCTION or BUILD_LIST
        cause specific rules for the specific number of arguments they take.
        """

        def tokens_append(j, token):
            tokens.append(token)
            self.offset2tok_index[token.offset] = j
            j += 1
            assert j == len(tokens)
            return j

        if not show_asm:
            show_asm = self.show_asm

        bytecode = self.build_instructions(co)

        # show_asm = 'both'
        if show_asm in ("both", "before"):
            for instr in bytecode.get_instructions(co):
                print(instr.disassemble())

        # "customize" is in the process of going away here
        customize = {}

        if self.is_pypy:
            customize["PyPy"] = 0

        # Scan for assertions. Later we will
        # turn 'LOAD_GLOBAL' to 'LOAD_ASSERT'.
        # 'LOAD_ASSERT' is used in assert statements.
        self.load_asserts = set()

        # list of tokens/instructions
        tokens = []
        self.offset2tok_index = {}

        n = len(self.insts)
        for i, inst in enumerate(self.insts):

            # We need to detect the difference between:
            #   raise AssertionError
            #  and
            #   assert ...
            # If we have a JUMP_FORWARD after the
            # RAISE_VARARGS then we have a "raise" statement
            # else we have an "assert" statement.
            assert_can_follow = inst.opname == "POP_JUMP_IF_TRUE" and i + 1 < n
            if assert_can_follow:
                next_inst = self.insts[i + 1]
                if (
                    next_inst.opname == "LOAD_GLOBAL"
                    and next_inst.argval == "AssertionError"
                ):
                    raise_idx = self.offset2inst_index[self.prev_op[inst.argval]]
                    raise_inst = self.insts[raise_idx]
                    if raise_inst.opname.startswith("RAISE_VARARGS"):
                        self.load_asserts.add(next_inst.offset)
                    pass
                pass

        # Operand values in Python wordcode are small. As a result,
        # there are these EXTENDED_ARG instructions - way more than
        # before 3.6. These make parsing a lot more painful.

        # To simplify things we want to untangle this. We also
        # do this loop before we compute jump targets.
        for i, inst in enumerate(self.insts):

            # One artifact of the "too-small" operand problem, is that
            # some backward jumps, are turned into forward jumps to another
            # "extended arg" backward jump to the same location.
            if inst.opname == "JUMP_FORWARD":
                jump_inst = self.insts[self.offset2inst_index[inst.argval]]
                if jump_inst.has_extended_arg and jump_inst.opname.startswith("JUMP"):
                    # Create comination of the jump-to instruction and
                    # this one. Keep the position information of this instruction,
                    # but the operator and operand properties come from the other
                    # instruction
                    self.insts[i] = Instruction(
                        jump_inst.opname,
                        jump_inst.opcode,
                        jump_inst.optype,
                        jump_inst.inst_size,
                        jump_inst.arg,
                        jump_inst.argval,
                        jump_inst.argrepr,
                        jump_inst.has_arg,
                        inst.offset,
                        inst.starts_line,
                        inst.is_jump_target,
                        inst.has_extended_arg,
                    )

        # Get jump targets
        # Format: {target offset: [jump offsets]}
        jump_targets = self.find_jump_targets(show_asm)
        # print("XXX2", jump_targets)

        last_op_was_break = False

        j = 0
        for i, inst in enumerate(self.insts):

            argval = inst.argval
            op = inst.opcode

            if inst.opname == "EXTENDED_ARG":
                # FIXME: The EXTENDED_ARG is used to signal annotation
                # parameters
                if i + 1 < n and self.insts[i + 1].opcode != self.opc.MAKE_FUNCTION:
                    continue

            if inst.offset in jump_targets:
                jump_idx = 0
                # We want to process COME_FROMs to the same offset to be in *descending*
                # offset order so we have the larger range or biggest instruction interval
                # last. (I think they are sorted in increasing order, but for safety
                # we sort them). That way, specific COME_FROM tags will match up
                # properly. For example, a "loop" with an "if" nested in it should have the
                # "loop" tag last so the grammar rule matches that properly.
                for jump_offset in sorted(jump_targets[inst.offset], reverse=True):
                    come_from_name = "COME_FROM"

                    opname = self.opname_for_offset(jump_offset)
                    if opname == "EXTENDED_ARG":
                        k = xdis.next_offset(op, self.opc, jump_offset)
                        opname = self.opname_for_offset(k)

                    if opname.startswith("SETUP_"):
                        come_from_type = opname[len("SETUP_") :]
                        come_from_name = "COME_FROM_%s" % come_from_type
                        pass
                    elif inst.offset in self.except_targets:
                        come_from_name = "COME_FROM_EXCEPT_CLAUSE"
                    j = tokens_append(
                        j,
                        Token(
                            come_from_name,
                            jump_offset,
                            repr(jump_offset),
                            offset="%s_%s" % (inst.offset, jump_idx),
                            has_arg=True,
                            opc=self.opc,
                            has_extended_arg=False,
                        ),
                    )
                    jump_idx += 1
                    pass
                pass

            pattr = inst.argrepr
            opname = inst.opname

            if op in self.opc.CONST_OPS:
                const = argval
                if iscode(const):
                    if const.co_name == "<lambda>":
                        assert opname == "LOAD_CONST"
                        opname = "LOAD_LAMBDA"
                    elif const.co_name == "<genexpr>":
                        opname = "LOAD_GENEXPR"
                    elif const.co_name == "<dictcomp>":
                        opname = "LOAD_DICTCOMP"
                    elif const.co_name == "<setcomp>":
                        opname = "LOAD_SETCOMP"
                    elif const.co_name == "<listcomp>":
                        opname = "LOAD_LISTCOMP"
                    else:
                        opname = "LOAD_CODE"
                    # verify() uses 'pattr' for comparison, since 'attr'
                    # now holds Code(const) and thus can not be used
                    # for comparison (todo: think about changing this)
                    # pattr = 'code_object @ 0x%x %s->%s' %\
                    # (id(const), const.co_filename, const.co_name)
                    pattr = "<code_object " + const.co_name + ">"
                elif isinstance(const, str):
                    opname = "LOAD_STR"
                else:
                    if isinstance(inst.arg, int) and inst.arg < len(co.co_consts):
                        argval, _ = _get_const_info(inst.arg, co.co_consts)
                    # Why don't we use _ above for "pattr" rather than "const"?
                    # This *is* a little hoaky, but we have to coordinate with
                    # other parts like n_LOAD_CONST in pysource.py for example.
                    pattr = const
                    pass
            elif opname == "IMPORT_NAME":
                if "." in inst.argval:
                    opname = "IMPORT_NAME_ATTR"
                    pass
            elif opname in ("MAKE_FUNCTION", "MAKE_CLOSURE"):
                flags = argval
                opname = "MAKE_FUNCTION_%d" % (flags)
                attr = []
                for flag in self.MAKE_FUNCTION_FLAGS:
                    bit = flags & 1
                    attr.append(bit)
                    flags >>= 1
                attr = attr[:4]  # remove last value: attr[5] == False
                j = tokens_append(
                    j,
                    Token(
                        opname=opname,
                        attr=attr,
                        pattr=pattr,
                        offset=inst.offset,
                        linestart=inst.starts_line,
                        op=op,
                        has_arg=inst.has_arg,
                        opc=self.opc,
                        has_extended_arg=inst.has_extended_arg,
                    ),
                )
                continue
            elif op in self.varargs_ops:
                pos_args = argval
                if self.is_pypy and not pos_args and opname == "BUILD_MAP":
                    opname = "BUILD_MAP_n"
                else:
                    opname = "%s_%d" % (opname, pos_args)

            elif self.is_pypy and opname == "JUMP_IF_NOT_DEBUG":
                # The value in the dict is in special cases in semantic actions, such
                # as JUMP_IF_NOT_DEBUG. The value is not used in these cases, so we put
                # in arbitrary value 0.
                customize[opname] = 0
            elif opname == "UNPACK_EX":
                # FIXME: try with scanner and parser by
                # changing argval
                before_args = argval & 0xFF
                after_args = (argval >> 8) & 0xFF
                pattr = "%d before vararg, %d after" % (before_args, after_args)
                argval = (before_args, after_args)
                opname = "%s_%d+%d" % (opname, before_args, after_args)

            elif op == self.opc.JUMP_ABSOLUTE:
                #  Refine JUMP_ABSOLUTE further in into:
                #
                # * "JUMP_BACK"    - which are are used in loops. This is sometimes
                #                   found at the end of a looping construct
                # * "BREAK_LOOP"  - which are are used to break loops.
                # * "CONTINUE"    - jumps which may appear in a "continue" statement.
                #                   It is okay to confuse this with JUMP_BACK. The
                #                   grammar should tolerate this.
                # * "JUMP_FORWARD - forward jumps that are not BREAK_LOOP jumps.
                #
                # The loop-type and continue-type jumps will help us
                # classify loop boundaries The continue-type jumps
                # help us get "continue" statements with would
                # otherwise be turned into a "pass" statement because
                # JUMPs are sometimes ignored in rules as just
                # boundary overhead. Again, in comprehensions we might
                # sometimes classify JUMP_BACK as CONTINUE, but that's
                # okay since grammar rules should tolerate that.
                pattr = argval
                target = self.get_target(inst.offset)
                if target <= inst.offset:
                    next_opname = self.insts[i + 1].opname

                    # 'Continue's include jumps to loops that are not
                    # and the end of a block which follow with POP_BLOCK and COME_FROM_LOOP.
                    # If the JUMP_ABSOLUTE is to a FOR_ITER and it is followed by another JUMP_FORWARD
                    # then we'll take it as a "continue".
                    is_continue = (
                        self.insts[self.offset2inst_index[target]].opname == "FOR_ITER"
                        and self.insts[i + 1].opname == "JUMP_FORWARD"
                    )

                    if self.version < 3.8 and (
                        is_continue
                        or (
                            inst.offset in self.stmts
                            and (
                                inst.starts_line
                                and next_opname not in self.not_continue_follow
                            )
                        )
                    ):
                        opname = "CONTINUE"
                    else:
                        opname = "JUMP_BACK"
                        # FIXME: this is a hack to catch stuff like:
                        #   if x: continue
                        # the "continue" is not on a new line.
                        # There are other situations where we don't catch
                        # CONTINUE as well.
                        if tokens[-1].kind == "JUMP_BACK" and tokens[-1].attr <= argval:
                            if tokens[-2].kind == "BREAK_LOOP":
                                del tokens[-1]
                            else:
                                # intern is used because we are changing the *previous* token.
                                # A POP_TOP suggests a "break" rather than a "continue"?
                                if tokens[-2] == "POP_TOP":
                                    tokens[-1].kind = sys.intern("BREAK_LOOP")
                                else:
                                    tokens[-1].kind = sys.intern("CONTINUE")
                                    pass
                                pass
                            pass
                    if last_op_was_break and opname == "CONTINUE":
                        last_op_was_break = False
                        continue
                    pass
                else:
                    opname = "JUMP_FORWARD"

            elif opname.startswith("POP_JUMP_IF_") and not inst.jumps_forward():
                opname += "_BACK"
            elif inst.offset in self.load_asserts:
                opname = "LOAD_ASSERT"

            last_op_was_break = opname == "BREAK_LOOP"
            j = tokens_append(
                j,
                Token(
                    opname=opname,
                    attr=argval,
                    pattr=pattr,
                    offset=inst.offset,
                    linestart=inst.starts_line,
                    op=op,
                    has_arg=inst.has_arg,
                    opc=self.opc,
                    has_extended_arg=inst.has_extended_arg,
                ),
            )
            pass

        if show_asm in ("both", "after"):
            for t in tokens:
                print(t.format(line_prefix=""))
            print()
        return tokens, customize
Exemplo n.º 22
0
def make_function3_annotate(self,
                            node,
                            is_lambda,
                            nested=1,
                            code_node=None,
                            annotate_last=-1):
    """
    Dump function definition, doc string, and function
    body. This code is specialized for Python 3 functions that carry
    annotations.

    :param node:          parse-tree node for the function. Its last child
                          must be a MAKE_FUNCTION_... or MAKE_CLOSURE_... token.
    :param is_lambda:     true when a lambda rather than a "def" is written.
    :param nested:        not used in this function.
    :param code_node:     node whose ``attr`` is the code object; used unless
                          the code is taken from a LOAD_LAMBDA child of node.
    :param annotate_last: value passed in is ignored; it is recomputed below
                          as the index of the last "annotate_tuple" child.

    :return: None. Text is emitted through self.write()/self.println().
             On a parser error the error text is written and the function
             returns early (recording it in self.ERROR unless
             self.tolerate_errors is set).
    """
    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Find the last "annotate_tuple" child, scanning from the end.
    annotate_tuple = None
    for annotate_last in range(len(node) - 1, -1, -1):
        if node[annotate_last] == "annotate_tuple":
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (annotate_tuple == "annotate_tuple"
            and annotate_tuple[0] in ("LOAD_CONST", "LOAD_NAME")
            and isinstance(annotate_tuple[0].attr, tuple)):
        # Pair the annotated names (walked right to left) with the
        # annotation nodes that precede the tuple (also right to left).
        annotate_tup = annotate_tuple[0].attr
        i = -1
        j = annotate_last - 1
        lowest = -len(node)
        while j >= lowest and node[j].kind in ("annotate_arg",
                                               "annotate_tuple"):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[:args_node.attr[0]]
    else:
        defparams = node[:args_node.attr]

    # Source text of each annotation, keyed by parameter name (or "return").
    annotate_dict = {
        name: self.traverse(tree, indent="")
        for name, tree in annotate_args.items()
    }

    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    elif 3.03 <= self.version:
        # Since 3.0..3.2 is handled above, this branch is taken for every
        # version greater than 3.2.
        lambda_index = -3
    else:
        lambda_index = None

    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(code.co_varnames[:argc])
    # Must be sliced before argc is bumped for a "*args" parameter below.
    kwonly_names = list(code.co_varnames[argc:argc + kwonlyargcount])

    try:
        ast = self.build_ast(
            code._tokens,
            code._customize,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except (ParserError, ParserError2) as p:
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return

    if is_lambda:
        self.write("lambda ")
    else:
        self.write("(")

    # Continuation lines of the parameter list are aligned under the
    # column that follows what has been written so far on this line.
    last_line = self.f.getvalue().split("\n")[-1]
    indent = " " * len(last_line)
    line_number = self.line_number

    # Parameters before index i have no default value.
    i = len(paramnames) - len(defparams)
    # Consulted when the return annotation is written; previously this
    # name was read without ever being assigned.
    no_paramnames = len(paramnames[:i]) == 0
    suffix = ""

    for param in paramnames[:i]:
        self.write(suffix, param)
        suffix = ", "
        if param in annotate_dict:
            self.write(": %s" % annotate_dict[param])
            if line_number != self.line_number:
                suffix = ",\n" + indent
                line_number = self.line_number

    suffix = ", " if i > 0 else ""
    for n in node:
        if n == "pos_arg":
            self.write(suffix)
            param = paramnames[i]
            self.write(param)
            if param in annotate_args:
                aa = annotate_args[param]
                if isinstance(aa, tuple):
                    aa = aa[0]
                    self.write(': "%s"' % aa)
                elif isinstance(aa, SyntaxTree):
                    self.write(": ")
                    self.preorder(aa)

            self.write("=")
            i += 1
            self.preorder(n)
            if line_number != self.line_number:
                suffix = ",\n" + indent
                line_number = self.line_number
            else:
                suffix = ", "

    if code_has_star_arg(code):
        star_arg = code.co_varnames[argc + kwonlyargcount]
        if star_arg in annotate_dict:
            self.write(suffix, "*%s: %s" % (star_arg, annotate_dict[star_arg]))
        else:
            self.write(suffix, "*%s" % star_arg)
        argc += 1

    if kwonlyargcount > 0:
        if not code_has_star_arg(code):
            # A bare "*" separates positional from keyword-only parameters.
            self.write(", *, " if argc > 0 else "*, ")
        elif argc > 0:
            self.write(", ")

        kw_args = [None] * kwonlyargcount

        # Keyword-only parameters that have a default value.
        for n in node:
            if n == "kwargs":
                n = n[0]
            if n == "kwarg":
                # NOTE(review): eval() of token text. pattr is presumably the
                # repr of a constant produced by the scanner, so
                # ast.literal_eval looks like a safer equivalent -- confirm.
                name = eval(n[0].pattr)
                idx = kwonly_names.index(name)
                default = self.traverse(n[1], indent="")
                if name in annotate_dict:
                    kw_args[idx] = "%s: %s=%s" % (name, annotate_dict[name],
                                                  default)
                else:
                    kw_args[idx] = "%s=%s" % (name, default)

        # Remaining keyword-only parameters (those without a default).
        for idx, text in enumerate(kw_args):
            if text is None:
                name = kwonly_names[idx]
                if name in annotate_dict:
                    kw_args[idx] = "%s: %s" % (name, annotate_dict[name])
                else:
                    kw_args[idx] = "%s" % name

        self.write(", ".join(kw_args))
        ends_in_comma = False
    else:
        # With nothing written yet, "**kw" needs no separator before it.
        ends_in_comma = argc == 0

    if code_has_star_star_arg(code):
        if not ends_in_comma:
            self.write(", ")
        star_star_arg = code.co_varnames[argc + kwonlyargcount]
        if star_star_arg in annotate_dict:
            self.write("**%s: %s" %
                       (star_star_arg, annotate_dict[star_star_arg]))
        else:
            self.write("**%s" % star_star_arg)

    if is_lambda:
        self.write(": ")
    else:
        self.write(")")
        # annotate_tuple stays None when node has no "annotate_tuple" child;
        # in that case there is no return annotation to write.
        if annotate_tuple is not None and "return" in annotate_tuple[0].attr:
            if (line_number != self.line_number) and not no_paramnames:
                self.write("\n" + indent)
            self.write(" -> ")
            if "return" in annotate_dict:
                self.write(annotate_dict["return"])
            else:
                self.preorder(node[annotate_last - 1])

        self.println(":")

    if (len(code.co_consts) > 0 and code.co_consts[0] is not None
            and not is_lambda):  # ugly
        # docstring exists, dump it
        print_docstring(self, self.indent, code.co_consts[0])

    code._tokens = None  # save memory
    assert ast == "stmts"

    all_globals = find_all_globals(ast, set())
    globs, nonlocals = find_globals_and_nonlocals(ast, set(), set(), code,
                                                  self.version)
    for g in sorted((all_globals & self.mod_globs) | globs):
        self.println(self.indent, "global ", g)
    for nl in sorted(nonlocals):
        self.println(self.indent, "nonlocal ", nl)
    self.mod_globs -= all_globals
    has_none = "None" in code.co_names
    rn = has_none and not find_none(ast)
    self.gen_source(ast,
                    code.co_name,
                    code._customize,
                    is_lambda=is_lambda,
                    returnNone=rn)
    code._tokens = code._customize = None  # save memory
Exemplo n.º 23
0
    def n_classdef36(node):
        """Write the source text of a class definition ('class X(A,B,C):').

        ``self`` is not a parameter; it is taken from the enclosing scope.

        :param node: a "classdefdeco2" node, or a node whose first child is
                     the class-building subtree.

        :raises RuntimeError: when the class body code or the superclass
                              information cannot be located in a
                              "load_closure" class.

        The function finishes by calling self.prune().
        """
        # class definition ('class X(A,B,C):')
        cclass = self.currentclass

        # Pick out various needed bits of information
        # * class_name - the name of the class
        # * subclass_info - the parameters to the class  e.g.
        #      class Foo(bar, baz)
        #               ----------
        # * subclass_code - the code for the subclass body
        subclass_info = None
        if node == "classdefdeco2":
            if isinstance(node[1][1].attr, str):
                class_name = node[1][1].attr
            else:
                class_name = node[1][2].attr
            build_class = node
        else:
            build_class = node[0]
            if build_class == "build_class_kw":
                mkfunc = build_class[1]
                assert mkfunc == "mkfunc"
                subclass_info = build_class
                if hasattr(mkfunc[0], "attr") and iscode(mkfunc[0].attr):
                    subclass_code = mkfunc[0].attr
                else:
                    assert mkfunc[0] == "load_closure"
                    subclass_code = mkfunc[1].attr
                    assert iscode(subclass_code)
            if build_class[1][0] == "load_closure":
                code_node = build_class[1][1]
            else:
                code_node = build_class[1][0]
            class_name = code_node.attr.co_name

        assert "mkfunc" == build_class[1]
        mkfunc = build_class[1]
        if mkfunc[0] in ("kwargs", "no_kwargs"):
            # The code object is the first child that carries one.
            for n in mkfunc:
                if hasattr(n, "attr") and iscode(n.attr):
                    subclass_code = n.attr
                    break
            if node == "classdefdeco2":
                subclass_info = node
            else:
                subclass_info = node[0]
        elif build_class[1][0] == "load_closure":
            # Python 3 with closures not functions
            load_closure = build_class[1]
            if hasattr(load_closure[-3], "attr"):
                # Python 3.3 classes with closures work like this.
                # Note have to test before 3.2 case because
                # index -2 also has an attr.
                subclass_code = load_closure[-3].attr
            elif hasattr(load_closure[-2], "attr"):
                # Python 3.2 works like this
                subclass_code = load_closure[-2].attr
            else:
                # Raising a plain string is itself a TypeError on Python 3,
                # so a real exception type is used.
                raise RuntimeError(
                    "Internal Error n_classdef: cannot find class body")
            if hasattr(build_class[3], "__len__"):
                if not subclass_info:
                    subclass_info = build_class[3]
            elif hasattr(build_class[2], "__len__"):
                subclass_info = build_class[2]
            else:
                raise RuntimeError(
                    "Internal Error n_classdef: cannot find superclass name")
        elif node == "classdefdeco2":
            subclass_info = node
            subclass_code = build_class[1][0].attr
        elif not subclass_info:
            # Reaching here means mkfunc[0] is neither "kwargs" nor
            # "no_kwargs" and node is not "classdefdeco2"; both were tested
            # by the earlier branches of this chain.
            subclass_code = mkfunc[0].attr
            subclass_info = node[0]

        if node == "classdefdeco2":
            self.write("\n")
        else:
            self.write("\n\n")

        self.currentclass = str(class_name)
        self.write(self.indent, "class ", self.currentclass)

        self.print_super_classes3(subclass_info)
        self.println(":")

        # class body
        self.indent_more()
        self.build_class(subclass_code)
        self.indent_less()

        # Restore the class name that was current on entry.
        self.currentclass = cclass
        if len(self.param_stack) > 1:
            self.write("\n\n")
        else:
            self.write("\n\n\n")

        self.prune()
Exemplo n.º 24
0
def make_function3_annotate(self,
                            node,
                            is_lambda,
                            nested=1,
                            code_node=None,
                            annotate_last=-1):
    """
    Dump function definition, doc string, and function
    body. This code is specialized for Python 3.

    This excerpt covers only the setup: locating the annotations, picking
    the code object, and parsing it. On a parser error the error text is
    written with self.write() and the function returns.

    NOTE(review): the "except ParserError, p:" clause below is Python 2-only
    syntax, so this variant of the function only compiles under Python 2.
    """
    # NOTE(review): build_param is not called anywhere in this excerpt.
    def build_param(ast, name, default):
        """build parameters:
            - handle defaults
            - handle format tuple parameters

        Returns "name=value" when default is truthy, otherwise just name.
        """
        if default:
            value = self.traverse(default, indent="")
            maybe_show_tree_param_default(self, name, value)
            result = "%s=%s" % (name, value)
            if result[-2:] == "= ":  # default was 'LOAD_CONST None'
                result += "None"
            return result
        else:
            return name

    # MAKE_FUNCTION_... or MAKE_CLOSURE_...
    assert node[-1].kind.startswith("MAKE_")

    # Scan node from the end for the last "annotate_tuple" child. The loop
    # variable reuses, and so overwrites, the annotate_last parameter.
    annotate_tuple = None
    for annotate_last in range(len(node) - 1, -1, -1):
        if node[annotate_last] == "annotate_tuple":
            annotate_tuple = node[annotate_last]
            break
    annotate_args = {}

    if (annotate_tuple == "annotate_tuple"
            and annotate_tuple[0] in ("LOAD_CONST", "LOAD_NAME")
            and isinstance(annotate_tuple[0].attr, tuple)):
        # Walk the annotated names right to left (i) in step with the
        # children that precede the tuple (j), pairing name -> node.
        annotate_tup = annotate_tuple[0].attr
        i = -1
        j = annotate_last - 1
        l = -len(node)
        while j >= l and node[j].kind in ("annotate_arg", "annotate_tuple"):
            annotate_args[annotate_tup[i]] = node[j][0]
            i -= 1
            j -= 1

    args_node = node[-1]
    if isinstance(args_node.attr, tuple):
        # positional args are before kwargs
        defparams = node[:args_node.attr[0]]
        # The unpack requires attr to have exactly three items.
        pos_args, kw_args, annotate_argc = args_node.attr
        if "return" in annotate_args.keys():
            annotate_argc = len(annotate_args) - 1
    else:
        defparams = node[:args_node.attr]
        kw_args = 0
        annotate_argc = 0
        pass

    # Source text of each annotation, keyed by the annotated name.
    annotate_dict = {}

    for name in annotate_args.keys():
        n = self.traverse(annotate_args[name], indent="")
        annotate_dict[name] = n

    if 3.0 <= self.version <= 3.2:
        lambda_index = -2
    # 3.0..3.2 is handled above, so this branch is taken for any version
    # greater than 3.2.
    elif 3.03 <= self.version:
        lambda_index = -3
    else:
        lambda_index = None

    # For a lambda, take the code object from the LOAD_LAMBDA child when one
    # is present at lambda_index; otherwise use code_node.
    if lambda_index and is_lambda and iscode(node[lambda_index].attr):
        assert node[lambda_index].kind == "LOAD_LAMBDA"
        code = node[lambda_index].attr
    else:
        code = code_node.attr

    assert iscode(code)
    code = Code(code, self.scanner, self.currentclass)

    # add defaults values to parameter names
    argc = code.co_argcount
    kwonlyargcount = code.co_kwonlyargcount

    paramnames = list(code.co_varnames[:argc])
    # kwargs is bound only when there are keyword-only parameters.
    if kwonlyargcount > 0:
        kwargs = list(code.co_varnames[argc:argc + kwonlyargcount])

    try:
        ast = self.build_ast(
            code._tokens,
            code._customize,
            code,
            is_lambda=is_lambda,
            noneInNames=("None" in code.co_names),
        )
    except ParserError, p:
        # Emit the parser error text; record it unless errors are tolerated.
        self.write(str(p))
        if not self.tolerate_errors:
            self.ERROR = p
        return
Exemplo n.º 25
0
def getargs(co, version):
    """Get information about the arguments accepted by a code object.

    Three things are returned: (args, varargs, varkw), where 'args' is
    a list of argument names (possibly containing nested lists), and
    'varargs' and 'varkw' are the names of the * and ** arguments or None.

    'version' is accepted but not consulted. Operands are decoded as two
    bytes following the opcode.

    Raises TypeError if 'co' is not a code object."""

    if not iscode(co):
        raise TypeError('{!r} is not a code object'.format(co))

    bytecode = co.co_code
    code_len = len(bytecode)

    def byte_at(index):
        # Indexing a bytes object gives an int on Python 3 and a
        # one-character str on Python 2; ord() only accepts the latter.
        item = bytecode[index]
        return item if isinstance(item, int) else ord(item)

    nargs = co.co_argcount
    names = co.co_varnames
    args = list(names[:nargs])
    step = 0

    # The following acrobatics are for anonymous (tuple) arguments.
    for i in range(nargs):
        if args[i][:1] not in ('', '.'):
            continue
        stack, remain, count = [], [], []
        while step < code_len:
            op = byte_at(step)
            step += 1
            if op < opc.HAVE_ARGUMENT:
                continue
            opname = opc.opname[op]
            value = byte_at(step) + byte_at(step + 1) * 256
            step += 2
            if opname in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
                remain.append(value)
                count.append(value)
            elif opname in ('STORE_FAST', 'STORE_DEREF'):
                if opname == 'STORE_FAST':
                    stack.append(names[value])
                else:
                    stack.append(co.co_cellvars[value])

                # Special case for sublists of length 1: def foo((bar))
                # doesn't generate the UNPACK_TUPLE bytecode, so if
                # `remain` is empty here, we have such a sublist.
                if not remain:
                    stack[0] = [stack[0]]
                    break

                remain[-1] -= 1
                while remain[-1] == 0:
                    remain.pop()
                    size = count.pop()
                    stack[-size:] = [stack[-size:]]
                    if not remain:
                        break
                    remain[-1] -= 1
                if not remain:
                    break
        # If the bytecode ran out before any store was seen there is
        # nothing to substitute; keep the placeholder name.
        if stack:
            args[i] = stack[0]

    varargs = None
    if co.co_flags & COMPILER_FLAG_BIT["VARARGS"]:
        varargs = names[nargs]
        nargs += 1
    varkw = None
    if co.co_flags & COMPILER_FLAG_BIT["VARKEYWORDS"]:
        varkw = names[nargs]
    return Arguments(args, varargs, varkw)