示例#1
0
    def __init__(
        self,
        file_path_or_bytes: Union[str, pathlib.Path, BinaryIO],
        output_dir: pathlib.Path = None,
        **kwargs,
    ) -> None:
        """Load a candidate pyc from a path or an open binary stream.

        Parameters
        ----------
        file_path_or_bytes : Union[str, pathlib.Path, BinaryIO]
            Either a path to the file (``str`` values are converted to
            ``pathlib.Path``) or an ``io.BufferedIOBase`` stream, which is
            read to exhaustion. Any other type leaves ``self.file_contents``
            unset.
        output_dir : pathlib.Path, optional
            Where output should go. When falsy, a slugified
            ``<file name>_output`` directory next to the input file is used,
            or the current working directory if no file path is known.
        **kwargs
            ``version_hint``: a Python version string; when given it is
            converted into ``self.magic_num``.

        Raises
        ------
        TypeError
            ``validate_pyc_file`` reported that this is not a compiled
            Python file.
        RuntimeError
            The version hint could not be turned into a magic number.
        """
        if isinstance(file_path_or_bytes, str):
            file_path_or_bytes = pathlib.Path(file_path_or_bytes)

        if isinstance(file_path_or_bytes, pathlib.Path):
            utils.check_read_access(file_path_or_bytes)
            self.file_path = file_path_or_bytes
            self.file_contents = self.file_path.read_bytes()
        elif isinstance(file_path_or_bytes, io.BufferedIOBase):
            # Stream input: there is no path, so self.file_path stays unset.
            self.file_contents = file_path_or_bytes.read()

        if output_dir:
            self.output_dir = output_dir
        elif hasattr(self, "file_path"):
            self.output_dir = self.file_path.parent / utils.slugify(
                self.file_path.name + "_output")
        else:
            self.output_dir = pathlib.Path.cwd()
        utils.check_write_access(self.output_dir)

        if not self.validate_pyc_file():
            raise TypeError("[!] This is not a compiled Python file.")

        self.version_hint = kwargs.get("version_hint", None)
        if self.version_hint:
            try:
                self.magic_num = bytecode.version_str_to_magic_num_int(
                    self.version_hint)
            except Exception as err:
                # Chain the original failure so the traceback shows why the
                # conversion failed, not just that it did.
                raise RuntimeError(
                    f"Failed to produce magic number from version hint {self.version_hint}. Please try a different"
                    " version.") from err
示例#2
0
    def _determine_python_version(self):
        """Collect candidate Python magic numbers for the PYTHONSCRIPT resource.

        xdis needs to know which Python version produced the bytecode in order
        to unmarshal it correctly. Three best-effort heuristics are tried and
        their results are pooled:

            1.  A ``pythonXY.dll`` in the archive's directory (or the current
                working directory when ``self.archive_path`` is not set),
                using the version loaded by ``load_version_info``.
            2.  The magic bytes of the first ``*.pyc`` file found under that
                directory.
            3.  A ``pythonXY`` string inside ``self.resource_contents``, such
                as c:\\python24\\lib\\site-packages\\py2exe\\boot_common.py

        Returns
        -------
        set or None
            The set of candidate magic numbers, or ``None`` when no heuristic
            produced one.
        """
        potential_magic_nums = set()
        logger.debug("[*] Attempting to discover version for PYTHONSCRIPT resource")

        # Method 1: Looking for PythonXY.DLL resource in the same directory as the PYTHONSCRIPT resource. If there,
        # check to see if it has a VERSIONINFO resource with a FileVersion or ProductVersion field,
        # as these typically contain the python version. See https://github.com/erocarrera/pefile for more info on
        # the structures used below
        if hasattr(self, "archive_path"):
            parent_dir = self.archive_path.parents[0]
        else:
            parent_dir = pathlib.Path.cwd()
        for python_dll in os.listdir(parent_dir):
            if not re.match(r"python[0-9]{0,2}\.dll", python_dll, re.I):
                continue
            logger.debug(f"[*] Found python DLL resource {str(python_dll)} in directory {parent_dir}")
            try:
                dll_class_inst = PortableExecutable(parent_dir.joinpath(python_dll))
            except TypeError:
                logger.debug(f"[!] PyDecipher could not create a PE/DLL class instance for {str(python_dll)}")
            except Exception as err:
                # This probe is best-effort. Failures used to be discarded
                # silently by a `break` inside `finally`; log them instead.
                logger.debug(f"[!] Unexpected error while loading {str(python_dll)}: {err}")
            else:
                try:
                    dll_class_inst.load_version_info(quiet=True)
                    if dll_class_inst.python_version:
                        potential_magic_nums.add(version_str_to_magic_num_int(dll_class_inst.python_version))
                except Exception as err:
                    logger.debug(f"[!] Could not read version info from {str(python_dll)}: {err}")
            # Only the first matching DLL is ever inspected.
            break

        # Method 2: Check to see if there are pyc files in the same directory with magic numbers
        for pyc_file in parent_dir.rglob("*.pyc"):
            with pyc_file.open("rb") as pyc_file_ptr:
                try:
                    magic_num = magic2int(pyc_file_ptr.read(4))
                except Exception as err:
                    logger.debug(f"[!] Could not read magic number from {pyc_file}: {err}")
                else:
                    potential_magic_nums.add(magic_num)
            # Only the first pyc file found is sampled.
            break

        # Method 3: Searching the PYTHONSCRIPT resource for strings like
        # c:\python24\lib\site-packages\py2exe\boot_common.py
        b_python_regex = re.compile(b"(python)([0-9]{2})", re.I)
        script_re_obj = b_python_regex.search(self.resource_contents)
        if script_re_obj:
            version_str = script_re_obj.group(2).decode("utf-8")
            logger.info(
                "[*] Detected potential version string in PYTHONSCRIPT resource: {}".format(
                    script_re_obj.group().decode("utf-8")
                )
            )
            # Two digits "XY" are interpreted as version "X.Y".
            potential_magic_nums.add(version_str_to_magic_num_int(version_str[0] + "." + version_str[1]))

        if potential_magic_nums:
            logger.info(f"[*] Will attempt to unmarshal using these python magic numbers: {potential_magic_nums}")
            return potential_magic_nums

        logger.info(
            "[!] Couldn't find any python magic numbers to hint at the python version of this resource. "
            "Will attempt to brute-force determine the correct magic number."
        )
        return None
示例#3
0
def run(_args: List[str] = None) -> None:
    """Orchestrate the flow of the remap command.

    This is the entry-point of the remap command. It calls out to other routines
    and attempts to follow this high-level flow:

        1.  Check that program is running in sufficiently new Python
            environment, and parse any arguments
        2.  Determine what type of input was passed to program, which will
            ultimately decide what method remap uses to recover the opmap.
        3.  Attempt one of the opmap recovery methods (see documentation for
            more on these methods)
        4.  If the opmap was successfully recovered, validate it, then write
            it to a file.

    The process exits with status 1 (via ``sys.exit``) when the interpreter is
    too old, when no reference megafile can be located, when a remapping file
    cannot be parsed or is invalid, or when no opmap could be produced.

    Parameters
    ----------
    _args : List[str]
        If this function is being called from other Python code, remap
        flags and other command-line options can be passed in as a list.

    Raises
    ------
    ValueError
        In the standard/default method, when either bytecode path is not a
        directory.
    """
    if sys.version_info < (3, 8):
        logger.critical(
            "[!] This tool can only be run in Python 3.8 or later.")
        sys.exit(1)
    utils.check_for_our_xdis()

    args: argparse.Namespace = _parse_args(_args)

    logging_options: Dict[str, Union[bool, os.PathLike]] = {
        "verbose": args.verbose,
        "quiet": args.quiet
    }
    pydecipher.set_logging_options(**logging_options)

    remapped_bytecode_path: pathlib.Path = pathlib.Path(
        args.remapped_bytecode_path).resolve()

    if args.output:
        output_dir: pathlib.Path = pathlib.Path(args.output.strip()).resolve()
    else:
        output_dir = pathlib.Path.cwd()
    output_dir = output_dir / f"remap_output_{utils.slugify(remapped_bytecode_path.name)}"

    # The following block sets up logging to a stringIO stream, which will
    # eventually be placed in a file. We don't immediately log to a file because
    # we don't want to leave a log file on disk unless the program succeeds.
    log_stream: io.StringIO = io.StringIO()
    log_stream_handler: logging.StreamHandler = logging.StreamHandler(
        log_stream)
    log_stream_handler.setFormatter(pydecipher.log_format)
    log_stream_handler.setLevel(logging.DEBUG)
    logger.addHandler(log_stream_handler)

    remappings: Dict[int, Dict[int, int]] = {}
    version: str = ""
    remapping_method: str = ""
    cli: str = " ".join(sys.argv) if not _args else " ".join(_args)
    if args.version:
        version = args.version

    if args.megafile:
        # Start as None so the "not found" check below is reached cleanly
        # when the argument is a version string with no matching reference
        # file (previously this raised UnboundLocalError).
        standard_bytecode_path: pathlib.Path = None
        # Determine if argument is a version or a path
        if pathlib.Path(args.megafile).exists():
            standard_bytecode_path = pathlib.Path(args.megafile)
        else:
            potential_version: str = args.megafile
            magic_num: int = bytecode.version_str_to_magic_num_int(
                potential_version)
            if magic_num:
                # Look through the bundled reference files for one whose
                # magic number matches the requested version.
                compiled_dir: pathlib.Path = (pathlib.Path(__file__).parent /
                                              "reference_files" / "compiled")
                compiled_file: str
                for compiled_file in os.listdir(compiled_dir):
                    full_path_obj: pathlib.Path = compiled_dir / compiled_file
                    infile: BinaryIO
                    with full_path_obj.open("rb") as infile:
                        if xdis.magics.magic2int(infile.read(4)) == magic_num:
                            logger.info(
                                f"[*] Found matching megafile for version {potential_version}"
                            )
                            standard_bytecode_path = full_path_obj
                            break
            if not standard_bytecode_path:
                logger.error(
                    "[!] Something went wrong. remap could not find a standard compiled version of this megafile."
                )
                sys.exit(1)
        remappings, version = megafile_remap(standard_bytecode_path,
                                             remapped_bytecode_path)
        remapping_method = "Megafile"
    elif args.opcode_file:
        remappings, version = opcode_constants_remap(remapped_bytecode_path,
                                                     provided_version=version)
        remapping_method = "opcode.pyc constants-walking"
    elif args.standard_bytecode_path:
        standard_bytecode_path = pathlib.Path(
            args.standard_bytecode_path).resolve()
        utils.check_read_access(standard_bytecode_path)
        utils.check_read_access(remapped_bytecode_path)
        utils.check_write_access(output_dir)
        if not remapped_bytecode_path.is_dir():
            raise ValueError(
                "The standard/default remapping method requires a directory containing Python bytecode files"
            )
        if not standard_bytecode_path.is_dir():
            raise ValueError(
                "If you are going to provide your own reference opcode set, it must be a directory of "
                "Python bytecode files")
        remappings, version = standard_pyc_remap(standard_bytecode_path,
                                                 remapped_bytecode_path,
                                                 version=version)
        remapping_method = "Diff'ing against standard library bytecode"
    elif args.check_remapping:
        # Here, remapped_bytecode_path is not actually bytecode, its a remapping
        # file.
        utils.check_read_access(remapped_bytecode_path)
        remapping_file: TextIO
        with remapped_bytecode_path.open() as remapping_file:
            try:
                remapping_json: Dict[str, Union[str, int]] = json.loads(
                    remapping_file.read())
            except json.decoder.JSONDecodeError as e:
                logger.error(f"Could not read remapping file with error: {e}")
                sys.exit(1)
            version = remapping_json["python_version"]
            # Each entry is a dict carrying at least "opname" and
            # "remapped_value".
            remappings_list: List[Dict[str, Union[
                bool, str, int]]] = remapping_json["remapped_opcodes"]
            remapping_dict: Dict[str, int] = {
                d["opname"]: d["remapped_value"]
                for d in remappings_list
            }
            if bytecode.validate_opmap(version, remapping_dict):
                logger.info("[*] This opmap is valid.")
                return
            msg: str = "This opmap is not valid."
            if not logging_options["verbose"]:
                msg += " Run with --verbose flag for more information."
            logger.warning(f"[!] {msg}")
            sys.exit(1)

    if not remappings:
        logger.warning(
            "[!] Remap couldn't produce the new opmap. Run with --verbose for more information."
        )
        sys.exit(1)

    remappings: Dict[int, int] = fix_remapping_conflicts(remappings)
    remappings: Dict[int,
                     Tuple[int,
                           bool]] = fill_opmap_gaps(remappings, version)
    output_file_path: pathlib.Path = write_remapping_file(
        remappings, version, remapping_method, cli, output_dir=output_dir)
    logger.info(
        f"[*] Remapping file {output_file_path.name} written to {output_file_path.parent}."
    )

    # If we successfully produced the remapping file, we want to also
    # include the logged output of remap.
    log_name: str = datetime.datetime.now().strftime(
        "log_%H_%M_%S_%b_%d_%Y.txt")
    log_file_ptr: TextIO
    with output_dir.joinpath(log_name).open("w") as log_file_ptr:
        log_file_ptr.write(log_stream.getvalue())
    logging_options = {"log_path": output_dir.joinpath(log_name)}
    pydecipher.set_logging_options(**logging_options)
示例#4
0
def standard_pyc_remap(
        standard_bytecode_path: pathlib.Path,
        remapped_bytecode_path: pathlib.Path,
        version: str = None) -> Tuple[Dict[int, Dict[int, int]], str]:
    """Diff compiled code objects from standard library and modified interpreter to try and recreate opcode mappings.

    This method is similar to the megafile method, but at a larger scale.
    See the remap documentation for more information on this method.

    Files from both trees are grouped by their base name (the part before the
    first "."). Each remapped file is paired with the same-named reference
    file whose relative path is most similar, both are disassembled, and the
    opcode differences are accumulated across all pairs.

    Parameters
    ----------
    standard_bytecode_path: pathlib.Path
        The path on disk to the reference set of standard-compiled bytecode. The version of Python for the reference set
        must correspond to the version of Python used as a base for the modified interpreter.
    remapped_bytecode_path: pathlib.Path
        The path on disk to the set of bytecode compiled by the modified interpreter
    version: str, optional
        The version of Python that this opcode file corresponds to.

    Returns
    -------
     Tuple[Dict[int, Dict[int, int]], str]
        A tuple containing a dictionary of original_opcode to
        Dict[replacement_opcode:replacement_count] and the opmap's Python
        version. replacement_opcode is an opcode that was seen in place of
        original_opcode, and the replacement_count is the amount of times it was
        seen replacing the original_opcode throughout all the bytecode that was
        analyzed.
    """
    # Pass 1: index the reference pyc files by base name and, along the way,
    # sniff the Python version from the first readable magic number.
    reference_files: Dict[str, List[pathlib.Path]] = {}
    determined_version: str = ""
    pyc_file: pathlib.Path
    for pyc_file in standard_bytecode_path.rglob("*.pyc"):
        pyc_file_name: str = pyc_file.name.split(".")[0]
        if pyc_file_name == "__init__":
            continue
        if not determined_version:
            try:
                infile: BinaryIO
                with pyc_file.open("rb") as infile:
                    pyc_magic_bytes: bytes = infile.read(4)
                    # Work on a copy so pop() does not mutate the entry
                    # stored in xdis.magics.by_magic.
                    version_set: Set[str] = copy.deepcopy(
                        xdis.magics.by_magic[pyc_magic_bytes])
                    determined_version = version_set.pop()
            except Exception:
                # Best effort: try again with the next reference file.
                pass
            else:
                logger.debug(
                    f"Determined version {determined_version} from reference bytecode."
                )
                if version and bytecode.version_str_to_magic_num_int(
                        determined_version
                ) != bytecode.version_str_to_magic_num_int(version):
                    logger.warning(
                        f"Provided version {version} does not equal the version determined in the reference pyc "
                        f"set ({determined_version}). We will proceed with the version you provided."
                    )
        reference_files.setdefault(pyc_file_name, []).append(pyc_file)

    if not version:
        version = determined_version

    # Pass 2: index every file in the remapped tree that the Pyc class accepts.
    remapped_files: Dict[str, List[pathlib.Path]] = {}
    for pyc_file in remapped_bytecode_path.rglob("*"):
        if not pyc_file.is_file():
            continue
        try:
            kwargs: Dict[str, str] = {"version_hint": version}
            artifact_types.pyc.Pyc(pyc_file, **kwargs)
        except TypeError:
            # Pyc raises TypeError for files that are not compiled Python.
            continue
        pyc_file_name = pyc_file.name.split(".")[0]
        if pyc_file_name == "__init__":
            # Too common a filename, causes more problems than its worth to try to include these
            # since they are usually empty anyway.
            continue
        remapped_files.setdefault(pyc_file_name, []).append(pyc_file)

    # Pass 3: pair each remapped file with its best reference and diff them.
    master_remapping_counts: Dict[int, Dict[int, int]] = {}
    pyc_filename: str
    list_of_filepaths: List[pathlib.Path]
    for pyc_filename, list_of_filepaths in remapped_files.items():
        if pyc_filename not in reference_files:
            continue
        pyc_filepath: pathlib.Path
        for pyc_filepath in list_of_filepaths:
            # Among same-named reference files, choose the one whose relative
            # path shares the most with the remapped file's relative path.
            relative_remapped_filepath: str = str(
                pyc_filepath.relative_to(remapped_bytecode_path))
            reference_file: pathlib.Path = None
            highest_similarity: float = 0.0
            ref_pyc_filepath: pathlib.Path
            for ref_pyc_filepath in reference_files[pyc_filename]:
                relative_reference_filepath: str = str(
                    ref_pyc_filepath.relative_to(standard_bytecode_path))
                path_similarity: float = textdistance.lcsstr.normalized_similarity(
                    relative_reference_filepath, relative_remapped_filepath)
                if path_similarity > highest_similarity:
                    highest_similarity = path_similarity
                    reference_file = ref_pyc_filepath
            if not reference_file:
                continue

            # Keep fixed_pyc_file referenced until disassembly is done: a
            # NamedTemporaryFile is removed from disk once it is closed.
            fixed_pyc_file: tempfile.NamedTemporaryFile
            if fixed_pyc_file := artifact_types.pyc.Pyc.check_and_fix_pyc(
                    pyc_filepath, provided_version=version):
                logger.debug(
                    f"[+] Duplicated file {str(pyc_filepath)} to correct issues with the pyc. New filepath:"
                    f" {fixed_pyc_file.name}")
                pyc_filepath = fixed_pyc_file.name

            try:
                # The disassembly text itself is unwanted; send it to the
                # null device and close that handle when both calls finish.
                with open(os.devnull, "w") as devnull:
                    (
                        remapped_filename,
                        remapped_co,  # CodeType, can also be xdis codetypes
                        remapped_version,
                        remapped_timestamp,
                        remapped_magic_int,
                        remapped_is_pypy,
                        remapped_source_size,
                        remapped_sip_hash,
                    ) = xdis.disasm.disassemble_file(str(pyc_filepath),
                                                     header=True,
                                                     outstream=devnull)
                    (
                        reference_filename,
                        reference_co,  # CodeType, can also be xdis codetypes
                        reference_version,
                        reference_timestamp,
                        reference_magic_int,
                        reference_is_pypy,
                        reference_source_size,
                        reference_sip_hash,
                    ) = xdis.disasm.disassemble_file(str(reference_file),
                                                     outstream=devnull)
            except Exception:
                # Skip any pair that cannot be disassembled.
                continue

            # From here on the version reported for the reference file is used.
            version = str(reference_version)

            try:
                remappings: Dict[int, Dict[int, int]] = bytecode.diff_opcode(
                    reference_co, remapped_co, version)
            except RuntimeError:
                continue

            # merge these remappings into the larger dictionary.
            opcode_val: int
            remap_options: Dict[int, int]
            for opcode_val, remap_options in remappings.items():
                merged_counts: Dict[int, int] = master_remapping_counts.setdefault(
                    opcode_val, {})
                remap_option: int
                count: int
                for remap_option, count in remap_options.items():
                    merged_counts[remap_option] = merged_counts.get(
                        remap_option, 0) + count
示例#5
0
    def check_and_fix_pyc(
        pyc_file: pathlib.Path,
        provided_version: str = None
    ) -> Union[None, tempfile.NamedTemporaryFile]:
        """Fix a given pyc file so it can be properly disassembled by xdis.

        This function combats the following common obfuscations that may be
        applied to pyc files that would prevent them from easily being disassembled

            1. Missing the header entirely
            2. Missing only the magic bytes
            3. Magic bytes are there, but they don't match a known version
            4. Filename doesn't end in .pyc

        Parameters
        ----------
        pyc_file: pathlib.Path
            The path to the pyc file
        provided_version: str, optional
            The version of the Python that compiled the pyc, if known.

        Raises
        ------
        RuntimeError
            None of the known marshalled-code-object leading byte patterns
            appears in the first 24 bytes, or the pyc file is malformed and
            couldn't be corrected because no version was given.

        Returns
        -------
        Union[None, tempfile.NamedTemporaryFile]
            If the pyc file is fine as is, this function returns None. If it
            needs to be fixed in some way, the temporary file object
            with the fixes is returned. The caller must keep a reference to
            it for as long as the file is needed on disk.
        """
        utils.check_read_access(pyc_file)
        all_bytes: bytes = pyc_file.read_bytes()
        first_24_bytes: bytes = all_bytes[:24]

        if not any(pattern in first_24_bytes
                   for pattern in Pyc.MARSHALLED_CODE_OBJECT_LEADING_BYTES):
            raise RuntimeError(f"This file {str(pyc_file)} isn't pyc file!")

        # A replacement header can only be built when a version is known.
        correct_magic_num = None
        header: bytes = b""
        if provided_version:
            correct_magic_num = bytecode.version_str_to_magic_num_int(
                provided_version)
            header = bytecode.create_pyc_header(correct_magic_num)

        corrected_file_contents: bytes = b""
        if Pyc.is_headerless(first_24_bytes[:8]):
            # Is this pyc completely missing a header?
            if not provided_version:
                logger.error(
                    "[!] The pyc file provided does not have a header. For remap to decompile this, please provide a"
                    " version with the --version flag")
                raise RuntimeError(
                    f"The pyc file {str(pyc_file)} has no header and no version was provided to rebuild it.")
            corrected_file_contents = header + all_bytes

        elif first_24_bytes[0:4] not in by_magic:
            # Does have a header of sorts, but can't recognize magic numbers.
            # We'll need a version from the user to proceed
            unknown_magic_num: int = struct.unpack('<H', first_24_bytes[0:2])[0]
            if not provided_version:
                logger.error(
                    "[!] This version has a header, but we can't recognize the magic number"
                    f" {unknown_magic_num}. No version was provided to fix the header."
                )
                raise RuntimeError(
                    f"The pyc file {str(pyc_file)} has an unrecognized magic number and no version was provided.")
            logger.debug(
                "[*] This version has a header, but we can't recognize the magic number"
                f" {unknown_magic_num}. Using magic num {correct_magic_num} (from"
                f" provided version {provided_version}) to fix the header."
            )
            # Drop everything before the code object and put a fresh header
            # in front of it. The first pattern (in list order) that occurs
            # anywhere in the file decides where the code object begins.
            code_object_begin_index: int = -1
            pattern: bytes
            for pattern in Pyc.MARSHALLED_CODE_OBJECT_LEADING_BYTES:
                if pattern in all_bytes:
                    code_object_begin_index = all_bytes.index(pattern)
                    break
            corrected_file_contents = header + all_bytes[code_object_begin_index:]

        if corrected_file_contents:
            bytes_to_write_out: bytes = corrected_file_contents
        elif pyc_file.suffix != ".pyc":
            # There was nothing to correct except the filename, so we just duplicate the file.
            bytes_to_write_out = all_bytes
        else:
            # There was nothing to do with this pyc file. It is seemingly valid.
            return None

        # Write through the temp file's own handle. Reopening it by name
        # while it is still open does not work on every platform.
        temp_file: tempfile.NamedTemporaryFile = tempfile.NamedTemporaryFile(
            suffix=".pyc")
        temp_file.write(bytes_to_write_out)
        temp_file.flush()  # make the bytes visible to readers that open by name
        temp_file.seek(0)
        return temp_file