Exemplo n.º 1
0
def extract_tar_file(archive_path, output_dir, relative_to, extractors=None):
    """
    Unpack a regular or compressed tar archive into the output directory.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.

    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    extractors is a dictionary of ExtractorEnum to a command or path to the
        extractor binary. When None, DEFAULT_EXTRACTORS is used. On Windows, an entry
        equal to USE_REGISTRY is resolved through the matching registry lookup helper.

    On Windows, 7-zip is tried first and WinRAR second; on UNIX, the tar binary is tried.
    If no external extractor binary is found, the Python-based extractor is used.

    Raises ExtractionError if unexpected issues arise during unpacking.
    Raises NotImplementedError if the running platform is neither Windows nor UNIX.
    """
    extractor_cmds = DEFAULT_EXTRACTORS if extractors is None else extractors
    platform = get_running_platform()

    if platform == PlatformEnum.WINDOWS:
        # Candidates in order of preference: 7-zip first, WinRAR second.
        windows_candidates = (
            (ExtractorEnum.SEVENZIP, _find_7z_by_registry, _extract_tar_with_7z),
            (ExtractorEnum.WINRAR, _find_winrar_by_registry, _extract_tar_with_winrar),
        )
        for extractor_key, locate_in_registry, unpack in windows_candidates:
            command = extractor_cmds.get(extractor_key)
            if command == USE_REGISTRY:
                command = str(locate_in_registry())
            binary = _find_extractor_by_cmd(command)
            if binary is not None:
                unpack(binary, archive_path, output_dir, relative_to)
                return
        get_logger().warning(
            'Neither 7-zip nor WinRAR were found. Falling back to Python extractor...')
    elif platform == PlatformEnum.UNIX:
        # NOTE: 7-zip isn't an option because it doesn't preserve file permissions
        tar_cmd = extractor_cmds.get(ExtractorEnum.TAR)
        tar_bin = _find_extractor_by_cmd(tar_cmd)
        if tar_bin is not None:
            _extract_tar_with_tar(tar_bin, archive_path, output_dir, relative_to)
            return
    else:
        # This is not a normal code path, so make it clear.
        raise NotImplementedError(platform)

    # No external extractor was usable: fall back to the Python-based extractor
    _extract_tar_with_python(archive_path, output_dir, relative_to)
Exemplo n.º 2
0
def extract_with_7z(
        archive_path,
        output_dir,
        relative_to, #pylint: disable=too-many-arguments
        extractors=None):
    """
    Extract archives with 7-zip into the output directory.
    Only supports archives with one layer of unpacking, so compressed tar archives don't work.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.

    relative_to is a pathlib.Path for directories that should be stripped relative to the
    root of the archive. When it is None, the check for a pre-existing
    output_dir / relative_to directory is skipped.
    extractors is a dictionary of ExtractorEnum to a command or path to the
    extractor binary. When None, DEFAULT_EXTRACTORS is used. An entry equal to
    USE_REGISTRY for 7-zip is resolved through the Windows registry lookup helper.

    Raises ExtractionError if the registry lookup is requested on a non-Windows platform,
    the 7-zip binary cannot be found, output_dir / relative_to already exists, or the
    7z command exits with a non-zero return code.
    """
    # TODO: It would be nice to extend this to support arbitrary standard IO chaining of 7z
    # instances, so _extract_tar_with_7z and other future formats could use this.
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS
    sevenzip_cmd = extractors.get(ExtractorEnum.SEVENZIP)
    if sevenzip_cmd == USE_REGISTRY:
        if get_running_platform() != PlatformEnum.WINDOWS:
            get_logger().error('"%s" for 7-zip is only available on Windows', sevenzip_cmd)
            raise ExtractionError()
        sevenzip_cmd = str(_find_7z_by_registry())
    sevenzip_bin = _find_extractor_by_cmd(sevenzip_cmd)
    if sevenzip_bin is None:
        # The lookup helper reports a missing binary as None. Without this check the
        # command tuple below would contain None and ' '.join(cmd) would raise TypeError
        # instead of the documented ExtractionError.
        get_logger().error('Unable to find 7-zip binary: %s', sevenzip_cmd)
        raise ExtractionError()

    if relative_to is not None and (output_dir / relative_to).exists():
        get_logger().error('Temporary unpacking directory already exists: %s',
                           output_dir / relative_to)
        raise ExtractionError()
    # 'x' extracts with full paths, '-aoa' overwrites existing files without prompting,
    # and '-o<dir>' sets the output directory.
    cmd = (sevenzip_bin, 'x', str(archive_path), '-aoa', '-o{}'.format(str(output_dir)))
    get_logger().debug('7z command line: %s', ' '.join(cmd))

    result = subprocess.run(cmd)
    if result.returncode != 0:
        get_logger().error('7z command returned %s', result.returncode)
        raise ExtractionError()

    _process_relative_to(output_dir, relative_to)
Exemplo n.º 3
0
def extract_with_7z(
        archive_path,
        output_dir,
        relative_to, #pylint: disable=too-many-arguments
        extractors=None):
    """
    Extract archives with 7-zip into the output directory.
    Only supports archives with one layer of unpacking, so compressed tar archives don't work.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.

    relative_to is a pathlib.Path for directories that should be stripped relative to the
    root of the archive. When it is None, the check for a pre-existing
    output_dir / relative_to directory is skipped.
    extractors is a dictionary of ExtractorEnum to a command or path to the
    extractor binary. When None, DEFAULT_EXTRACTORS is used. An entry equal to
    SEVENZIP_USE_REGISTRY is resolved through the Windows registry lookup helper.

    Raises ExtractionError if the registry lookup is requested on a non-Windows platform,
    the 7-zip binary cannot be found, output_dir / relative_to already exists, or the
    7z command exits with a non-zero return code.
    """
    # TODO: It would be nice to extend this to support arbitrary standard IO chaining of 7z
    # instances, so _extract_tar_with_7z and other future formats could use this.
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS
    sevenzip_cmd = extractors.get(ExtractorEnum.SEVENZIP)
    if sevenzip_cmd == SEVENZIP_USE_REGISTRY:
        if get_running_platform() != PlatformEnum.WINDOWS:
            get_logger().error('"%s" for 7-zip is only available on Windows', sevenzip_cmd)
            raise ExtractionError()
        sevenzip_cmd = str(_find_7z_by_registry())
    sevenzip_bin = _find_extractor_by_cmd(sevenzip_cmd)
    if sevenzip_bin is None:
        # The lookup helper reports a missing binary as None. Fail with the documented
        # ExtractionError here rather than letting ' '.join(cmd) raise TypeError below.
        get_logger().error('Unable to find 7-zip binary: %s', sevenzip_cmd)
        raise ExtractionError()

    if relative_to is not None and (output_dir / relative_to).exists():
        get_logger().error('Temporary unpacking directory already exists: %s',
                           output_dir / relative_to)
        raise ExtractionError()
    # 'x' extracts with full paths, '-aoa' overwrites existing files without prompting,
    # and '-o<dir>' sets the output directory.
    cmd = (sevenzip_bin, 'x', str(archive_path), '-aoa', '-o{}'.format(str(output_dir)))
    get_logger().debug('7z command line: %s', ' '.join(cmd))

    result = subprocess.run(cmd)
    if result.returncode != 0:
        get_logger().error('7z command returned %s', result.returncode)
        raise ExtractionError()

    _process_relative_to(output_dir, relative_to)
Exemplo n.º 4
0
def extract_with_winrar(
        archive_path,
        output_dir,
        relative_to, #pylint: disable=too-many-arguments
        extractors=None):
    """
    Extract archives with WinRAR into the output directory.
    Only supports archives with one layer of unpacking, so compressed tar archives don't work.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.

    relative_to is a pathlib.Path for directories that should be stripped relative to the
    root of the archive. When it is None, the check for a pre-existing
    output_dir / relative_to directory is skipped.
    extractors is a dictionary of ExtractorEnum to a command or path to the
    extractor binary. When None, DEFAULT_EXTRACTORS is used. An entry equal to
    USE_REGISTRY for WinRAR is resolved through the Windows registry lookup helper.

    Raises ExtractionError if the registry lookup is requested on a non-Windows platform,
    the WinRAR binary cannot be found, output_dir / relative_to already exists, or the
    WinRAR command exits with a non-zero return code.
    """
    if extractors is None:
        extractors = DEFAULT_EXTRACTORS
    winrar_cmd = extractors.get(ExtractorEnum.WINRAR)
    if winrar_cmd == USE_REGISTRY:
        if get_running_platform() != PlatformEnum.WINDOWS:
            get_logger().error('"%s" for WinRAR is only available on Windows', winrar_cmd)
            raise ExtractionError()
        winrar_cmd = str(_find_winrar_by_registry())
    winrar_bin = _find_extractor_by_cmd(winrar_cmd)
    if winrar_bin is None:
        # The lookup helper reports a missing binary as None. Fail with the documented
        # ExtractionError here rather than letting ' '.join(cmd) raise TypeError below.
        get_logger().error('Unable to find WinRAR binary: %s', winrar_cmd)
        raise ExtractionError()

    if relative_to is not None and (output_dir / relative_to).exists():
        get_logger().error('Temporary unpacking directory already exists: %s',
                           output_dir / relative_to)
        raise ExtractionError()
    # 'x' extracts with full paths and '-o+' overwrites existing files.
    cmd = (winrar_bin, 'x', '-o+', str(archive_path), str(output_dir))
    get_logger().debug('WinRAR command line: %s', ' '.join(cmd))

    result = subprocess.run(cmd)
    if result.returncode != 0:
        get_logger().error('WinRAR command returned %s', result.returncode)
        raise ExtractionError()

    _process_relative_to(output_dir, relative_to)
Exemplo n.º 5
0
def extract_tar_file(archive_path, output_dir, relative_to, extractors=None):
    """
    Unpack a regular or compressed tar archive into the output directory.

    archive_path is the pathlib.Path to the archive to unpack
    output_dir is a pathlib.Path to the directory to unpack. It must already exist.

    relative_to is a pathlib.Path for directories that should be stripped relative to the
        root of the archive, or None if no path components should be stripped.
    extractors is a dictionary of ExtractorEnum to a command or path to the
        extractor binary. When None, DEFAULT_EXTRACTORS is used. On Windows, a 7-zip
        entry equal to SEVENZIP_USE_REGISTRY is resolved through the registry lookup helper.

    On Windows, 7-zip is tried; on UNIX, the tar binary is tried. If the external
    extractor binary is not found, the Python-based extractor is used.

    Raises ExtractionError if unexpected issues arise during unpacking.
    Raises NotImplementedError if the running platform is neither Windows nor UNIX.
    """
    extractor_cmds = DEFAULT_EXTRACTORS if extractors is None else extractors
    platform = get_running_platform()

    # Pick the platform's native extractor binary and the routine that drives it.
    if platform == PlatformEnum.WINDOWS:
        command = extractor_cmds.get(ExtractorEnum.SEVENZIP)
        if command == SEVENZIP_USE_REGISTRY:
            command = str(_find_7z_by_registry())
        native_bin = _find_extractor_by_cmd(command)
        native_unpack = _extract_tar_with_7z
    elif platform == PlatformEnum.UNIX:
        # NOTE: 7-zip isn't an option because it doesn't preserve file permissions
        command = extractor_cmds.get(ExtractorEnum.TAR)
        native_bin = _find_extractor_by_cmd(command)
        native_unpack = _extract_tar_with_tar
    else:
        # This is not a normal code path, so make it clear.
        raise NotImplementedError(platform)

    if native_bin is not None:
        native_unpack(native_bin, archive_path, output_dir, relative_to)
    else:
        # Fallback to Python-based extractor on all platforms
        _extract_tar_with_python(archive_path, output_dir, relative_to)