Пример #1
0
    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file."""
        try:
            endrec = _EndRecData(self.url)
        except IOError:
            raise BadZipfile("File is not a zip file")
        if not endrec:
            raise BadZipfile, "File is not a zip file"
        if self.debug > 1:
            print endrec
        size_cd = endrec[_ECD_SIZE]  # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]  # offset of central directory
        self.comment = endrec[_ECD_COMMENT]  # archive comment

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        # if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        #   # If Zip64 extension structures are present, account for them
        #   concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        if self.debug > 2:
            inferred = concat + offset_cd
            print "given, inferred, offset", offset_cd, inferred, concat
            # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        ECD = _http_get_partial_data(self.url, self.start_dir, self.start_dir + size_cd - 1)
        data = ECD.read()
        ECD.close()
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile, "Bad magic number for central directory"
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print centdir
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (
                x.create_version,
                x.create_system,
                x.extract_version,
                x.reserved,
                x.flag_bits,
                x.compress_type,
                t,
                d,
                x.CRC,
                x.compress_size,
                x.file_size,
            ) = centdir[1:12]
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F, t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

            x._decodeExtra()
            x.header_offset = x.header_offset + concat
            x.filename = x._decodeFilename()
            self.filelist.append(x)
            self.NameToInfo[x.filename] = x

            # update total bytes read from central directory
            total = (
                total
                + sizeCentralDir
                + centdir[_CD_FILENAME_LENGTH]
                + centdir[_CD_EXTRA_FIELD_LENGTH]
                + centdir[_CD_COMMENT_LENGTH]
            )

        if self.debug > 2:
            print "total", total
Пример #2
0
    def get_zip_infos(self, *filenames):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        max_file_count = self.max_file_count

        if not fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        filenames = set(filenames)
        if len(filenames) == 0:
            return

        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")

        size_cd = endrec[_ECD_SIZE]  # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]  # offset of central directory

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        # start_dir:  Position of start of central directory
        start_dir = offset_cd + concat
        fp.seek(start_dir, 0)
        data = fp.read(size_cd)
        fp = BytesIO(data)
        total = 0
        file_count = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & _UTF8_EXTENSION_FLAG:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d, x.CRC, x.compress_size,
             x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F, t >> 11,
                           (t >> 5) & 0x3F, (t & 0x1F) * 2)

            x._decodeExtra()
            x.header_offset = x.header_offset + concat

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] +
                     centdir[_CD_EXTRA_FIELD_LENGTH] +
                     centdir[_CD_COMMENT_LENGTH])

            file_count += 1
            if max_file_count is not None and file_count > max_file_count:
                raise TooManyFiles('Too many files in egg')

            if x.filename in filenames:
                filenames.discard(x.filename)
                yield x

            if len(filenames) == 0:
                return
Пример #3
0
    def get_zip_infos(self, *filenames):
        """Read in the table of contents for the ZIP file."""
        fp = self.fp
        max_file_count = self.max_file_count

        if not fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")

        filenames = set(filenames)
        if len(filenames) == 0:
            return

        try:
            endrec = _EndRecData(fp)
        except OSError:
            raise BadZipFile("File is not a zip file")
        if not endrec:
            raise BadZipFile("File is not a zip file")

        size_cd = endrec[_ECD_SIZE]             # bytes in central directory
        offset_cd = endrec[_ECD_OFFSET]         # offset of central directory

        # "concat" is zero, unless zip was concatenated to another file
        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
        if endrec[_ECD_SIGNATURE] == stringEndArchive64:
            # If Zip64 extension structures are present, account for them
            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

        # start_dir:  Position of start of central directory
        start_dir = offset_cd + concat
        fp.seek(start_dir, 0)
        data = fp.read(size_cd)
        fp = BytesIO(data)
        total = 0
        file_count = 0
        while total < size_cd:
            centdir = fp.read(sizeCentralDir)
            if len(centdir) != sizeCentralDir:
                raise BadZipFile("Truncated central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if centdir[_CD_SIGNATURE] != stringCentralDir:
                raise BadZipFile("Bad magic number for central directory")
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            flags = centdir[5]
            if flags & _UTF8_EXTENSION_FLAG:
                # UTF-8 file names extension
                filename = filename.decode('utf-8')
            else:
                # Historical ZIP filename encoding
                filename = filename.decode('cp437')
            # Create ZipInfo instance to store file information
            x = ZipInfo(filename)
            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (x.create_version, x.create_system, x.extract_version, x.reserved,
             x.flag_bits, x.compress_type, t, d,
             x.CRC, x.compress_size, x.file_size) = centdir[1:12]
            if x.extract_version > MAX_EXTRACT_VERSION:
                raise NotImplementedError("zip file version %.1f" %
                                          (x.extract_version / 10))
            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            x._raw_time = t
            x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                           t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

            x._decodeExtra()
            x.header_offset = x.header_offset + concat

            # update total bytes read from central directory
            total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])

            file_count += 1
            if max_file_count is not None and file_count > max_file_count:
                raise TooManyFiles('Too many files in egg')

            if x.filename in filenames:
                filenames.discard(x.filename)
                yield x

            if len(filenames) == 0:
                return
    def fix_zip(self):
        if not self.broken:
            return False
        self.fp.seek(0, 2)
        file_len = self.fp.tell()
        mm = mmap.mmap(self.fp.fileno(), 0, access=mmap.ACCESS_READ)
        offset = 0
        file_list = {}
        cd_list = {}

        try:

            # pass one, parse the zip file
            while offset + 4 < file_len:
                hdr_off = mm.find(b"PK", offset)
                if hdr_off == -1:
                    break
                hdr_type = mm[hdr_off:hdr_off + 4]
                if hdr_type == stringFileHeader:
                    # local file header
                    if hdr_off + sizeFileHeader > file_len:
                        break
                    fheader = mm[hdr_off:hdr_off + sizeFileHeader]
                    fheader = struct.unpack(structFileHeader, fheader)
                    start = hdr_off
                    size = sizeFileHeader + fheader[_FH_COMPRESSED_SIZE] + fheader[_FH_FILENAME_LENGTH] + \
                        fheader[_FH_EXTRA_FIELD_LENGTH]
                    name = mm[hdr_off + sizeFileHeader:hdr_off +
                              sizeFileHeader + fheader[_FH_FILENAME_LENGTH]]
                    file_list[name] = [start, size, fheader]
                    offset = hdr_off + size
                elif hdr_type == stringCentralDir:
                    if hdr_off + sizeCentralDir > file_len:
                        break
                    centdir = mm[hdr_off:hdr_off + sizeCentralDir]
                    centdir = struct.unpack(structCentralDir, centdir)
                    start = hdr_off
                    size = sizeCentralDir + centdir[_CD_FILENAME_LENGTH] + centdir[_CD_EXTRA_FIELD_LENGTH] + \
                        centdir[_CD_COMMENT_LENGTH]
                    name = mm[hdr_off + sizeCentralDir:hdr_off +
                              sizeCentralDir + centdir[_CD_FILENAME_LENGTH]]
                    cd_list[name] = [start, size, centdir]
                    offset = hdr_off + size
                elif hdr_type == stringEndArchive:
                    offset = hdr_off + sizeEndCentDir
                else:
                    offset = hdr_off + 1

            # Guesses
            last_cv = 20
            last_ea = 0
            last_cs = 0
            last_dt = (0, 0)

            # Pass two, repair
            for filename, (start, end, centdir) in cd_list.items():
                if filename not in file_list:
                    continue

                if isinstance(filename, bytes):
                    x = ZipInfo(filename.decode('utf-8', 'backslashreplace'))
                else:
                    x = ZipInfo(filename)
                extra_off = start + sizeCentralDir
                x.extra = mm[extra_off:extra_off +
                             centdir[_CD_EXTRA_FIELD_LENGTH]]
                extra_off += centdir[_CD_EXTRA_FIELD_LENGTH]
                x.comment = mm[extra_off:extra_off +
                               centdir[_CD_EXTRA_FIELD_LENGTH]]

                x.header_offset = file_list[filename][0]

                (x.create_version, x.create_system, x.extract_version,
                 x.reserved, x.flag_bits, x.compress_type, t, d, x.CRC,
                 x.compress_size, x.file_size) = centdir[1:12]
                x.volume, x.internal_attr, x.external_attr = centdir[15:18]
                # Convert date/time code to (year, month, day, hour, min, sec)
                x._raw_time = t
                x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                               t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

                last_ea = x.external_attr
                last_cs = x.create_system
                last_cv = x.create_version
                last_dt = (d, t)

                # noinspection PyProtectedMember
                x._decodeExtra()
                # x.filename = x._decodeFilename()
                self.filelist.append(x)
                self.NameToInfo[x.filename] = x

            for filename, (start, end, fheader) in file_list.items():
                if filename in cd_list:
                    continue

                x = ZipInfo(filename.decode('utf-8', 'backslashreplace'))
                x.extra = ""
                x.comment = ""

                x.header_offset = file_list[filename][0]

                x.create_version = last_cv
                x.create_system = last_cs
                x.extract_version = fheader[_FH_EXTRACT_VERSION]
                x.reserved = 0
                x.flag_bits = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
                x.compress_type = fheader[_FH_COMPRESSION_METHOD]
                d, t = last_dt
                x.CRC = fheader[_FH_CRC]
                x.compress_size = fheader[_FH_COMPRESSED_SIZE]
                x.file_size = fheader[_FH_UNCOMPRESSED_SIZE]

                x.volume = 0
                x.internal_attr = 0
                x.external_attr = last_ea

                # Convert date/time code to (year, month, day, hour, min, sec)
                x._raw_time = t
                x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                               t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)

                # noinspection PyProtectedMember
                x._decodeExtra()
                # x.filename = x._decodeFilename()
                self.filelist.append(x)
                self.NameToInfo[x.filename] = x
        finally:
            mm.close()
Пример #5
0
    def next(self):
        """Return the next member of the archive as a ZipInfo object. Returns
        None if there is no more available. This method is analogous to
        TarFile.next().

        We construct a ZipInfo object using the information stored in the next file header.
        The logic here is based on the implementation of ZipFile._RealGetContents(), which
        constructs a ZipInfo object from information in a central directory file header, but
        modified to work with the file-header-specific struct
        (for the implementation of ZipFile._RealGetContents(), see
        https://github.com/python/cpython/blob/048f54dc75d51e8a1c5822ab7b2828295192aaa5/Lib/zipfile.py#L1316).
        """
        fp = self.fp

        # First, advance to the next header, if needed.
        fp.read(self._next_header_pos - fp.tell())

        # Read the next header.
        fheader = fp.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] == stringCentralDir:
            # We've reached the central directory. This means that we've finished iterating through
            # all entries in the zip file. We can do this check because the file header signature
            # and central directory signature are stored in the same spot (index 0) and with the same format.
            self._loaded = True
            return None
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")
        filename = fp.read(fheader[_FH_FILENAME_LENGTH])
        flags = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
        x.header_offset = self._next_header_pos

        # The file header stores nearly all the same information needed for ZipInfo as what the
        # central directory file header stores, except for a couple of missing fields.
        # We just set them to 0 here.
        x.comment = 0
        x.create_version, x.create_system = 0, 0
        x.volume, x.internal_attr, x.external_attr = 0, 0, 0

        (x.extract_version, x.reserved, x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = fheader[1:10]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))

        # Convert date/time code to (year, month, day, hour, min, sec)
        # This comes from the original cpython code.
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F, t >> 11,
                       (t >> 5) & 0x3F, (t & 0x1F) * 2)

        x._decodeExtra()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x
        self._next_header_pos = (fp.tell() + x.compress_size
                                 )  # Beginning of the next file's header.
        return x