Exemplo n.º 1
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')  # magic header
     self.fileobj.write('\010')  # compression method
     fname = os.path.basename(self.name)
     if fname.endswith(".gz"):
         fname = fname[:-3]
     elif fname.endswith(".tgz"):
         fname = "%s.tar" % fname[:-4]
     elif fname.endswith(".wrz"):
         fname = "%s.wrl" % fname[:-4]
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     gzip.write32u(self.fileobj, int(time()))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
     if fname:
         if sys.platform == "win32":
             # Windows is case insensitive by default (although it can be
             # set to case sensitive), so according to the GZIP spec, we
             # force the name to lowercase
             fname = fname.lower()
         self.fileobj.write(
             fname.encode("ISO-8859-1", "replace").replace("?", "_") +
             '\000')
Exemplo n.º 2
0
 def spider_closed(self, spider, reason):
     """Finish the compressed outputs and e-mail a job report for *spider*.

     Records the finish time and elapsed time, appends the flushed data,
     CRC and 32-bit size to every entry of ``self.files``, then sends an
     HTML mail whose subject says "failed" when the crawl stats contain
     downloader exceptions or logged errors and "succeed" otherwise.
     *reason* is accepted but not used.
     """
     jira_id = spider.custom_settings['JIRA_ID']
     self.finish_time = datetime.datetime.now()
     self.used_time = self.finish_time - self.start_time

     files = []
     for name, compressed in self.files.items():
         target = compressed.fileobj
         target.write(compressed.compress.flush())
         gzip.write32u(target, compressed.crc)
         gzip.write32u(target, compressed.size & 0xffffffff)
         files.append((name + compressed.extension, compressed.mimetype, compressed))

     # Size reported in the mail; zero when the items file was never opened.
     try:
         size = self.files[spider.name + '-items.json'].size
     except KeyError:
         size = 0

     stats = spider.crawler.stats.get_stats()
     dqr_status = stats.pop('columns_stats_information', {})
     had_errors = (stats.get('downloader/exception_count', 0) > 0
                   or stats.get('log_count/ERROR', 0) > 0)
     subject = "failed" if had_errors else "succeed"

     mailsender = MailSender.from_settings(self.settings)
     recipients = self.settings.getlist('JOB_NOTIFICATION_EMAILS')
     subject_line = 'JIRA ID:{}  job ends with {}'.format(jira_id, subject)
     template = Environment().from_string(config.HTML)
     body = template.render({'stats': stats,
                             'dqr_status': dqr_status,
                             'jira': jira_id,
                             'size': format_size(size)})
     # The collected ``files`` are currently not attached (attachs=files).
     mailsender.send(to=recipients, subject=subject_line, body=body,
                     mimetype='text/html', _callback=self._catch_mail_sent)
Exemplo n.º 3
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')
     self.fileobj.write('\010')
     # no flags
     self.fileobj.write('\x00')
     write32u(self.fileobj, LongType(0))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
Exemplo n.º 4
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')
     self.fileobj.write('\010')
     # no flags
     self.fileobj.write('\x00')
     write32u(self.fileobj, long(0))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
Exemplo n.º 5
0
 def _write_gzip_header(self):
     # Generate a header that is easily reproduced with gzip -9 -n on
     # an unix-like system
     self.fileobj.write('\037\213')             # magic header
     self.fileobj.write('\010')                 # compression method
     self.fileobj.write('\000')                 # flags
     write32u(self.fileobj, long(0))            # timestamp
     self.fileobj.write('\002')                 # max compression
     self.fileobj.write('\003')                 # UNIX
Exemplo n.º 6
0
 def _write_gzip_header(self):
     # Generate a header that is easily reproduced with gzip -9 -n on
     # an unix-like system
     self.fileobj.write('\037\213')  # magic header
     self.fileobj.write('\010')  # compression method
     self.fileobj.write('\000')  # flags
     write32u(self.fileobj, long(0))  # timestamp
     self.fileobj.write('\002')  # max compression
     self.fileobj.write('\003')  # UNIX
Exemplo n.º 7
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')             # magic header
     self.fileobj.write('\010')                 # compression method
     fname = self.filename[:-3]
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     gzip.write32u(self.fileobj, long(self.timestamp))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
     if fname:
         self.fileobj.write(fname + '\000')
Exemplo n.º 8
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')  # magic header
     self.fileobj.write('\010')  # compression method
     fname = self.filename[:-3]
     flags = 0
     if fname:
         flags = FNAME
     self.fileobj.write(chr(flags))
     write32u(self.fileobj, int(0))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
     if fname:
         self.fileobj.write(fname + '\000')
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')             # magic header
     self.fileobj.write('\010')                 # compression method
     fname = self.filename[:-3]
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     ## This is what WebOb patches:
     gzip.write32u(self.fileobj, long(0))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
     if fname:
         self.fileobj.write(fname + '\000')
Exemplo n.º 10
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')  # magic header
     self.fileobj.write('\010')  # compression method
     # Python 2.6 deprecates self.filename
     fname = getattr(self, 'name', None) or self.filename
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     gzip.write32u(self.fileobj, long(self.timestamp))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
     if fname:
         self.fileobj.write(fname + '\000')
Exemplo n.º 11
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')             # magic header
     self.fileobj.write('\010')                 # compression method
     fname = self.filename[:-3]
     flags = 0
     if fname:
         flags = FNAME
     self.fileobj.write(chr(flags))
     # don't use current time!
     write32u(self.fileobj, 0L)
     self.fileobj.write('\002')
     self.fileobj.write('\377')
     if fname:
         self.fileobj.write(fname + '\000')
Exemplo n.º 12
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')             # magic header
     self.fileobj.write('\010')                 # compression method
     # Python 2.6 deprecates self.filename
     fname = getattr(self, 'name', None) or self.filename
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     gzip.write32u(self.fileobj, long(self.timestamp))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
     if fname:
         self.fileobj.write(fname + '\000')
Exemplo n.º 13
0
    def close_member(self):
        """Closes the current member being written.

        Flushes the compressor, appends the CRC and the size trailer, then
        resets ``self.size`` and installs a fresh compressor for the next
        member.  Does nothing when no member has been started.
        """
        # The new member is not yet started, no need to close
        if self._new_member:
            return

        self.fileobj.write(self.compress.flush())
        write32u(self.fileobj, self.crc)
        # self.size may exceed 2GB, or even 4GB, so only the low 32 bits
        # are stored.  The plain literal behaves like the old ``...L`` form
        # and also parses on Python 3.
        write32u(self.fileobj, self.size & 0xFFFFFFFF)
        self.size = 0
        self.compress = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
        self._new_member = True
Exemplo n.º 14
0
 def close(self):
     """Write the CRC and size trailer, then release the file objects.

     Returns immediately when the file is already closed
     (``self.fileobj`` is None).  ``self.myfileobj`` is closed as well
     when it is set.
     """
     if self.fileobj is None:
         return
     self.fileobj.flush()
     # What if the CRC is bad or we write it in the wrong place?
     # There is no way to know this until we decompress, and then it could
     # be too late and we may end up with a corrupted archive.
     # Make sure unit tests always pass.
     # NOTE(review): self.compress is not flushed here -- presumably that
     # happens elsewhere; confirm.
     gzip.write32u(self.fileobj, self.crc)
     # Only the low 32 bits of the size are stored.  The plain literal is
     # equivalent to the old ``...L`` form and also parses on Python 3.
     gzip.write32u(self.fileobj, self.size & 0xffffffff)
     self.fileobj = None
     if self.myfileobj:
         self.myfileobj.close()
         self.myfileobj = None
Exemplo n.º 15
0
 def _write_gzip_header(self):
     self.fileobj.write("\037\213")  # magic header
     self.fileobj.write("\010")  # compression method
     fname = self.name
     if fname and fname.endswith(".gz"):
         fname = fname[:-3]
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     gzip.write32u(self.fileobj, long(self.timestamp))  # noqa
     self.fileobj.write("\002")
     self.fileobj.write("\377")
     if fname:
         self.fileobj.write(fname + "\000")
Exemplo n.º 16
0
    def close_member(self):
        """Finish the gzip member currently being written, if there is one.

        The compressor is flushed, the CRC and the low 32 bits of the size
        are appended, and the object is reset so that the next write starts
        a new member.
        """
        if not self._new_member:
            sink = self.fileobj
            sink.write(self.compress.flush())
            write32u(sink, self.crc)
            # self.size may exceed 2GB, or even 4GB
            truncated_size = self.size & 0xffffffff
            write32u(sink, truncated_size)
            self.size = 0
            self.compress = zlib.compressobj(
                9, zlib.DEFLATED, -zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
            self._new_member = True
Exemplo n.º 17
0
 def _write_gzip_header(self):
     """Write a gzip header whose timestamp field is ``self.timestamp``.

     ``self.name`` with a trailing ``.gz`` removed is stored as the file
     name; with no name, the FNAME flag and field are left out.
     """
     emit = self.fileobj.write
     emit('\037\213')  # magic header
     emit('\010')  # compression method
     stored = self.name
     if stored and stored.endswith('.gz'):
         stored = stored[:-3]
     emit(pycompat.bytechr(gzip.FNAME if stored else 0))
     gzip.write32u(self.fileobj, int(self.timestamp))
     emit('\002')
     emit('\377')
     if stored:
         emit(stored + '\000')
Exemplo n.º 18
0
    def _write_member_header(self, compressed_size, raw_size):
        self.fileobj.write(b'\037\213')  # magic header, 2 bytes
        self.fileobj.write(b'\010')  # compression method, 1 byte
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = FEXTRA
        if fname:
            flags |= FNAME
        self.fileobj.write(chr(flags).encode('latin-1'))  # flags, 1 byte
        mtime = self._write_mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))  # modified time, 4 bytes
        self.fileobj.write(b'\002')  # fixed flag (maximum compression), 1 byte
        self.fileobj.write(b'\377')  # OS (unknown), 1 byte

        # write extra flag for indexing
        # XLEN, 20 bytes
        self.fileobj.write(b'\x14\x00')  # extra flag len, 2 bytes
        # EXTRA FLAG FORMAT:
        # +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
        # |SI1|SI2|  LEN  |       MEMBER SIZE (8 Bytes)   |       RAW SIZE (8 Bytes)      |
        # +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
        # SI1, SI2:      Subfield ID, 'IG' (Indexed Gzip file)
        # LEN:           Length of subfield body, always 16 bits
        # MEMBER SIZE:   The size of current member
        # RAW SIZE:      Raw text size in uint64 (since raw size is not able to represent >4GB file)
        self.fileobj.write(SID)  # subfield ID (IG), 2 bytes
        # LEN: 16 bytes
        self.fileobj.write(b'\x10\x00')  # subfield len (16), 2 bytes
        # compressed data size: 16 + 8 + 8 + len(fname) + 1 + data + 8
        member_size = 32 + len(fname) + 1 + compressed_size + 8
        self.fileobj.write(struct.pack("<Q",
                                       member_size))  # member size, 8 bytes
        # raw data size:
        self.fileobj.write(struct.pack("<Q",
                                       raw_size))  # raw data size, 8 bytes
        if fname:
            self.fileobj.write(fname + b'\000')
        return member_size
Exemplo n.º 19
0
 def _write_gzip_header(self):
     self.fileobj.write("\037\213")  # magic header
     self.fileobj.write("\010")  # compression method
     # Python 2.6 deprecates self.filename
     fname = getattr(self, "name", None) or self.filename
     if fname and fname.endswith(".gz"):
         fname = fname[:-3]
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     gzip.write32u(self.fileobj, long(self.timestamp))
     self.fileobj.write("\002")
     self.fileobj.write("\377")
     if fname:
         self.fileobj.write(fname + "\000")
Exemplo n.º 20
0
    def _write_member_header(self, compressed_size, raw_size):
        self.fileobj.write(b'\037\213')  # magic header, 2 bytes
        self.fileobj.write(b'\010')  # compression method, 1 byte
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = FEXTRA
        if fname:
            flags |= FNAME
        self.fileobj.write(chr(flags).encode('latin-1'))  # flags, 1 byte
        mtime = self._write_mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))  # modified time, 4 bytes
        self.fileobj.write(b'\002')  # fixed flag (maximum compression), 1 byte
        self.fileobj.write(b'\377')  # OS (unknown), 1 byte

        # write extra flag for indexing
        # XLEN, 8 bytes
        self.fileobj.write(b'\x08\x00')  # extra flag len, 2 bytes
        # EXTRA FLAG FORMAT:
        # +---+---+---+---+---+---+---+---+
        # |SI1|SI2|  LEN  |  MEMBER SIZE  |
        # +---+---+---+---+---+---+---+---+
        # SI1, SI2:      Subfield ID, 'IG' (Indexed Gzip file)
        # LEN:           Length of subfield body, always 4 (bytes)
        # MEMBER SIZE:   The size of current member
        self.fileobj.write(SID)  # subfield ID (IG), 2 bytes
        # LEN: 4 bytes
        self.fileobj.write(b'\x04\x00')  # subfield len (4), 2 bytes
        # compressed data size: 16 + 4 + len(fname) + 1 + data + 8
        #                       header + member size + filename with zero end + data block + CRC32 and ISIZE
        member_size = 20 + len(fname) + 1 + compressed_size + 8
        if not fname:
            member_size -= 1
        self.fileobj.write(struct.pack("<I",
                                       member_size))  # member size, 4 bytes
        if fname:
            self.fileobj.write(fname + b'\000')
        return member_size
Exemplo n.º 21
0
 def close(self):
     """Finish the gzip stream and close the owned file object.

     In write mode the compressor is flushed and the CRC plus the low 32
     bits of the size are appended.  ``self.myfileobj`` is closed even if
     writing the trailer fails.  Calling this again is a no-op.
     """
     target = self.fileobj
     if target is None:
         return
     self.fileobj = None
     try:
         if self.mode == gzip.WRITE:
             tail = self.compress.flush(Z_FINISH)
             target.write(tail)
             gzip.write32u(target, self.crc)
             # self.size may exceed 2GB, or even 4GB
             low_size_bits = self.size & 0xffffffff
             gzip.write32u(target, low_size_bits)
             target.flush()
     finally:
         owned = self.myfileobj
         if owned:
             self.myfileobj = None
             owned.close()
Exemplo n.º 22
0
 def close(self):
     """Finish the gzip stream and close the owned file object.

     In write mode the compressor is flushed and the CRC plus the low 32
     bits of the size are appended.  ``self.myfileobj`` is closed even if
     writing the trailer fails.  Returns immediately when the file is
     already closed.
     """
     fileobj = self.fileobj
     if fileobj is None:
         return
     self.fileobj = None
     try:
         if self.mode == gzip.WRITE:
             fileobj.write(self.compress.flush(Z_FINISH))
             gzip.write32u(fileobj, self.crc)
             # self.size may exceed 2GB, or even 4GB.  The plain literal
             # is equivalent to the old ``...L`` form and also parses on
             # Python 3.
             gzip.write32u(fileobj, self.size & 0xffffffff)
             fileobj.flush()
     finally:
         myfileobj = self.myfileobj
         if myfileobj:
             self.myfileobj = None
             myfileobj.close()
Exemplo n.º 23
0
 def _write_member(self, cdata):
     """
         Write a compressed data as a complete gzip member
         Input:
             cdata:
                 compressed data, a tuple of compressed result returned by _compress_func()
         Return:
             size of member
     """
     size = self._write_member_header(
         len(cdata[0]) + len(cdata[1]) + len(cdata[2]), cdata[4])
     self.fileobj.write(cdata[0])  # buffer data
     self.fileobj.write(cdata[1])  # body data
     self.fileobj.write(cdata[2])  # rest data
     write32u(self.fileobj, cdata[3])  # CRC32
     write32u(self.fileobj,
              cdata[4] & 0xffffffff)  # raw data size in 32bits
     return size
Exemplo n.º 24
0
 def _write_gzip_header(self):
     self.fileobj.write('\x1f\x8b')  # magic header
     self.fileobj.write('\x08')  # compression method
     if hasattr(self, 'name'):
         # 2.6
         fname = self.name
         if fname.endswith(".gz"):
             fname = fname[:-3]
     else:
         fname = self.filename[:-3]
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     ## This is what WebOb patches:
     gzip.write32u(self.fileobj, long(0))
     self.fileobj.write('\x02\xff')
     if fname:
         self.fileobj.write(fname + '\x00')
Exemplo n.º 25
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')             # magic header
     self.fileobj.write('\010')                 # compression method
     # Python 2.6 introduced self.name and deprecated self.filename
     try:
         fname = self.name
     except AttributeError:
         fname = self.filename
     if fname and fname.endswith('.gz'):
         fname = fname[:-3]
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     gzip.write32u(self.fileobj, long(self.timestamp))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
     if fname:
         self.fileobj.write(fname + '\000')
Exemplo n.º 26
0
 def _write_gzip_header(self):
     self.fileobj.write('\037\213')  # magic header
     self.fileobj.write('\010')  # compression method
     # Python 2.6 introduced self.name and deprecated self.filename
     try:
         fname = self.name
     except AttributeError:
         fname = self.filename
     if fname and fname.endswith('.gz'):
         fname = fname[:-3]
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     gzip.write32u(self.fileobj, long(self.timestamp))
     self.fileobj.write('\002')
     self.fileobj.write('\377')
     if fname:
         self.fileobj.write(fname + '\000')
Exemplo n.º 27
0
 def _write_gzip_header(self):
     self.fileobj.write('\x1f\x8b') # magic header
     self.fileobj.write('\x08') # compression method
     if hasattr(self, 'name'):
         # 2.6
         fname = self.name
         if fname.endswith(".gz"):
             fname = fname[:-3]
     else:
         fname = self.filename[:-3]
     flags = 0
     if fname:
         flags = gzip.FNAME
     self.fileobj.write(chr(flags))
     ## This is what WebOb patches:
     gzip.write32u(self.fileobj, long(0))
     self.fileobj.write('\x02\xff')
     if fname:
         self.fileobj.write(fname + '\x00')
Exemplo n.º 28
0
 def update_event(self, inp=-1):
     """Call ``gzip.write32u`` with inputs 0 and 1 and publish the result.

     Whatever ``gzip.write32u`` returns is set as output 0.  *inp* is
     accepted but not used.
     """
     stream = self.input(0)
     value = self.input(1)
     result = gzip.write32u(stream, value)
     self.set_output_val(0, result)