def __loadBlock(self):
    """Load the next block of the current frame into the block buffer.

    Reads a 4-byte little-endian block header from ``self.file`` and feeds
    the block to the ``lz4f`` decoder held in ``self.__ctx``.

    * Size 0 (EndMark): the end mark, plus the 4-byte content checksum when
      ``frame.bContentChecksum`` is set, is pushed into the decoder and the
      current frame is flagged as empty.
    * Any other size: the block payload, plus the 4-byte block checksum when
      ``frame.bBlockChecksum`` is set, is decoded and stored in
      ``self.__blockdata`` with ``self.__blockpos`` reset to 0.

    Raises:
        IOError: if the file ends before a complete header, block or
            checksum could be read, or if the decoder produces output for
            the end mark / content checksum.
    """
    frame = self.__frames[self.__iFrame]

    # Read block's size
    header = self.file.read(4)
    if len(header) != 4:
        raise IOError('LZ4 block has been truncated')
    # The top bit of the header word is masked off so only the size is left
    # (presumably the "uncompressed block" flag of the LZ4 frame format).
    sizeBlock = struct.unpack('<I', header)[0] & 0x7fffffff

    # EndMark reached
    # - push end of mark block and content checksum in decompressor engine
    #   (maybe useless ...)
    # - mark current frame as empty
    if sizeBlock == 0:
        res = lz4f.decompressFrame(header, self.__ctx)
        if len(res['decomp']) != 0:
            raise IOError('Unexpected output')

        # Read content checksum if enabled
        if frame.bContentChecksum:
            checksum = self.file.read(4)
            if len(checksum) != 4:
                raise IOError('Not enought data for content checksum')
            # TODO check content checksum
            res = lz4f.decompressFrame(checksum, self.__ctx)
            if len(res['decomp']) != 0:
                raise IOError('Unexpected output')

        frame.empty = True
        return

    # Another block to process. The header bytes already read are reused
    # instead of seeking back 4 bytes and reading them again, so this also
    # works on file objects that cannot seek.
    toRead = sizeBlock + (4 if frame.bBlockChecksum else 0)
    payload = self.file.read(toRead)
    if len(payload) != toRead:
        raise IOError('LZ4 block has been truncated')

    res = lz4f.decompressFrame(header + payload, self.__ctx)
    self.__blockdata = res['decomp']
    self.__blockpos = 0
def __loadBlock(self):
    """Decode the next block of the current frame.

    A 4-byte little-endian header is read from ``self.file``. When the size
    it carries is zero the frame's end mark has been reached: the end mark
    (and the 4-byte content checksum if ``frame.bContentChecksum`` is set)
    is handed to the ``lz4f`` decoder and the frame is flagged as empty.
    Otherwise the block body (plus a 4-byte block checksum if
    ``frame.bBlockChecksum`` is set) is decoded; the output becomes
    ``self.__blockdata`` and ``self.__blockpos`` restarts at 0.

    Raises:
        IOError: on a truncated header, block or checksum, or when the
            decoder emits data for the end mark / content checksum.
    """
    frame = self.__frames[self.__iFrame]

    # Read block's size
    raw = self.file.read(4)
    if len(raw) != 4:
        raise IOError('LZ4 block has been truncated')
    sizeBlock = struct.unpack('<I', raw)[0]
    # Keep the low 31 bits only; the top bit is not part of the size
    # (presumably the LZ4 "uncompressed block" flag -- confirm against spec).
    sizeBlock &= 0x7fffffff

    if sizeBlock != 0:
        # Another block to process. Only the remainder of the block is read
        # and appended to the header we already hold; no backwards seek is
        # needed, which keeps non-seekable streams usable.
        remaining = sizeBlock + (4 if frame.bBlockChecksum else 0)
        body = self.file.read(remaining)
        if len(body) != remaining:
            raise IOError('LZ4 block has been truncated')
        res = lz4f.decompressFrame(raw + body, self.__ctx)
        self.__blockdata = res['decomp']
        self.__blockpos = 0
        return

    # EndMark reached
    # - push end of mark block and content checksum in decompressor engine
    #   (maybe useless ...)
    # - mark current frame as empty
    res = lz4f.decompressFrame(raw, self.__ctx)
    if len(res['decomp']) != 0:
        raise IOError('Unexpected output')

    # Read content checksum if enabled
    if frame.bContentChecksum:
        raw = self.file.read(4)
        if len(raw) != 4:
            raise IOError('Not enought data for content checksum')
        # TODO check content checksum
        res = lz4f.decompressFrame(raw, self.__ctx)
        if len(res['decomp']) != 0:
            raise IOError('Unexpected output')

    frame.empty = True
def read_block(self, blkSize=None, blk=None, setCur=True):
    """
    Read and decompress one block from ``self.fileObj``.

    Reads the next block, unless provided a blk from blkDict. If provided a
    blk, it will read that specific block.

    :param int blkSize: compressed block size, as returned from
        get_block_size(); fetched via get_block_size() when falsy.
    :param dict blk: entry from blkDict; when given, the file is positioned
        at its 'comp_begin' and its 'blkSize' is used.
    :param bool setCur: update the current blk var (``self.curBlk``) to the
        blkDict key whose 'comp_begin' equals the current file position.
    :return: the 'decomp' entry of the decompressFrame result, or '' when
        the block size is 0.
    :raises IndexError: if setCur is true and no blkDict entry begins at the
        current file position.
    """
    if blk:
        self.fileObj.seek(blk.get('comp_begin'))
        blkSize = blk.get('blkSize')
    if not blkSize:
        blkSize = self.get_block_size()
    if blkSize == 0:
        return ''

    # The position does not move until the read below, so query it once
    # rather than once per blkDict entry.
    pos = self.fileObj.tell()

    if setCur:
        # dict.iteritems only exists on Python 2; fall back to items().
        try:
            iteritems = self.blkDict.iteritems
        except AttributeError:
            iteritems = self.blkDict.items
        self.curBlk = [num for num, b in iteritems()
                       if pos == b.get('comp_begin')][0]

    # Last block before compEnd: also consume the 8 bytes that follow it
    # (presumably end mark + content checksum -- TODO confirm) and rebuild
    # the decompression context afterwards.
    regen = (pos + blkSize + 8) == self.compEnd
    if regen:
        blkSize += 8

    compData = self.fileObj.read(blkSize)
    resultDict = lz4f.decompressFrame(compData, self.dCtx)
    if regen:
        self._regenDCTX()
    return resultDict.get('decomp')
def read_block(self, blkSize=None, blk=None, setCur=True):
    """
    Read and decompress one block from ``self.fileObj``.

    Reads the next block, unless provided a blk from blkDict. If provided a
    blk, it will read that specific block.

    :param int blkSize: compressed block size, as returned from
        get_block_size(); fetched via get_block_size() when falsy.
    :param dict blk: entry from blkDict; when given, the file is positioned
        at its 'comp_begin' and its 'blkSize' is used.
    :param bool setCur: update the current blk var (``self.curBlk``) to the
        blkDict key whose 'comp_begin' equals the current file position.
    :return: the 'decomp' entry of the decompressFrame result, or '' when
        the block size is 0.
    :raises IndexError: if setCur is true and no blkDict entry begins at the
        current file position.
    """
    if blk:
        self.fileObj.seek(blk.get('comp_begin'))
        blkSize = blk.get('blkSize')
    if not blkSize:
        blkSize = self.get_block_size()
    if blkSize == 0:
        return ''

    # Nothing moves the file position before the read below, so it is
    # looked up a single time instead of once per blkDict entry.
    offset = self.fileObj.tell()

    if setCur:
        # dict.iteritems only exists on Python 2; fall back to items().
        try:
            iteritems = self.blkDict.iteritems
        except AttributeError:
            iteritems = self.blkDict.items
        self.curBlk = [num for num, b in iteritems()
                       if offset == b.get('comp_begin')][0]

    # Final block before compEnd: swallow the trailing 8 bytes as well
    # (presumably end mark + content checksum -- TODO confirm); the
    # decompression context is regenerated once the block is decoded.
    isLast = (offset + blkSize + 8) == self.compEnd
    if isLast:
        blkSize += 8

    compData = self.fileObj.read(blkSize)
    resultDict = lz4f.decompressFrame(compData, self.dCtx, self.blkSizeID)
    if isLast:
        self._regenDCTX()
    return resultDict.get('decomp')
def __loadFrame(self):
    """Advance to the next frame and prime the decoder with its header.

    The block buffer is cleared and the frame index incremented. If the
    index points at a frame already stored in ``self.__frames`` that frame
    is reused; otherwise a new one is obtained from ``self.__readFrame()``
    and appended to the list. ``frame.szHeader`` bytes are then read from
    ``self.file`` and passed to the ``lz4f`` decoder.

    Returns:
        bool: False when ``__readFrame()`` yields no further frame, True
        once the frame header has been fed to the decoder.

    Raises:
        IOError: if the decoder produces output while consuming the header.
    """
    self.__blockpos = 0
    self.__blockdata = ''
    self.__iFrame += 1

    alreadyKnown = self.__iFrame < len(self.__frames)
    if alreadyKnown:
        # Use an already discovered frame
        frame = self.__frames[self.__iFrame]
    else:
        # Index reached the end of the frame array: try to read a new one
        frame = self.__readFrame()
        if frame is None:
            # No more frame
            return False
        self.__frames.append(frame)

    # Init decompression context with this frame header
    headerBytes = self.file.read(frame.szHeader)
    decoded = lz4f.decompressFrame(headerBytes, self.__ctx)
    if len(decoded['decomp']) > 0:
        raise IOError('Unexpected output')
    return True
def lz4_decode(payload):
    """Decode payload using interoperable LZ4 framing. Requires Kafka >= 0.10

    Args:
        payload: the complete LZ4 frame to decompress.

    Returns:
        The 'decomp' entry of the lz4f result (the decompressed data).

    Raises:
        RuntimeError: if lz4f reports that the frame was only partially
            decompressed (``next`` is non-zero).
    """
    # pylint: disable-msg=no-member
    ctx = lz4f.createDecompContext()
    try:
        data = lz4f.decompressFrame(payload, ctx)
    finally:
        # Release the native context even if decompression raises, so
        # repeated calls do not leak decoder state.
        lz4f.freeDecompContext(ctx)

    # lz4f python module does not expose how much of the payload was
    # actually read if the decompression was only partial.
    if data['next'] != 0:
        raise RuntimeError('lz4f unable to decompress full payload')
    return data['decomp']
def lz4_decode(payload):
    """Decode an LZ4 frame whose header checksum byte is rewritten first.

    The byte at the end of the frame header is replaced by one taken from
    the xxh32 digest of the header bytes that follow the first four bytes,
    and the patched frame is then decompressed with lz4f.

    Args:
        payload: byte string holding the complete frame; it must contain at
            least the frame header.

    Returns:
        The 'decomp' entry of the lz4f result (the decompressed data).
    """
    # Kafka's LZ4 code has a bug in its header checksum implementation
    header_size = 7

    # Indexing bytes yields an int on Python 3 and a 1-char str on Python 2.
    flg = payload[4]
    if not isinstance(flg, int):
        flg = ord(flg)

    # Bit 3 of the flag byte enlarges the header by 8 bytes.
    content_size_bit = (flg >> 3) & 1
    if content_size_bit:
        header_size += 8

    # This should be the correct hc
    hc = xxhash.xxh32(payload[4:header_size - 1]).digest()[-2:-1]  # pylint: disable-msg=no-member

    munged_payload = b''.join([
        payload[0:header_size - 1],
        hc,
        payload[header_size:],
    ])

    # decompressFrame needs a *decompression* context (the previous code
    # handed it a compression context); release it once decoding is done.
    dCtx = lz4f.createDecompContext()  # pylint: disable-msg=no-member
    try:
        data = lz4f.decompressFrame(munged_payload, dCtx)  # pylint: disable-msg=no-member
    finally:
        lz4f.freeDecompContext(dCtx)  # pylint: disable-msg=no-member
    return data['decomp']
def lz4_decode(payload):
    """Decompress an LZ4 frame after patching its header checksum byte.

    The last byte of the frame header is swapped for one derived from the
    xxh32 digest of the header bytes located after the first four bytes;
    the resulting frame is decompressed with lz4f.

    Args:
        payload: byte string holding the complete frame; it must contain at
            least the frame header.

    Returns:
        The 'decomp' entry of the lz4f result (the decompressed data).
    """
    # Kafka's LZ4 code has a bug in its header checksum implementation
    header_size = 7

    # payload[4] is an int on Python 3 and a 1-char str on Python 2.
    flag_byte = payload[4]
    flg = flag_byte if isinstance(flag_byte, int) else ord(flag_byte)

    # When bit 3 of the flag byte is set the header is 8 bytes longer.
    content_size_bit = (flg >> 3) & 1
    if content_size_bit:
        header_size += 8

    # This should be the correct hc
    hc = xxhash.xxh32(
        payload[4:header_size - 1]).digest()[-2:-1]  # pylint: disable-msg=no-member

    munged_payload = b''.join(
        [payload[0:header_size - 1], hc, payload[header_size:]])

    # Decoding requires a decompression context, not the compression
    # context that was created here before; it is freed after use.
    ctx = lz4f.createDecompContext()  # pylint: disable-msg=no-member
    try:
        data = lz4f.decompressFrame(munged_payload, ctx)  # pylint: disable-msg=no-member
    finally:
        lz4f.freeDecompContext(ctx)  # pylint: disable-msg=no-member
    return data['decomp']
def extract_slice_data(slison_zip, data_filename, value_type):
    """Read an LZ4-compressed member of an archive as a NumPy array.

    Args:
        slison_zip: archive object whose ``open(name)`` returns a readable
            context manager (presumably a ``zipfile.ZipFile`` -- confirm
            against the caller).
        data_filename: name of the member to read.
        value_type: dtype passed to ``np.frombuffer``.

    Returns:
        A one-dimensional array built with ``np.frombuffer`` over the
        decompressed bytes.
    """
    # Pull the compressed bytes out first so the archive member is closed
    # before decompression starts.
    with slison_zip.open(data_filename) as slice_file:
        compressed = slice_file.read()

    dCtx = lz4f.createDecompContext()
    try:
        data = lz4f.decompressFrame(compressed, dCtx=dCtx)
    finally:
        # Free the native decoder context; it was previously left
        # allocated on every call.
        lz4f.freeDecompContext(dCtx)

    slice_data = np.frombuffer(data['decomp'], dtype=value_type)
    return slice_data