def createFields(self):
    """Yield the fields of one compressed block, in stream order.

    The fields produced are: the 48-bit block header, the block CRC32,
    the "randomized" bit, the original BWT pointer, the 16-entry block
    usage map followed by one 16-bit symbol bitmap per used block, the
    number of Huffman groups, the number of selectors, the selector list,
    one code-length table per Huffman group, and finally the Huffman
    codes themselves up to and including the block terminator.

    Raises:
        ParserError: if the block header is not 0x314159265359.
    """
    yield textHandler(Bits(self, "blockheader", 48, "Block header"), hexadecimal)
    if self["blockheader"].value != 0x314159265359:  # pi
        raise ParserError("Invalid block header!")
    yield textHandler(UInt32(self, "crc32", "CRC32 for this block"), hexadecimal)
    yield Bit(self, "randomized", "Is this block randomized?")
    yield Bits(self, "orig_bwt_pointer", 24, "Starting pointer into BWT after untransform")
    yield GenericVector(self, "huffman_used_map", 16, Bit, 'block_used',
                        "Bitmap showing which blocks (representing 16 literals each) are in use")

    # Collect every literal value whose bit is set in a used block's bitmap.
    symbols_used = []
    for index, block_used in enumerate(self["huffman_used_map"].array('block_used')):
        if block_used.value:
            start_index = index * 16
            field = Bzip2Bitmap(
                self, "huffman_used_bitmap[%i]" % index, 16, start_index,
                "Bitmap for block %i (literals %i to %i) showing which symbols are in use"
                % (index, start_index, start_index + 15))
            yield field
            for i, used in enumerate(field):
                if used.value:
                    symbols_used.append(start_index + i)

    yield Bits(self, "huffman_groups", 3, "Number of different Huffman tables in use")
    yield Bits(self, "selectors_used", 15, "Number of times the Huffman tables are switched")
    yield Bzip2Selectors(self, "selectors_list", self["huffman_groups"].value)

    # One code-length table (and resulting tree) per Huffman group.
    trees = []
    for _ in xrange(self["huffman_groups"].value):
        field = Bzip2Lengths(self, "huffman_lengths[]", len(symbols_used) + 2)
        yield field
        trees.append(field.tree)

    # From here on symbols_used is only reordered by move_to_front
    # (assumed not to change its length), so the terminator symbol is fixed.
    terminator = len(symbols_used) + 1

    counter = 0
    rle_run = 0
    selector_tree = None
    while True:
        # The active Huffman tree is re-selected every 50 codes.
        if counter % 50 == 0:
            select_id = self["selectors_list"].array("selector_list")[counter // 50].realvalue
            selector_tree = trees[select_id]
        field = HuffmanCode(self, "huffman_code[]", selector_tree)
        if field.realvalue in (0, 1):
            # RLE codes: consecutive run codes accumulate with a weight
            # that doubles on every code; a new run restarts the weight.
            if rle_run == 0:
                rle_power = 1
            rle_run += (field.realvalue + 1) * rle_power
            rle_power <<= 1
            field._description = "RLE Run Code %i (for %r); Total accumulated run %i (Huffman Code %i)" % (
                field.realvalue, chr(symbols_used[0]), rle_run, field.value)
        elif field.realvalue == terminator:
            field._description = "Block Terminator (%i) (Huffman Code %i)" % (field.realvalue, field.value)
            yield field
            break
        else:
            # Literal: ends any RLE run and moves the symbol to the front.
            rle_run = 0
            move_to_front(symbols_used, field.realvalue - 1)
            field._description = "Literal %r (value %i) (Huffman Code %i)" % (
                chr(symbols_used[0]), field.realvalue, field.value)
        yield field
        # No terminator check is needed here: the terminator branch above
        # already yields the field and leaves the loop.
        counter += 1
def createFields(self):
    """Yield the pre-tree and the pre-tree-encoded code lengths.

    Twenty 4-bit "pretree_lengths[]" fields are read first and passed to
    build_tree() to obtain the pre-tree. The pre-tree is then used to
    decode self.num_elements code lengths into self.lengths.

    self.lengths is shared through an attribute on self.root named
    "lzx_tree_lengths_" + self.name: it is created (all zeros) the first
    time and reused afterwards, so each decoded code is applied as a
    delta (modulo 17) to the value already stored there.

    Decoded pre-tree codes:
        0-16: delta for a single element.
        17:   4 extra bits, run of 4-19 zero lengths.
        18:   5 extra bits, run of 20-51 zero lengths.
        19:   1 extra bit, run of 4-5 elements that all receive the
              length obtained from the next (0-16) code.
    """
    for _ in xrange(20):
        yield Bits(self, "pretree_lengths[]", 4)
    pre_tree = build_tree(
        [self['pretree_lengths[%d]' % x].value for x in xrange(20)])

    shared_name = "lzx_tree_lengths_" + self.name
    if not hasattr(self.root, shared_name):
        self.lengths = [0] * self.num_elements
        setattr(self.root, shared_name, self.lengths)
    else:
        self.lengths = getattr(self.root, shared_name)

    # NOTE(review): a run (codes 17-19) that extends past num_elements
    # grows self.lengths through the slice assignment; confirm whether
    # malformed input needs to be rejected here.
    i = 0
    while i < self.num_elements:
        field = HuffmanCode(self, "tree_code[]", pre_tree)
        if field.realvalue <= 16:
            self.lengths[i] = (self.lengths[i] - field.realvalue) % 17
            field._description = "Literal tree delta length %i (new length value %i for element %i)" % (
                field.realvalue, self.lengths[i], i)
            i += 1
            yield field
        elif field.realvalue == 17:
            field._description = "Tree Code 17: Zeros for 4-19 elements"
            yield field
            extra = Bits(self, "extra[]", 4)
            zeros = 4 + extra.value
            extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (
                zeros, i, i + zeros - 1)
            yield extra
            self.lengths[i:i + zeros] = [0] * zeros
            i += zeros
        elif field.realvalue == 18:
            field._description = "Tree Code 18: Zeros for 20-51 elements"
            yield field
            extra = Bits(self, "extra[]", 5)
            zeros = 20 + extra.value
            extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (
                zeros, i, i + zeros - 1)
            yield extra
            self.lengths[i:i + zeros] = [0] * zeros
            i += zeros
        elif field.realvalue == 19:
            field._description = "Tree Code 19: Same code for 4-5 elements"
            yield field
            extra = Bits(self, "extra[]", 1)
            run = 4 + extra.value
            extra._description = "Extra bits: run for %i elements (elements %i through %i)" % (
                run, i, i + run - 1)
            yield extra
            newfield = HuffmanCode(self, "tree_code[]", pre_tree)
            assert newfield.realvalue <= 16
            # Compute the new length before building the description so
            # the text reports the updated value, not the previous one.
            new_length = (self.lengths[i] - newfield.realvalue) % 17
            newfield._description = "Literal tree delta length %i (new length value %i for elements %i through %i)" % (
                newfield.realvalue, new_length, i, i + run - 1)
            self.lengths[i:i + run] = [new_length] * run
            i += run
            yield newfield
def createFields(self):
    """Yield the pre-tree and the pre-tree-encoded code lengths.

    Twenty 4-bit "pretree_lengths[]" fields are read first and passed to
    build_tree() to obtain the pre-tree. The pre-tree is then used to
    decode self.num_elements code lengths into self.lengths.

    self.lengths is shared through an attribute on self.root named
    "lzx_tree_lengths_" + self.name: it is created (all zeros) the first
    time and reused afterwards, so each decoded code is applied as a
    delta (modulo 17) to the value already stored there.

    Decoded pre-tree codes:
        0-16: delta for a single element.
        17:   4 extra bits, run of 4-19 zero lengths.
        18:   5 extra bits, run of 20-51 zero lengths.
        19:   1 extra bit, run of 4-5 elements that all receive the
              length obtained from the next (0-16) code.
    """
    for _ in xrange(20):
        yield Bits(self, "pretree_lengths[]", 4)
    pre_tree = build_tree([self['pretree_lengths[%d]'%x].value for x in xrange(20)])

    shared_name = "lzx_tree_lengths_"+self.name
    if not hasattr(self.root, shared_name):
        self.lengths = [0] * self.num_elements
        setattr(self.root, shared_name, self.lengths)
    else:
        self.lengths = getattr(self.root, shared_name)

    # NOTE(review): a run (codes 17-19) that extends past num_elements
    # grows self.lengths through the slice assignment; confirm whether
    # malformed input needs to be rejected here.
    i = 0
    while i < self.num_elements:
        field = HuffmanCode(self, "tree_code[]", pre_tree)
        if field.realvalue <= 16:
            self.lengths[i] = (self.lengths[i] - field.realvalue) % 17
            field._description = "Literal tree delta length %i (new length value %i for element %i)" % (
                field.realvalue, self.lengths[i], i)
            i += 1
            yield field
        elif field.realvalue == 17:
            field._description = "Tree Code 17: Zeros for 4-19 elements"
            yield field
            extra = Bits(self, "extra[]", 4)
            zeros = 4 + extra.value
            extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (zeros, i, i+zeros-1)
            yield extra
            self.lengths[i:i+zeros] = [0] * zeros
            i += zeros
        elif field.realvalue == 18:
            field._description = "Tree Code 18: Zeros for 20-51 elements"
            yield field
            extra = Bits(self, "extra[]", 5)
            zeros = 20 + extra.value
            extra._description = "Extra bits: zeros for %i elements (elements %i through %i)" % (zeros, i, i+zeros-1)
            yield extra
            self.lengths[i:i+zeros] = [0] * zeros
            i += zeros
        elif field.realvalue == 19:
            field._description = "Tree Code 19: Same code for 4-5 elements"
            yield field
            extra = Bits(self, "extra[]", 1)
            run = 4 + extra.value
            extra._description = "Extra bits: run for %i elements (elements %i through %i)" % (run, i, i+run-1)
            yield extra
            newfield = HuffmanCode(self, "tree_code[]", pre_tree)
            assert newfield.realvalue <= 16
            # Compute the new length before building the description so
            # the text reports the updated value, not the previous one.
            new_length = (self.lengths[i] - newfield.realvalue) % 17
            newfield._description = "Literal tree delta length %i (new length value %i for elements %i through %i)" % (
                newfield.realvalue, new_length, i, i+run-1)
            self.lengths[i:i+run] = [new_length] * run
            i += run
            yield newfield
def createFields(self):
    """Yield the fields of one compressed block, in stream order.

    The fields produced are: the 48-bit block header, the block CRC32,
    the "randomized" bit, the original BWT pointer, the 16-entry block
    usage map followed by one 16-bit symbol bitmap per used block, the
    number of Huffman groups, the number of selectors, the selector list,
    one code-length table per Huffman group, and finally the Huffman
    codes themselves up to and including the block terminator.

    Raises:
        ParserError: if the block header is not 0x314159265359.
    """
    yield textHandler(Bits(self, "blockheader", 48, "Block header"),
                      hexadecimal)
    if self["blockheader"].value != 0x314159265359:  # pi
        raise ParserError("Invalid block header!")
    yield textHandler(UInt32(self, "crc32", "CRC32 for this block"),
                      hexadecimal)
    yield Bit(self, "randomized", "Is this block randomized?")
    yield Bits(self, "orig_bwt_pointer", 24,
               "Starting pointer into BWT after untransform")
    yield GenericVector(
        self, "huffman_used_map", 16, Bit, 'block_used',
        "Bitmap showing which blocks (representing 16 literals each) are in use"
    )

    # Collect every literal value whose bit is set in a used block's bitmap.
    symbols_used = []
    for index, block_used in enumerate(
            self["huffman_used_map"].array('block_used')):
        if block_used.value:
            start_index = index * 16
            field = Bzip2Bitmap(
                self, "huffman_used_bitmap[%i]" % index, 16, start_index,
                "Bitmap for block %i (literals %i to %i) showing which symbols are in use"
                % (index, start_index, start_index + 15))
            yield field
            for i, used in enumerate(field):
                if used.value:
                    symbols_used.append(start_index + i)

    yield Bits(self, "huffman_groups", 3,
               "Number of different Huffman tables in use")
    yield Bits(self, "selectors_used", 15,
               "Number of times the Huffman tables are switched")
    yield Bzip2Selectors(self, "selectors_list",
                         self["huffman_groups"].value)

    # One code-length table (and resulting tree) per Huffman group.
    trees = []
    for _ in xrange(self["huffman_groups"].value):
        field = Bzip2Lengths(self, "huffman_lengths[]",
                             len(symbols_used) + 2)
        yield field
        trees.append(field.tree)

    # From here on symbols_used is only reordered by move_to_front
    # (assumed not to change its length), so the terminator symbol is fixed.
    terminator = len(symbols_used) + 1

    counter = 0
    rle_run = 0
    selector_tree = None
    while True:
        # The active Huffman tree is re-selected every 50 codes.
        if counter % 50 == 0:
            select_id = self["selectors_list"].array("selector_list")[
                counter // 50].realvalue
            selector_tree = trees[select_id]
        field = HuffmanCode(self, "huffman_code[]", selector_tree)
        if field.realvalue in (0, 1):
            # RLE codes: consecutive run codes accumulate with a weight
            # that doubles on every code; a new run restarts the weight.
            if rle_run == 0:
                rle_power = 1
            rle_run += (field.realvalue + 1) * rle_power
            rle_power <<= 1
            field._description = "RLE Run Code %i (for %r); Total accumulated run %i (Huffman Code %i)" % (
                field.realvalue, chr(symbols_used[0]), rle_run, field.value)
        elif field.realvalue == terminator:
            field._description = "Block Terminator (%i) (Huffman Code %i)" % (
                field.realvalue, field.value)
            yield field
            break
        else:
            # Literal: ends any RLE run and moves the symbol to the front.
            rle_run = 0
            move_to_front(symbols_used, field.realvalue - 1)
            field._description = "Literal %r (value %i) (Huffman Code %i)" % (
                chr(symbols_used[0]), field.realvalue, field.value)
        yield field
        # No terminator check is needed here: the terminator branch above
        # already yields the field and leaves the loop.
        counter += 1
def createFields(self):
    """Yield the fields of one block and update the parent's decoder state.

    A 3-bit block type and a 24-bit block size are read first. Block
    types 1 and 2 (verbatim / aligned offset) are decoded as Huffman
    coded literals and matches; block type 3 is stored uncompressed.

    Side effects visible in this method:
      * sets self.uncompressed_size, self.compression_level,
        self.window_size and self.block_type;
      * appends decoded data to self.parent.uncompressed_data;
      * updates self.parent.r0 / r1 / r2 (the R0-R2 positions named in
        the field descriptions).

    Raises:
        ParserError: if the block type is not 1, 2 or 3.
    """
    yield Bits(self, "block_type", 3)
    yield Bits(self, "block_size", 24)
    self.uncompressed_size = self["block_size"].value
    self.compression_level = self.root.compr_level
    self.window_size = self.WINDOW_SIZE[self.compression_level]
    self.block_type = self["block_type"].value
    # Amount of data already decoded by earlier blocks of the same parent.
    curlen = len(self.parent.uncompressed_data)
    if self.block_type in (1, 2): # Verbatim or aligned offset block
        if self.block_type == 2:
            # Aligned offset blocks carry eight 3-bit lengths for an extra tree.
            for i in xrange(8):
                yield Bits(self, "aligned_len[]", 3)
            aligned_tree = build_tree([self['aligned_len[%d]'%i].value for i in xrange(8)])
        # The main tree is built from two separately encoded length tables.
        yield LZXPreTreeEncodedTree(self, "main_tree_start", 256)
        yield LZXPreTreeEncodedTree(self, "main_tree_rest", self.window_size * 8)
        main_tree = build_tree(self["main_tree_start"].lengths + self["main_tree_rest"].lengths)
        yield LZXPreTreeEncodedTree(self, "length_tree", 249)
        length_tree = build_tree(self["length_tree"].lengths)
        current_decoded_size = 0
        while current_decoded_size < self.uncompressed_size:
            # At every non-zero multiple of 32768 decoded bytes, padding may
            # be inserted (presumably to reach a 16-bit boundary; paddingSize
            # is defined elsewhere).
            if (curlen+current_decoded_size) % 32768 == 0 and (curlen+current_decoded_size) != 0:
                padding = paddingSize(self.address + self.current_size, 16)
                if padding:
                    yield PaddingBits(self, "padding[]", padding)
            field = HuffmanCode(self, "main_code[]", main_tree)
            if field.realvalue < 256:
                # Values below 256 are literal bytes.
                field._description = "Literal value %r" % chr(field.realvalue)
                current_decoded_size += 1
                self.parent.uncompressed_data += chr(field.realvalue)
                yield field
                continue
            # Otherwise the value encodes a match: position slot and length header.
            position_header, length_header = divmod(field.realvalue - 256, 8)
            # info[2] is used below as a number of extra position bits;
            # info[0] and info[1] appear in the "Positions %i to %i" text.
            info = self.POSITION_SLOTS[position_header]
            if info[2] == 0:
                # No extra bits: the position is fully determined by the slot.
                if info[0] == 0:
                    position = self.parent.r0
                    field._description = "Position Slot %i, Position [R0] (%i)" % (position_header, position)
                elif info[0] == 1:
                    # Reuse R1 and swap it with R0.
                    position = self.parent.r1
                    self.parent.r1 = self.parent.r0
                    self.parent.r0 = position
                    field._description = "Position Slot %i, Position [R1] (%i)" % (position_header, position)
                elif info[0] == 2:
                    # Reuse R2 and swap it with R0.
                    position = self.parent.r2
                    self.parent.r2 = self.parent.r0
                    self.parent.r0 = position
                    field._description = "Position Slot %i, Position [R2] (%i)" % (position_header, position)
                else:
                    # New position: shift R0 -> R1 -> R2 and store it in R0.
                    position = info[0] - 2
                    self.parent.r2 = self.parent.r1
                    self.parent.r1 = self.parent.r0
                    self.parent.r0 = position
                    field._description = "Position Slot %i, Position %i" % (position_header, position)
            else:
                field._description = "Position Slot %i, Positions %i to %i" % (position_header, info[0] - 2, info[1] - 2)
            if length_header == 7:
                # Length header 7 means the length comes from a separate code.
                field._description += ", Length Values 9 and up"
                yield field
                length_field = HuffmanCode(self, "length_code[]", length_tree)
                length = length_field.realvalue + 9
                length_field._description = "Length Code %i, total length %i" % (length_field.realvalue, length)
                yield length_field
            else:
                field._description += ", Length Value %i (Huffman Code %i)"%(length_header + 2, field.value)
                yield field
                length = length_header + 2
            if info[2]:
                # Extra fields reuse the text after '[' in the main code's
                # name (presumably its array index) so they share its index.
                if self.block_type == 1 or info[2] < 3: # verbatim
                    extrafield = Bits(self, "position_extra[%s" % field.name.split('[')[1], info[2])
                    position = extrafield.value + info[0] - 2
                    extrafield._description = "Position Extra Bits (%i), total position %i"%(extrafield.value, position)
                    yield extrafield
                else: # aligned offset
                    position = info[0] - 2
                    if info[2] > 3:
                        # All but the low three bits are stored verbatim.
                        extrafield = Bits(self, "position_verbatim[%s" % field.name.split('[')[1], info[2]-3)
                        position += extrafield.value*8
                        extrafield._description = "Position Verbatim Bits (%i), added position %i"%(extrafield.value, extrafield.value*8)
                        yield extrafield
                    if info[2] >= 3:
                        # The remaining bits are coded with the aligned tree.
                        extrafield = HuffmanCode(self, "position_aligned[%s" % field.name.split('[')[1], aligned_tree)
                        position += extrafield.realvalue
                        extrafield._description = "Position Aligned Bits (%i), total position %i"%(extrafield.realvalue, position)
                        yield extrafield
                # New position: shift R0 -> R1 -> R2 and store it in R0.
                self.parent.r2 = self.parent.r1
                self.parent.r1 = self.parent.r0
                self.parent.r0 = position
            # extend_data is defined elsewhere; presumably it appends `length`
            # bytes copied from `position` bytes back -- TODO confirm.
            self.parent.uncompressed_data = extend_data(self.parent.uncompressed_data, length, position)
            current_decoded_size += length
    elif self.block_type == 3: # Uncompressed block
        # Padding is always emitted here: a full 16 bits when paddingSize
        # returns 0.
        padding = paddingSize(self.address + self.current_size, 16)
        if padding:
            yield PaddingBits(self, "padding[]", padding)
        else:
            yield PaddingBits(self, "padding[]", 16)
        self.endian = LITTLE_ENDIAN
        yield UInt32(self, "r[]", "New value of R0")
        yield UInt32(self, "r[]", "New value of R1")
        yield UInt32(self, "r[]", "New value of R2")
        self.parent.r0 = self["r[0]"].value
        self.parent.r1 = self["r[1]"].value
        self.parent.r2 = self["r[2]"].value
        yield RawBytes(self, "data", self.uncompressed_size)
        self.parent.uncompressed_data+=self["data"].value
        # An odd block size is followed by 8 bits of padding.
        if self["block_size"].value % 2:
            yield PaddingBits(self, "padding", 8)
    else:
        raise ParserError("Unknown block type %d!"%self.block_type)
def createFields(self):
    """Yield the fields of one block and update the parent's decoder state.

    A 3-bit block type and a 24-bit block size are read first. Block
    types 1 and 2 (verbatim / aligned offset) are decoded as Huffman
    coded literals and matches; block type 3 is stored uncompressed.

    Side effects visible in this method:
      * sets self.uncompressed_size, self.compression_level,
        self.window_size and self.block_type;
      * appends decoded data to self.parent.uncompressed_data;
      * updates self.parent.r0 / r1 / r2 (the R0-R2 positions named in
        the field descriptions).

    Raises:
        ParserError: if the block type is not 1, 2 or 3.
    """
    yield Bits(self, "block_type", 3)
    yield Bits(self, "block_size", 24)
    self.uncompressed_size = self["block_size"].value
    self.compression_level = self.root.compr_level
    self.window_size = self.WINDOW_SIZE[self.compression_level]
    self.block_type = self["block_type"].value
    # Amount of data already decoded by earlier blocks of the same parent.
    curlen = len(self.parent.uncompressed_data)
    if self.block_type in (1, 2):  # Verbatim or aligned offset block
        if self.block_type == 2:
            # Aligned offset blocks carry eight 3-bit lengths for an extra tree.
            for i in xrange(8):
                yield Bits(self, "aligned_len[]", 3)
            aligned_tree = build_tree(
                [self['aligned_len[%d]' % i].value for i in xrange(8)])
        # The main tree is built from two separately encoded length tables.
        yield LZXPreTreeEncodedTree(self, "main_tree_start", 256)
        yield LZXPreTreeEncodedTree(self, "main_tree_rest",
                                    self.window_size * 8)
        main_tree = build_tree(self["main_tree_start"].lengths +
                               self["main_tree_rest"].lengths)
        yield LZXPreTreeEncodedTree(self, "length_tree", 249)
        length_tree = build_tree(self["length_tree"].lengths)
        current_decoded_size = 0
        while current_decoded_size < self.uncompressed_size:
            # At every non-zero multiple of 32768 decoded bytes, padding may
            # be inserted (presumably to reach a 16-bit boundary; paddingSize
            # is defined elsewhere).
            if (curlen + current_decoded_size) % 32768 == 0 and (
                    curlen + current_decoded_size) != 0:
                padding = paddingSize(self.address + self.current_size, 16)
                if padding:
                    yield PaddingBits(self, "padding[]", padding)
            field = HuffmanCode(self, "main_code[]", main_tree)
            if field.realvalue < 256:
                # Values below 256 are literal bytes.
                field._description = "Literal value %r" % chr(
                    field.realvalue)
                current_decoded_size += 1
                self.parent.uncompressed_data += chr(field.realvalue)
                yield field
                continue
            # Otherwise the value encodes a match: position slot and length header.
            position_header, length_header = divmod(
                field.realvalue - 256, 8)
            # info[2] is used below as a number of extra position bits;
            # info[0] and info[1] appear in the "Positions %i to %i" text.
            info = self.POSITION_SLOTS[position_header]
            if info[2] == 0:
                # No extra bits: the position is fully determined by the slot.
                if info[0] == 0:
                    position = self.parent.r0
                    field._description = "Position Slot %i, Position [R0] (%i)" % (
                        position_header, position)
                elif info[0] == 1:
                    # Reuse R1 and swap it with R0.
                    position = self.parent.r1
                    self.parent.r1 = self.parent.r0
                    self.parent.r0 = position
                    field._description = "Position Slot %i, Position [R1] (%i)" % (
                        position_header, position)
                elif info[0] == 2:
                    # Reuse R2 and swap it with R0.
                    position = self.parent.r2
                    self.parent.r2 = self.parent.r0
                    self.parent.r0 = position
                    field._description = "Position Slot %i, Position [R2] (%i)" % (
                        position_header, position)
                else:
                    # New position: shift R0 -> R1 -> R2 and store it in R0.
                    position = info[0] - 2
                    self.parent.r2 = self.parent.r1
                    self.parent.r1 = self.parent.r0
                    self.parent.r0 = position
                    field._description = "Position Slot %i, Position %i" % (
                        position_header, position)
            else:
                field._description = "Position Slot %i, Positions %i to %i" % (
                    position_header, info[0] - 2, info[1] - 2)
            if length_header == 7:
                # Length header 7 means the length comes from a separate code.
                field._description += ", Length Values 9 and up"
                yield field
                length_field = HuffmanCode(self, "length_code[]",
                                           length_tree)
                length = length_field.realvalue + 9
                length_field._description = "Length Code %i, total length %i" % (
                    length_field.realvalue, length)
                yield length_field
            else:
                field._description += ", Length Value %i (Huffman Code %i)" % (
                    length_header + 2, field.value)
                yield field
                length = length_header + 2
            if info[2]:
                # Extra fields reuse the text after '[' in the main code's
                # name (presumably its array index) so they share its index.
                if self.block_type == 1 or info[2] < 3:  # verbatim
                    extrafield = Bits(
                        self, "position_extra[%s" % field.name.split('[')[1],
                        info[2])
                    position = extrafield.value + info[0] - 2
                    extrafield._description = "Position Extra Bits (%i), total position %i" % (
                        extrafield.value, position)
                    yield extrafield
                else:  # aligned offset
                    position = info[0] - 2
                    if info[2] > 3:
                        # All but the low three bits are stored verbatim.
                        extrafield = Bits(
                            self, "position_verbatim[%s" %
                            field.name.split('[')[1], info[2] - 3)
                        position += extrafield.value * 8
                        extrafield._description = "Position Verbatim Bits (%i), added position %i" % (
                            extrafield.value, extrafield.value * 8)
                        yield extrafield
                    if info[2] >= 3:
                        # The remaining bits are coded with the aligned tree.
                        extrafield = HuffmanCode(
                            self, "position_aligned[%s" %
                            field.name.split('[')[1], aligned_tree)
                        position += extrafield.realvalue
                        extrafield._description = "Position Aligned Bits (%i), total position %i" % (
                            extrafield.realvalue, position)
                        yield extrafield
                # New position: shift R0 -> R1 -> R2 and store it in R0.
                self.parent.r2 = self.parent.r1
                self.parent.r1 = self.parent.r0
                self.parent.r0 = position
            # extend_data is defined elsewhere; presumably it appends `length`
            # bytes copied from `position` bytes back -- TODO confirm.
            self.parent.uncompressed_data = extend_data(
                self.parent.uncompressed_data, length, position)
            current_decoded_size += length
    elif self.block_type == 3:  # Uncompressed block
        # Padding is always emitted here: a full 16 bits when paddingSize
        # returns 0.
        padding = paddingSize(self.address + self.current_size, 16)
        if padding:
            yield PaddingBits(self, "padding[]", padding)
        else:
            yield PaddingBits(self, "padding[]", 16)
        self.endian = LITTLE_ENDIAN
        yield UInt32(self, "r[]", "New value of R0")
        yield UInt32(self, "r[]", "New value of R1")
        yield UInt32(self, "r[]", "New value of R2")
        self.parent.r0 = self["r[0]"].value
        self.parent.r1 = self["r[1]"].value
        self.parent.r2 = self["r[2]"].value
        yield RawBytes(self, "data", self.uncompressed_size)
        self.parent.uncompressed_data += self["data"].value
        # An odd block size is followed by 8 bits of padding.
        if self["block_size"].value % 2:
            yield PaddingBits(self, "padding", 8)
    else:
        raise ParserError("Unknown block type %d!" % self.block_type)