def test_bits_read( self ):
    """Check BitStream.read() against all four endian/byte-order configurations."""
    data = bytes( [0b10010010, 0b01001010, 0b10101010, 0b10111111] )

    # Default settings: bits come off the front of each byte, bytes in order.
    stream = bits.BitStream( data )
    for _ in range( 4 ):
        self.assertEqual( stream.read( 3 ), 0b100 )
    self.assertEqual( stream.read( 14 ), 0b10101010101010 )

    # Little io_endian: the same fields come back with bits reversed.
    stream = bits.BitStream( data, io_endian='little' )
    for _ in range( 4 ):
        self.assertEqual( stream.read( 3 ), 0b001 )
    self.assertEqual( stream.read( 14 ), 0b01010101010101 )

    # Reading the buffer backwards, starting from the last byte.
    stream = bits.BitStream( data, start_offset=len( data ) - 1, bytes_reverse=True )
    for size, expected in (
        (17, 0b10111111101010100),
        (3, 0b100),
        (4, 0b1010),
        (3, 0b100),
        (3, 0b100),
    ):
        self.assertEqual( stream.read( size ), expected )

    # Backwards with little-endian I/O and bit order, starting at the final bit.
    stream = bits.BitStream(
        data, start_offset=(len( data ) - 1, 7), bytes_reverse=True,
        io_endian='little', bit_endian='little'
    )
    self.assertEqual( stream.read( 6 ), 0b111111 )
    self.assertEqual( stream.read( 14 ), 0b10101010101010 )
    for _ in range( 4 ):
        self.assertEqual( stream.read( 3 ), 0b100 )
def dump_stream( cls, frames ):
    """Encode a list of frame objects into a packed LPC speech bitstream.

    Each frame is quantised against the class lookup tables (ENERGY_LUT,
    PITCH_LUT, K_LUT) and written with the bit widths defined by
    ENERGY_BITS, REPEAT_BITS, PITCH_BITS and K_BITS.
    """
    writer = bits.BitStream( bytearray(), io_endian='big', bit_endian='little' )

    def write_energy( frame ):
        # Energy codes 0b0000 and 0b1111 are reserved (silent/stop), so
        # quantise into the usable middle band of the LUT.
        index = find_closest_index( cls.ENERGY_LUT[cls.ENERGY_LUT_FLOOR:-1], frame.energy )
        writer.write( index + cls.ENERGY_LUT_FLOOR, cls.ENERGY_BITS )

    def write_ks( frame, count ):
        # Quantise and emit the first `count` reflection coefficients k1..kN.
        for i in range( count ):
            value = getattr( frame, 'k{}'.format( i + 1 ) )
            writer.write( find_closest_index( cls.K_LUT[i], value ), cls.K_BITS[i] )

    for frame in frames:
        if isinstance( frame, SilentFrame ):
            writer.write( 0b0000, cls.ENERGY_BITS )
        elif isinstance( frame, StopFrame ):
            writer.write( 0b1111, cls.ENERGY_BITS )
        elif isinstance( frame, RepeatedFrame ):
            write_energy( frame )
            writer.write( 1, cls.REPEAT_BITS )
            writer.write( find_closest_index( cls.PITCH_LUT, frame.pitch ), cls.PITCH_BITS )
        elif isinstance( frame, UnvoicedFrame ):
            write_energy( frame )
            writer.write( 0, cls.REPEAT_BITS )
            # Pitch code 0 marks an unvoiced frame.
            writer.write( 0, cls.PITCH_BITS )
            write_ks( frame, 4 )
        elif isinstance( frame, VoicedFrame ):
            write_energy( frame )
            writer.write( 0, cls.REPEAT_BITS )
            # Pitch index 0 is reserved for unvoiced frames; search the rest.
            writer.write( find_closest_index( cls.PITCH_LUT[1:], frame.pitch ) + 1, cls.PITCH_BITS )
            write_ks( frame, 10 )
    return writer.get_buffer()
def parse_stream( cls, buffer ):
    """Decode a packed LPC speech bitstream into a list of frame objects.

    Returns a dict with 'frames' (the decoded frame list) and 'size'
    (the number of whole bytes consumed from the stream).
    """
    frames = []
    reader = bits.BitStream( buffer, io_endian='big', bit_endian='little' )
    while reader.tell() != (len( buffer ), 0):
        energy = reader.read( cls.ENERGY_BITS )
        # Reserved energy codes: all-zeroes is silence, all-ones is stop.
        if energy == 0b0000:
            frames.append( SilentFrame() )
            continue
        if energy == 0b1111:
            frames.append( StopFrame() )
            break
        repeat = reader.read( cls.REPEAT_BITS )
        pitch = reader.read( cls.PITCH_BITS )
        if repeat:
            frames.append( RepeatedFrame(
                energy=cls.ENERGY_LUT[energy], pitch=cls.PITCH_LUT[pitch]
            ) )
            continue
        # All non-repeat frames carry at least the first four coefficients.
        ks = [reader.read( cls.K_BITS[i] ) for i in range( 4 )]
        if pitch == 0b000000:
            frames.append( UnvoicedFrame(
                energy=cls.ENERGY_LUT[energy],
                k1=cls.K_LUT[0][ks[0]], k2=cls.K_LUT[1][ks[1]],
                k3=cls.K_LUT[2][ks[2]], k4=cls.K_LUT[3][ks[3]],
            ) )
            continue
        # Voiced frames carry the remaining six coefficients too.
        ks += [reader.read( cls.K_BITS[i] ) for i in range( 4, 10 )]
        k_kwargs = {'k{}'.format( i + 1 ): cls.K_LUT[i][ks[i]] for i in range( 10 )}
        frames.append( VoicedFrame(
            energy=cls.ENERGY_LUT[energy], pitch=cls.PITCH_LUT[pitch], **k_kwargs
        ) )
    return {
        'frames': frames,
        'size': reader.tell()[0],
    }
def import_data(self, buffer, parent=None):
    """Decompress an LZW stream with variable-width codes.

    Header: u32le decompressed size, then u16le maximum code width
    (should be 12). Codes 0-255 are literal bytes, 256 is an error
    marker, 257 marks end of data; the code width starts at 9 bits and
    grows as the string table fills.
    """
    decomp_size = utils.from_uint32_le(buffer[:4])
    max_bits = utils.from_uint16_le(buffer[4:6])  # should be 12
    lookup = [bytes((i, )) for i in range(256)]
    lookup.append(None)  # 256: error
    lookup.append(None)  # 257: end of data
    output = bytearray()
    bs = bits.BitStream(buffer, 6, bit_endian='big', io_endian='big')
    usebits = 9

    def add_to_lookup(entry):
        # Append a new table entry, widening the code size one slot
        # before the current width's capacity is reached (early change).
        nonlocal usebits
        if len(lookup) < (1 << max_bits):
            logger.debug('lookup[{}] = {}'.format(len(lookup), entry))
            lookup.append(entry)
            if len(lookup) == (1 << usebits) - 1:
                usebits = min(usebits + 1, max_bits)
                logger.debug('usebits = {}'.format(usebits))

    # The first code is always a literal; it primes the match window.
    fcode = bs.read(usebits)
    match = lookup[fcode]
    logger.debug('fcode={},match={}'.format(fcode, match))
    output.extend(match)
    while True:
        ncode = bs.read(usebits)
        logger.debug('ncode={}'.format(ncode))
        if ncode == 257:  # end of data
            break
        if ncode == 256:  # error
            raise Exception('Found error code, data is not valid')
        # Code not yet in the table: classic LZW "match + first char" case.
        nmatch = lookup[ncode] if ncode < len(lookup) else match + match[0:1]
        logger.debug('match={}'.format(match))
        logger.debug('nmatch={}'.format(nmatch))
        output.extend(nmatch)
        # add code to lookup
        add_to_lookup(match + nmatch[0:1])
        match = nmatch
    if len(output) != decomp_size:
        logger.warning(
            '{}: was expecting data of size {}, got data of size {} instead'
            .format(self, decomp_size, len(output)))
    return mrc.TransformResult(payload=bytes(output), end_offset=len(buffer))
def test_bits_write(self):
    """Check BitStream.write() against all four endian/byte-order configurations."""
    target = bytes([0b10010010, 0b01001010, 0b10101010, 0b10111111])

    # (constructor kwargs, sequence of (value, bit count) writes) — every
    # configuration must serialise to the same target buffer.
    cases = [
        ({},
         [(0b100, 3)] * 4 + [(0b10101010101010, 14), (0b111111, 6)]),
        ({'io_endian': 'little'},
         [(0b001, 3)] * 4 + [(0b01010101010101, 14), (0b111111, 6)]),
        ({'bytes_reverse': True},
         [(0b10111111101010100, 17), (0b100, 3), (0b1010, 4),
          (0b100, 3), (0b100, 3), (0b10, 2)]),
        ({'bytes_reverse': True, 'io_endian': 'little', 'bit_endian': 'little'},
         [(0b111111, 6), (0b10101010101010, 14)] + [(0b100, 3)] * 4),
    ]
    for kwargs, writes in cases:
        stream = bits.BitStream(bytearray(), **kwargs)
        for value, size in writes:
            stream.write(value, size)
        self.assertEqual(target, stream.buffer)
def import_data(self, buffer, parent=None):
    """Parse the header of a sprite copy-command buffer.

    Header layout (from the reads below): byte 0 is the inline copy bit
    count, byte 1 holds flag bits (priority, palette offset, flips),
    bytes 2-3 and 4-5 are big-endian values for the incremental and
    literal copy streams, and the bitstream payload starts at 0x06.

    NOTE(review): this function parses the header and sets up the
    bitstream, but none of the parsed values are used and there is no
    return statement — it looks unfinished. Confirm against history.
    """
    assert utils.is_bytes(buffer)
    inline_copy_bits = buffer[0]
    # Flag bits all live in buffer[0x01].
    high_priority_flag = mrc.Bits(0x01, 0b00010000).get_from_buffer(buffer)
    palette_offset = mrc.Bits(0x01, 0b00001100).get_from_buffer(buffer)
    flip_horiz = mrc.Bits(0x01, 0b00000010).get_from_buffer(buffer)
    flip_vert = mrc.Bits(0x01, 0b00000001).get_from_buffer(buffer)
    # Two big-endian 16-bit fields follow the flags.
    incremental_copy = mrc.UInt16_BE(0x02).get_from_buffer(buffer)
    literal_copy = mrc.UInt16_BE(0x04).get_from_buffer(buffer)
    # Payload bitstream begins after the 6-byte header.
    bs = bits.BitStream(buffer[0x06:], 0, bit_endian='big')
    output = bytearray()
def test_bits_seek(self):
    """Check absolute, relative and end-anchored seeks report the right position."""
    data = bytes([0b10010010, 0b01001010, 0b10101010, 0b10111111])
    stream = bits.BitStream(data)

    # Absolute seeks land exactly on the requested (byte, bit) position.
    for position in ((3, 4), (1, 2)):
        stream.seek(position)
        self.assertEqual(stream.tell(), position)

    # Relative seeks add (bytes, bits) with carry/borrow across byte
    # boundaries; 'end' anchors from the end of the buffer.
    for (offset, origin), expected in (
        (((1, 2), 'current'), (2, 4)),
        (((-1, -4), 'current'), (1, 0)),
        (((-1, -2), 'end'), (2, 6)),
    ):
        stream.seek(offset, origin=origin)
        self.assertEqual(stream.tell(), expected)
def import_data(self, buffer, parent=None):
    """Decompress an LZW-like stream whose string table lives inside the
    source buffer itself.

    The bitstream (starting at byte 2) yields codes of increasing width
    ``bit_size``: code 0x100 widens the code size by one bit, codes
    <= 0xff are literal bytes, and codes >= 0x101 index a table of
    little-endian (start, end) output offsets built at the front of
    ``src`` — one entry is recorded for every second code processed.

    Returns an mrc.TransformResult with the decompressed payload.
    """
    src = bytearray(buffer)
    dest = bytearray()
    lookup_pointer = 0
    bit_size = 9  # codes start out 9 bits wide
    bs = bits.BitStream(src, start_offset=2, bit_endian='big', io_endian='big')
    eof = False
    loop = 0
    while True:
        # Every second iteration, record the current output length into
        # the table growing at the front of src.
        if loop % 2 == 0:
            src[lookup_pointer:lookup_pointer + 2] = utils.to_uint16_le(
                len(dest))
            lookup_pointer += 2
        # Fetch the next code; 0x100 is the "grow code width" escape.
        while True:
            test = 0
            try:
                test = bs.read(bit_size)
            except IndexError:
                # Ran off the end of the bitstream: normal termination.
                eof = True
                break
            if test != 0x100:
                break
            bit_size += 1
        if eof:
            break
        if test <= 0xff:
            # Literal byte.
            dest.append(test & 0xff)
        else:
            # Back-reference: codes from 0x101 index (start, end) pairs.
            index = (test - 0x101) << 1
            if index > len(src):
                print('Out of bounds! 0x{:04x}'.format(index))
                break
            start = utils.from_uint16_le(src[index:index + 2])
            end = utils.from_uint16_le(src[index + 2:index + 4])
            dest.extend(dest[start:end])
        loop += 1
    return mrc.TransformResult(payload=bytes(dest), end_offset=len(buffer))
def import_data(self, buffer, parent=None):
    """Decode a bit-packed 8x8 tile stream into raw palette indices.

    Header: u16be pattern count, with the top bit selecting XOR mode
    (each decoded row is XORed against the previous decoded row). A
    variable-length code table follows, terminated by 0xff: entries with
    the high bit set are 3 bytes (palette index, copy count + bit
    length, code bits); 2-byte entries reuse the previous entry's
    palette index. The remaining bytes are a big-endian bitstream of
    codes; the reserved code '111111' is followed by a 3-bit copy count
    and a 4-bit palette index.

    Returns the decoded 64 * pattern_count palette indices as bytes.
    """
    assert utils.is_bytes(buffer)
    pattern_count = mrc.UInt16_BE(0x0000).get_from_buffer(buffer)
    xor_mode = (pattern_count & 0x8000) != 0
    pattern_count &= 0x7fff
    index = 2
    lut = {}
    prev_pal_index = 0
    # Parse the code table until the 0xff terminator.
    while index < len(buffer):
        test = buffer[index]
        if test == 0xff:
            break
        elif test & 0x80:
            # 3-byte entry: low nibble of byte 0 is the palette index.
            code_raw = buffer[index + 2]
            bit_count = buffer[index + 1] & 0x0f
            code = ''.join([
                '1' if (code_raw & (1 << i)) else '0'
                for i in range(bit_count - 1, -1, -1)
            ])
            lut[code] = {
                'pal_index': buffer[index] & 0x0f,
                'copy_count': ((buffer[index + 1] & 0xf0) >> 4) + 1,
            }
            prev_pal_index = lut[code]['pal_index']
            index += 3
        else:
            # 2-byte entry: palette index carried over from the previous entry.
            code_raw = buffer[index + 1]
            bit_count = buffer[index] & 0x0f
            code = ''.join([
                '1' if (code_raw & (1 << i)) else '0'
                for i in range(bit_count - 1, -1, -1)
            ])
            lut[code] = {
                'pal_index': prev_pal_index,
                'copy_count': ((buffer[index] & 0xf0) >> 4) + 1,
            }
            index += 2
    bs = bits.BitStream(buffer[index + 1:], 0, bit_endian='big')
    state = {
        'output': bytearray(64 * pattern_count),
        'output_index': 0,
        'current_row': [],
        'prev_row': bytearray(8)
    }

    def push_pal(pal, state):
        # Buffer palette indices; once a full 8-pixel row is collected,
        # flush it to the output (XORed against the previous row in XOR mode).
        state['current_row'].append(pal)
        if len(state['current_row']) == 8:
            output_index = state['output_index']
            for i in range(8):
                state['output'][output_index + i] = state['current_row'][i]
            if xor_mode:
                for i in range(8):
                    state['output'][output_index + i] ^= state['prev_row'][i]
            # BUGFIX: remember the decoded row for the next XOR pass. The
            # original assigned to a local `prev_row` that was immediately
            # discarded, so state['prev_row'] stayed all-zero and XOR mode
            # never actually chained rows together.
            state['prev_row'] = state['output'][output_index:output_index + 8]
            state['output_index'] += 8
            state['current_row'].clear()
        return

    max_key_size = max([len(x) for x in lut.keys()])
    while state['output_index'] < 64 * pattern_count:
        # Accumulate bits until they match a table code or the escape code.
        test = ''
        for i in range(max_key_size):
            test += '1' if bs.read(1) else '0'
            if test in lut or test == '111111':
                break
        if test in lut:
            for i in range(lut[test]['copy_count']):
                push_pal(lut[test]['pal_index'], state)
        elif test == '111111':
            # Escape code: explicit copy count and palette index follow.
            copy_count = bs.read(3)
            pal_index = bs.read(4)
            for i in range(copy_count):
                push_pal(pal_index, state)
        else:
            raise Exception('Invalid code found in data stream, aborting')
    return bytes(state['output'])
def import_data(self, buffer, parent=None):
    """Decompress a stream that is packed back-to-front.

    Header: u8 valid-bit count of the final compressed byte, u8 XOR
    checksum, u32be decompressed size, u32be compressed size (which
    includes this 10-byte header). The bitstream is then read from the
    END of the compressed data backwards, and the output buffer is
    filled from its end towards its start.

    Returns an mrc.TransformResult with the decompressed payload.
    """
    assert utils.is_bytes(buffer)
    pointer = 0
    total_num_bytes = len(buffer)
    bit_count = utils.from_uint8(buffer[pointer:pointer + 1])
    checksum = utils.from_uint8(buffer[pointer + 1:pointer + 2])
    decompressed_size = utils.from_uint32_be(buffer[pointer + 2:pointer + 6])
    compressed_size = utils.from_uint32_be(buffer[pointer + 6:pointer + 10])
    pointer += 10
    total_num_bytes -= 10
    # Stored compressed size includes the 10-byte header.
    compressed_size -= 10
    compressed_data = bytearray(buffer[pointer:pointer + compressed_size])
    # Checksum failure is non-fatal: warn and keep going.
    if checksum != self._xor_checksum(compressed_data):
        logger.warning('{}: Checksum doesn\'t match header'.format(self))
    pointer += compressed_size
    total_num_bytes -= compressed_size
    # first byte of compressed data is shifted wrongly, fix: left-align
    # the partial final byte so the reverse reader sees its bits first.
    compressed_data[-1] = (compressed_data[-1] << (8 - bit_count)) & 0xff
    # Read bits starting at the last byte / last valid bit, moving backwards.
    bs = bits.BitStream(compressed_data,
                        start_offset=(compressed_size - 1, bit_count - 1),
                        bytes_reverse=True,
                        bit_endian='little',
                        io_endian='big')

    def copy_prev_data(blocklen, offset_size, state):
        # Copy `blocklen` bytes from already-decompressed output that sits
        # `offset + 1` bytes ahead of the (backwards-moving) write pointer.
        offset = bs.read(offset_size)
        for i in range(blocklen):
            state['dptr'] -= 1
            state['ddata'][state['dptr']] = state['ddata'][state['dptr'] + offset + 1]
        return

    def dump_data(num_bytes, state):
        # Copy `num_bytes` literal bytes from the bitstream into the output.
        for i in range(num_bytes):
            state['dptr'] -= 1
            state['ddata'][state['dptr']] = bs.read(8)
        return

    # dptr walks backwards from the end of the output buffer.
    state = {
        'dptr': decompressed_size,
        'ddata': bytearray(decompressed_size),
    }
    # Command decoding: a 1 bit selects a 2-bit opcode, a 0 bit a 1-bit one.
    while True:
        if bs.read(1) == 1:
            test = bs.read(2)
            if test == 0:
                copy_prev_data(3, 9, state)
            elif test == 1:
                copy_prev_data(4, 10, state)
            elif test == 2:
                copy_prev_data(bs.read(8) + 1, 12, state)
            elif test == 3:
                dump_data(bs.read(8) + 9, state)
        else:
            test = bs.read(1)
            if test == 0:
                dump_data(bs.read(3) + 1, state)
            elif test == 1:
                copy_prev_data(2, 8, state)
        # Stop once the output has been filled back to the start.
        if not (state['dptr'] > 0):
            break
    return mrc.TransformResult(payload=bytes(state['ddata']), end_offset=pointer)
def export_data(self, buffer, parent=None):
    """Compress `buffer` back-to-front into the LZ format with a 10-byte
    header (u8 final-byte bit count, u8 XOR checksum, u32be decompressed
    size, u32be compressed size including the header).

    Returns an mrc.TransformResult with the compressed payload.
    """
    assert utils.is_bytes(buffer)
    decompressed_size = len(buffer)
    bs = bits.BitStream(bit_endian='big', io_endian='little')
    pointer = 0

    def encode_raw_data(length, bs):
        # Emit a run of literal bytes already written to the stream:
        # long form (opcode 0x7, 8-bit count) or short form (opcode 0x0,
        # 3-bit count). A zero-length run emits nothing.
        assert length <= 255 + 9
        if length > 8:
            bs.write(length - 9, 8)
            bs.write(0x7, 3)
        elif length > 0:
            bs.write(length - 1, 3)
            bs.write(0x0, 2)

    def find_reference():
        # main form of compression is of the form:
        # - while decompressing from end to start
        # - look forward [up to max_offset] bytes in the decompressed data
        # - copy [up to max_length] bytes to the current decompression position
        # the largest offset supported by the file format is 4096, but this means
        # every call to find_reference loops 4096 times.
        # this takes foreeeever in Python!
        # because the compression is worthless and time is money, max_offset has
        # been slashed to 16 to speed up proceedings.
        #max_offset = (1 << 12) + 1
        max_offset = (1 << 4) + 1
        # largest length supported by the file format is 256
        max_length = (1 << 8) + 1
        length = 4  # throw away short references
        offset = 0
        short_offset = [0, 0, 0]
        for i in range(pointer + 1, pointer + max_offset):
            temp_len = 0
            while (temp_len < max_length) and (i + temp_len < decompressed_size):
                # record short references
                if (temp_len >= 2) and (temp_len <= 4):
                    if short_offset[temp_len - 2] == 0:
                        short_offset[temp_len - 2] = i - pointer
                if buffer[pointer + temp_len] != buffer[i + temp_len]:
                    break
                temp_len += 1
            if temp_len == max_length:
                temp_len -= 1
            # largest reference so far? use it
            if temp_len > length:
                length = temp_len
                offset = i - pointer
        assert length < max_length
        assert offset < max_offset
        # no long references? try short
        if (offset == 0):
            for i in (2, 1, 0):
                max_short_offset = (1 << (i + 8)) + 1
                if (short_offset[i] > 0) and (short_offset[i] < max_short_offset):
                    length = i + 2
                    offset = short_offset[i]
                    break
        return length, offset

    raw = 0
    while pointer < decompressed_size:
        length, ref = find_reference()
        if ref > 0:
            # Flush any pending literal run before emitting a reference.
            if raw > 0:
                encode_raw_data(raw, bs)
                raw = 0
            # Opcode selection mirrors the decompressor's command table.
            if length > 4:
                bs.write(ref - 1, 12)
                bs.write(length - 1, 8)
                bs.write(0x6, 3)
            elif length == 4:
                bs.write(ref - 1, 10)
                bs.write(0x5, 3)
            elif length == 3:
                bs.write(ref - 1, 9)
                bs.write(0x4, 3)
            elif length == 2:
                bs.write(ref - 1, 8)
                bs.write(0x1, 2)
            pointer += length
        else:
            # Literal byte; flush the run at its maximum encodable length.
            bs.write(buffer[pointer], 8)
            raw += 1
            if raw == 264:
                encode_raw_data(raw, bs)
                raw = 0
            pointer += 1
    encode_raw_data(raw, bs)
    compressed_data = bytearray(bs.get_buffer())
    # Right-align the bits of the partial final byte (the decompressor
    # reverses this shift on load).
    # NOTE(review): if the stream ends exactly on a byte boundary,
    # bs.tell()[1] is 0, this shifts by 8 (zeroing the byte) and the
    # header stores a bit count of 0 — confirm whether that case can occur.
    compressed_data[-1] = compressed_data[-1] >> (8 - bs.tell()[1])
    compressed_size = len(compressed_data) + 10
    checksum = self._xor_checksum(compressed_data)
    # NOTE(review): bytearray(6) relies on the [6:10] slice assignment
    # below auto-extending it to the full 10-byte header; presumably
    # bytearray(10) was intended (behavior is the same either way).
    output = bytearray(6)
    output[0:1] = utils.to_uint8(bs.tell()[1])
    output[1:2] = utils.to_uint8(checksum)
    output[2:6] = utils.to_uint32_be(decompressed_size)
    output[6:10] = utils.to_uint32_be(compressed_size)
    output.extend(compressed_data)
    return mrc.TransformResult(payload=bytes(output))