def _unzip(code, block_size=8, max_size=None):
    """Decode a dictionary-coded bit stream (as written by ``_zip``) into text.

    The stream is read as a sequence of tokens.  Each token is a codeword of
    ``log2count`` bits that indexes a previously decoded phrase, followed by
    ``block_size`` bits holding one new symbol.  The decoded phrase is the
    referenced phrase with the new symbol appended (this looks like an
    LZ78-style scheme).

    Args:
        code: Encoded data, passed straight to ``BitInStream``.
        block_size: Number of bits per symbol; must match the value used when
            encoding.
        max_size: Maximum number of dictionary entries (not counting the empty
            root entry).  ``None`` or ``0`` means unlimited.  Must match the
            value used when encoding.

    Returns:
        The decoded text: the output stream's contents decoded as UTF-8 with
        all trailing NUL characters removed.  Note that this also removes any
        NUL characters the original text genuinely ended with.

    Raises:
        IndexError: If a codeword refers to a dictionary entry that does not
            exist (corrupt input or mismatched ``max_size``).
    """
    in_stream = BitInStream(code)
    out_stream = BitOutStream()
    count = 0
    log2count = 0
    # Index 0 is the empty phrase, i.e. "no prefix".
    dictionary = [np.empty(0, np.ubyte)]
    while True:
        try:
            token = in_stream.next(log2count + block_size)
        except StopIteration:
            # Input exhausted: nothing more to decode.
            break
        # The first token carries no codeword bits at all (log2count == 0),
        # and int('', 2) would raise, hence the explicit special case.
        if log2count:
            cw = int(''.join(map(str, token[:log2count])), 2)
        else:
            cw = 0
        symbol = token[log2count:]
        phrase = np.concatenate((dictionary[cw], symbol)).view(dtype=np.ubyte)
        if not max_size or count < max_size:
            # Only phrases created while under the limit are addressable by
            # later codewords, so only those are kept.  Storing every phrase
            # would let the dictionary grow without bound despite max_size.
            dictionary.append(phrase)
            count += 1
            # Widen the codeword as soon as `count` no longer fits.
            if 1 << log2count <= count:
                log2count += 1
        out_stream.write(phrase)
    logging.info('Unzip dictionary size: %d', count)
    return out_stream.close().decode('utf-8').rstrip('\0')
def _unzip(code, block_size=8, max_size=None):
    """Decode a dictionary-coded bit stream (as written by ``_zip``) into text.

    The stream is read as a sequence of tokens.  Each token is a codeword of
    ``log2count`` bits that indexes a previously decoded phrase, followed by
    ``block_size`` bits holding one new symbol.  The decoded phrase is the
    referenced phrase with the new symbol appended (this looks like an
    LZ78-style scheme).

    Args:
        code: Encoded data, passed straight to ``BitInStream``.
        block_size: Number of bits per symbol; must match the value used when
            encoding.
        max_size: Maximum number of dictionary entries (not counting the empty
            root entry).  ``None`` or ``0`` means unlimited.  Must match the
            value used when encoding.

    Returns:
        The decoded text: the output stream's contents decoded as UTF-8 with
        all trailing NUL characters removed.  Note that this also removes any
        NUL characters the original text genuinely ended with.

    Raises:
        IndexError: If a codeword refers to a dictionary entry that does not
            exist (corrupt input or mismatched ``max_size``).
    """
    in_stream = BitInStream(code)
    out_stream = BitOutStream()
    count = 0
    log2count = 0
    # Index 0 is the empty phrase, i.e. "no prefix".
    dictionary = [np.empty(0, np.ubyte)]
    while True:
        try:
            token = in_stream.next(log2count + block_size)
        except StopIteration:
            # Input exhausted: nothing more to decode.
            break
        # The first token carries no codeword bits at all (log2count == 0),
        # and int("", 2) would raise, hence the explicit special case.
        if log2count:
            cw = int("".join(map(str, token[:log2count])), 2)
        else:
            cw = 0
        symbol = token[log2count:]
        phrase = np.concatenate((dictionary[cw], symbol)).view(dtype=np.ubyte)
        if not max_size or count < max_size:
            # Only phrases created while under the limit are addressable by
            # later codewords, so only those are kept.  Storing every phrase
            # would let the dictionary grow without bound despite max_size.
            dictionary.append(phrase)
            count += 1
            # Widen the codeword as soon as `count` no longer fits.
            if 1 << log2count <= count:
                log2count += 1
        out_stream.write(phrase)
    logging.info("Unzip dictionary size: %d", count)
    return out_stream.close().decode("utf-8").rstrip("\0")
def _zip(text, block_size=8, max_size=None):
    """Encode text as a dictionary-coded bit stream readable by ``_unzip``.

    The UTF-8 bytes of ``text`` are consumed symbol by symbol while walking a
    trie of already-seen phrases.  When a symbol has no matching child, one
    token is emitted: the id of the current trie node written as
    ``log2count`` binary digits, followed by the symbol itself.  The walk then
    restarts at the root (this looks like an LZ78-style scheme).

    Args:
        text: The string to encode.
        block_size: Number of bits per symbol, forwarded to ``BitInStream``.
        max_size: Maximum number of trie nodes to create (not counting the
            root).  ``None`` or ``0`` means unlimited.

    Returns:
        Whatever ``BitOutStream.close()`` returns (presumably the encoded
        bytes -- confirm against ``BitOutStream``).
    """
    in_stream = BitInStream(text.encode('utf-8'), block_size)
    out_stream = BitOutStream()
    dictionary = Node(0, {})
    current = dictionary
    count = 0
    log2count = 0
    for symbol in in_stream:
        # tobytes() returns the same bytes as the former tostring() alias,
        # which newer NumPy releases no longer provide.
        # NOTE(review): assumes `symbol` is a NumPy array -- confirm against
        # BitInStream.
        sym = symbol.tobytes()
        if sym in current.children:
            # Known phrase so far: keep extending the match.
            current = current.children[sym]
            continue
        # The format yields at least one digit even for width 0, so slice to
        # get an empty prefix for the very first token.
        prefix = ('{:0%db}' % log2count).format(current.id)[:log2count]
        out_stream.write(prefix)
        out_stream.write(symbol)
        if not max_size or count < max_size:
            count += 1
            # Widen the codeword as soon as `count` no longer fits.
            if 1 << log2count <= count:
                log2count += 1
            # New phrases are only registered while under the limit, so that
            # node ids stay in step with the counting done in _unzip.
            current.children[sym] = Node(count, {})
        current = dictionary
    if current != dictionary:
        # Input ended in the middle of a known phrase: flush it using an
        # all-zero padding symbol.
        prefix = ('{:0%db}' % log2count).format(current.id)[:log2count]
        out_stream.write(prefix)
        out_stream.write('0' * block_size)
    logging.info('Zip dictionary size: %d', count)
    return out_stream.close()
def _zip(text, block_size=8, max_size=None):
    """Encode text as a dictionary-coded bit stream readable by ``_unzip``.

    The UTF-8 bytes of ``text`` are consumed symbol by symbol while walking a
    trie of already-seen phrases.  When a symbol has no matching child, one
    token is emitted: the id of the current trie node written as
    ``log2count`` binary digits, followed by the symbol itself.  The walk then
    restarts at the root (this looks like an LZ78-style scheme).

    Args:
        text: The string to encode.
        block_size: Number of bits per symbol, forwarded to ``BitInStream``.
        max_size: Maximum number of trie nodes to create (not counting the
            root).  ``None`` or ``0`` means unlimited.

    Returns:
        Whatever ``BitOutStream.close()`` returns (presumably the encoded
        bytes -- confirm against ``BitOutStream``).
    """
    in_stream = BitInStream(text.encode("utf-8"), block_size)
    out_stream = BitOutStream()
    dictionary = Node(0, {})
    current = dictionary
    count = 0
    log2count = 0
    for symbol in in_stream:
        # tobytes() returns the same bytes as the former tostring() alias,
        # which newer NumPy releases no longer provide.
        # NOTE(review): assumes `symbol` is a NumPy array -- confirm against
        # BitInStream.
        sym = symbol.tobytes()
        if sym in current.children:
            # Known phrase so far: keep extending the match.
            current = current.children[sym]
            continue
        # The format yields at least one digit even for width 0, so slice to
        # get an empty prefix for the very first token.
        prefix = ("{:0%db}" % log2count).format(current.id)[:log2count]
        out_stream.write(prefix)
        out_stream.write(symbol)
        if not max_size or count < max_size:
            count += 1
            # Widen the codeword as soon as `count` no longer fits.
            if 1 << log2count <= count:
                log2count += 1
            # New phrases are only registered while under the limit, so that
            # node ids stay in step with the counting done in _unzip.
            current.children[sym] = Node(count, {})
        current = dictionary
    if current != dictionary:
        # Input ended in the middle of a known phrase: flush it using an
        # all-zero padding symbol.
        prefix = ("{:0%db}" % log2count).format(current.id)[:log2count]
        out_stream.write(prefix)
        out_stream.write("0" * block_size)
    logging.info("Zip dictionary size: %d", count)
    return out_stream.close()