def decode_chunk_into(chunk, buf, block_size):
    num_channels = chunk.shape[0]
    # Grid size (number of blocks in the chunk)
    gx = ceil_div(chunk.shape[3], block_size[0])
    gy = ceil_div(chunk.shape[2], block_size[1])
    gz = ceil_div(chunk.shape[1], block_size[2])
    if len(buf) < num_channels * (4 + 8 * gx * gy * gz):
        raise InvalidFormatError("compressed_segmentation file too short")
    channel_offsets = [
        4 * ret[0]
        for ret in struct.iter_unpack("<I", buf[:4 * num_channels])
    ]
    for channel, (offset, next_offset) in enumerate(
            itertools.zip_longest(channel_offsets, channel_offsets[1:])):
        # next_offset will be None for the last channel
        if offset + 8 * gx * gy * gz > len(buf):
            raise InvalidFormatError("compressed_segmentation channel offset "
                                     "is too large (truncated file?)")
        _decode_channel_into(chunk, channel, buf[offset:next_offset],
                             block_size)
    return chunk
def _decode_channel_into(chunk, channel, buf, block_size):
    # Grid size (number of blocks in the chunk)
    gx = ceil_div(chunk.shape[3], block_size[0])
    gy = ceil_div(chunk.shape[2], block_size[1])
    gz = ceil_div(chunk.shape[1], block_size[2])
    block_num_elem = block_size[0] * block_size[1] * block_size[2]
    for z, y, x in np.ndindex((gz, gy, gx)):
        # Read the block header
        res = struct.unpack_from("<II", buf, 8 * (x + gx * (y + gy * z)))
        lookup_table_offset = 4 * (res[0] & 0x00FFFFFF)
        bits = res[0] >> 24
        if bits not in (0, 1, 2, 4, 8, 16, 32):
            raise InvalidFormatError(
                "Invalid number of encoding bits for "
                "compressed_segmentation block ({0})".format(bits))
        encoded_values_offset = 4 * res[1]
        lookup_table_past_end = lookup_table_offset + chunk.itemsize * min(
            (2**bits),
            ((len(buf) - lookup_table_offset) // chunk.itemsize))
        lookup_table = np.frombuffer(
            buf[lookup_table_offset:lookup_table_past_end],
            dtype=chunk.dtype)
        if bits == 0:
            block = np.empty(block_size, dtype=chunk.dtype)
            try:
                block[...] = lookup_table[0]
            except IndexError as exc:
                raise InvalidFormatError(
                    "Invalid compressed_segmentation data: indexing out of "
                    "the lookup table") from exc
        else:
            values_per_32bit = 32 // bits
            encoded_values_end = encoded_values_offset + 4 * (ceil_div(
                block_num_elem, values_per_32bit))
            if encoded_values_end > len(buf):
                raise InvalidFormatError(
                    "Invalid compressed_segmentation data: file too short, "
                    "insufficient room for encoded values")
            packed_values = np.frombuffer(
                buf[encoded_values_offset:encoded_values_end], dtype="<I")
            encoded_values = _unpack_encoded_values(packed_values, bits,
                                                    block_num_elem)
            # Apply the lookup table
            try:
                decoded_values = lookup_table[encoded_values]
            except IndexError as exc:
                raise InvalidFormatError(
                    "Invalid compressed_segmentation data: indexing out of "
                    "the lookup table") from exc
            block = decoded_values.reshape(
                (block_size[2], block_size[1], block_size[0]))
        # Remove padding
        zmax = min(block_size[2], chunk.shape[1] - z * block_size[2])
        ymax = min(block_size[1], chunk.shape[2] - y * block_size[1])
        xmax = min(block_size[0], chunk.shape[3] - x * block_size[0])
        chunk[channel,
              z * block_size[2]:(z + 1) * block_size[2],
              y * block_size[1]:(y + 1) * block_size[1],
              x * block_size[0]:(x + 1) * block_size[0]] = block[:zmax,
                                                                 :ymax,
                                                                 :xmax]
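# Sketch (not part of the original module) of the per-block header that
# _decode_channel_into reads above.  All offsets are expressed in 32-bit
# words relative to the start of the channel buffer; the decoder multiplies
# them by 4 to get byte offsets.
#   first word:  bits 0-23  -> lookup-table offset (in 32-bit words)
#                bits 24-31 -> encoding bits per value (0, 1, 2, 4, 8, 16, 32)
#   second word: offset of the packed encoded values (in 32-bit words)
header = struct.pack("<II", (4 << 24) | 0x10, 0x18)
first_word, second_word = struct.unpack("<II", header)
assert first_word & 0x00FFFFFF == 0x10  # lookup table at word 16 (byte 64)
assert first_word >> 24 == 4            # 4 encoding bits per value
assert second_word == 0x18              # encoded values at word 24 (byte 96)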
def downscale_info(scale_level):
    factors = [2**max(0, scale_level - delay)
               for delay in axis_level_delays]
    scale_info = copy.deepcopy(full_scale_info)
    scale_info["resolution"] = [
        res * axis_factor
        for res, axis_factor in zip(full_scale_info["resolution"], factors)
    ]
    scale_info["size"] = [
        ceil_div(sz, axis_factor)
        for sz, axis_factor in zip(full_scale_info["size"], factors)
    ]
    # Key is the resolution in micrometres
    scale_info["key"] = format_length(min(scale_info["resolution"]),
                                      key_unit)

    max_delay = max(axis_level_delays)
    anisotropy_factors = [
        max(0, max_delay - delay - scale_level)
        for delay in axis_level_delays
    ]
    sum_anisotropy_factors = sum(anisotropy_factors)

    # Ensure that the smallest chunk size is 1 for extremely anisotropic
    # datasets (i.e. reduce the anisotropy of chunk_size)
    excess_anisotropy = sum_anisotropy_factors - 3 * target_chunk_exponent
    if excess_anisotropy > 0:
        anisotropy_reduction = ceil_div(
            excess_anisotropy,
            sum(1 for f in anisotropy_factors if f != 0))
        anisotropy_factors = [
            max(f - anisotropy_reduction, 0) for f in anisotropy_factors
        ]
        sum_anisotropy_factors = sum(anisotropy_factors)
        assert sum_anisotropy_factors <= 3 * target_chunk_exponent

    base_chunk_exponent = (
        target_chunk_exponent - (sum_anisotropy_factors + 1) // 3)
    assert base_chunk_exponent >= 0
    scale_info["chunk_sizes"] = [[
        2**(base_chunk_exponent + anisotropy_factor)
        for anisotropy_factor in anisotropy_factors
    ]]

    assert (abs(
        sum(int(round(math.log2(size)))
            for size in scale_info["chunk_sizes"][0])
        - 3 * target_chunk_exponent) <= 1)

    return scale_info
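# Worked example for the chunk-size logic above (hypothetical values, not
# taken from the original code): suppose axis_level_delays = [0, 0, 2]
# (the z axis starts being downscaled two levels after x and y) and
# target_chunk_exponent = 6 (aiming for 2**(3*6) = 64*64*64 voxels/chunk).
# At scale_level = 1:
#   factors             = [2**max(0, 1-0), 2**max(0, 1-0), 2**max(0, 1-2)]
#                       = [2, 2, 1]
#   anisotropy_factors  = [max(0, 2-0-1), max(0, 2-0-1), max(0, 2-2-1)]
#                       = [1, 1, 0]           (max_delay = 2, sum = 2)
#   excess_anisotropy   = 2 - 3*6 < 0         (no reduction needed)
#   base_chunk_exponent = 6 - (2 + 1)//3 = 5
#   chunk_sizes         = [[2**(5+1), 2**(5+1), 2**(5+0)]] = [[64, 64, 32]]
# The final assertion holds: 6 + 6 + 5 = 17 and |17 - 3*6| = 1 <= 1.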
def _encode_channel(chunk_channel, block_size):
    block_size = tuple(block_size)
    # Grid size (number of blocks in the chunk)
    gx = ceil_div(chunk_channel.shape[2], block_size[0])
    gy = ceil_div(chunk_channel.shape[1], block_size[1])
    gz = ceil_div(chunk_channel.shape[0], block_size[2])
    stored_lut_offsets = {}
    buf = bytearray(gx * gy * gz * 8)
    for z, y, x in np.ndindex((gz, gy, gx)):
        block = chunk_channel[z * block_size[2]:(z + 1) * block_size[2],
                              y * block_size[1]:(y + 1) * block_size[1],
                              x * block_size[0]:(x + 1) * block_size[0]]
        if block.shape != block_size:
            block = pad_block(block, block_size)
        # TODO optimization: to improve additional compression (gzip), sort
        # the list of unique symbols by decreasing frequency using
        # return_counts=True so that low-value symbols are used more often.
        # Alternatively, sort by label value to improve sharing of lookup
        # tables…
        (lookup_table, encoded_values) = np.unique(
            block, return_inverse=True, return_counts=False)
        bits = number_of_encoding_bits(len(lookup_table))

        # Write look-up table to the buffer (or re-use another one)
        lut_bytes = lookup_table.astype(block.dtype).tobytes()
        if lut_bytes in stored_lut_offsets:
            lookup_table_offset = stored_lut_offsets[lut_bytes]
        else:
            assert len(buf) % 4 == 0
            lookup_table_offset = len(buf) // 4
            buf += lut_bytes
            stored_lut_offsets[lut_bytes] = lookup_table_offset

        assert len(buf) % 4 == 0
        encoded_values_offset = len(buf) // 4
        buf += _pack_encoded_values(encoded_values, bits)

        assert lookup_table_offset == (lookup_table_offset & 0xFFFFFF)
        struct.pack_into("<II", buf, 8 * (x + gx * (y + gy * z)),
                         lookup_table_offset | (bits << 24),
                         encoded_values_offset)
    return buf
def _unpack_encoded_values(packed_values, bits, num_values):
    assert bits > 0
    bitmask = (1 << bits) - 1
    values_per_32bit = 32 // bits
    padded_values = np.empty(
        values_per_32bit * ceil_div(num_values, values_per_32bit),
        dtype="<I")
    for shift in range(values_per_32bit):
        padded_values[shift::values_per_32bit] = (
            (packed_values >> (shift * bits)) & bitmask)
    return padded_values[:num_values]
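# Minimal check of the bit unpacking above (not part of the original
# module): with 4 encoding bits, the little-endian 32-bit word 0x00000321
# holds the values 1, 2 and 3 in its lowest three nibbles, least
# significant value first.
packed = np.array([0x00000321], dtype="<I")
assert np.array_equal(_unpack_encoded_values(packed, 4, 3), [1, 2, 3])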
def downscale(self, chunk, downscaling_factors):
    if not self.check_factors(downscaling_factors):
        raise NotImplementedError
    new_chunk = np.empty(
        (chunk.shape[0],
         ceil_div(chunk.shape[1], downscaling_factors[2]),
         ceil_div(chunk.shape[2], downscaling_factors[1]),
         ceil_div(chunk.shape[3], downscaling_factors[0])),
        dtype=chunk.dtype)
    for t, z, y, x in np.ndindex(*new_chunk.shape):
        zd = z * downscaling_factors[2]
        yd = y * downscaling_factors[1]
        xd = x * downscaling_factors[0]
        block = chunk[t,
                      zd:(zd + downscaling_factors[2]),
                      yd:(yd + downscaling_factors[1]),
                      xd:(xd + downscaling_factors[0])]
        labels, counts = np.unique(block.flat, return_counts=True)
        new_chunk[t, z, y, x] = labels[np.argmax(counts)]
    return new_chunk
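# Tiny illustration of the majority-vote rule used above (not from the
# original module): among the eight labels of a 2x2x2 block, the most
# frequent label wins and becomes the downscaled voxel's value.
example_block = np.array([[[3, 3], [3, 7]],
                          [[7, 3], [5, 3]]], dtype=np.uint32)
example_labels, example_counts = np.unique(example_block.flat,
                                           return_counts=True)
assert example_labels[np.argmax(example_counts)] == 3  # 3 occurs 5 times of 8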
def _pack_encoded_values(encoded_values, bits):
    # TODO optimize with np.packbits for bits == 1
    if bits == 0:
        return bytes()
    else:
        values_per_32bit = 32 // bits
        assert np.all(encoded_values == encoded_values & ((1 << bits) - 1))
        padded_values = np.empty(
            values_per_32bit * ceil_div(len(encoded_values),
                                        values_per_32bit),
            dtype="<I")
        padded_values[:len(encoded_values)] = encoded_values
        padded_values[len(encoded_values):] = 0
        packed_values = functools.reduce(
            np.bitwise_or,
            (padded_values[shift::values_per_32bit] << (shift * bits)
             for shift in range(values_per_32bit)))
        return packed_values.tobytes()
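# Minimal round-trip sketch for the two packing helpers above (not part of
# the original module): pack a short sequence of 2-bit values, reinterpret
# the resulting bytes as little-endian 32-bit words, and unpack them again.
values = np.array([0, 1, 2, 3, 1, 0, 2], dtype="<I")
repacked = np.frombuffer(_pack_encoded_values(values, 2), dtype="<I")
assert np.array_equal(_unpack_encoded_values(repacked, 2, len(values)),
                      values)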
def convert_scale(pyramid_io, level, zoomer_accessor):
    # Key is the resolution in micrometres
    key = pyramid_io.info["scales"][level]["key"]
    scale_info = pyramid_io.scale_info(key)
    chunk_size = scale_info["chunk_sizes"][0]
    key = scale_info["key"]
    size = scale_info["size"]
    dtype = np.dtype(pyramid_io.info["data_type"]).newbyteorder("<")
    num_channels = pyramid_io.info["num_channels"]
    assert num_channels == 1
    scale_chunk_to_uint8 = get_chunk_scaler_to_uint8(dtype)

    def load_tilechunk(xmin, xmax, ymin, ymax, zmin, zmax):
        ret = np.empty([num_channels, zmax - zmin, ymax - ymin, xmax - xmin],
                       dtype=np.uint8)
        for x_idx in range(xmin // chunk_size[0],
                           ceil_div(xmax, chunk_size[0])):
            for y_idx in range(ymin // chunk_size[1],
                               ceil_div(ymax, chunk_size[1])):
                for z_idx in range(zmin // chunk_size[2],
                                   ceil_div(zmax, chunk_size[2])):
                    chunk_xmin = chunk_size[0] * x_idx
                    chunk_xmax = min(chunk_size[0] * (x_idx + 1), size[0])
                    chunk_ymin = chunk_size[1] * y_idx
                    chunk_ymax = min(chunk_size[1] * (y_idx + 1), size[1])
                    chunk_zmin = chunk_size[2] * z_idx
                    chunk_zmax = min(chunk_size[2] * (z_idx + 1), size[2])
                    chunk = pyramid_io.read_chunk(
                        key, (chunk_xmin, chunk_xmax,
                              chunk_ymin, chunk_ymax,
                              chunk_zmin, chunk_zmax))
                    chunk = scale_chunk_to_uint8(chunk)
                    ret[:,
                        chunk_zmin - zmin:chunk_zmax - zmin,
                        chunk_ymin - ymin:chunk_ymax - ymin,
                        chunk_xmin - xmin:chunk_xmax - xmin] = chunk
        return ret

    def write_x_tiles(tilechunk):
        for x in range(tilechunk.shape[3]):
            tile = tilechunk[0, :, :, x].T
            tile_path = TILE_PATTERN.format(
                "y", y_idx, "z", z_idx,
                level=level, slice_axis="x",
                slice_number=x_idx * TILE_SIZE + x)
            write_tile(tile, tile_path)

    def write_y_tiles(tilechunk):
        for y in range(tilechunk.shape[2]):
            tile = tilechunk[0, :, y, :]
            tile_path = TILE_PATTERN.format(
                "z", z_idx, "x", x_idx,
                level=level, slice_axis="y",
                slice_number=y_idx * TILE_SIZE + y)
            write_tile(tile, tile_path)

    def write_z_tiles(tilechunk):
        for z in range(tilechunk.shape[1]):
            tile = tilechunk[0, z, :, :]
            tile_path = TILE_PATTERN.format(
                "y", y_idx, "x", x_idx,
                level=level, slice_axis="z",
                slice_number=z_idx * TILE_SIZE + z)
            write_tile(tile, tile_path)

    def write_tile(tile, tile_path):
        img = PIL.Image.fromarray(tile)
        io_buf = io.BytesIO()
        img.save(io_buf, format="png")
        zoomer_accessor.store_file(tile_path, io_buf.getvalue(),
                                   mime_type="image/png")

    progress_bar = tqdm(
        total=(ceil_div(size[0], TILE_SIZE)
               * ceil_div(size[1], TILE_SIZE)
               * ceil_div(size[2], TILE_SIZE)),
        desc="converting scale {}".format(key),
        unit="tilechunk",
        leave=True)
    for x_idx, y_idx, z_idx in np.ndindex(
            (ceil_div(size[0], TILE_SIZE),
             ceil_div(size[1], TILE_SIZE),
             ceil_div(size[2], TILE_SIZE))):
        xmin = TILE_SIZE * x_idx
        xmax = min(TILE_SIZE * (x_idx + 1), size[0])
        ymin = TILE_SIZE * y_idx
        ymax = min(TILE_SIZE * (y_idx + 1), size[1])
        zmin = TILE_SIZE * z_idx
        zmax = min(TILE_SIZE * (z_idx + 1), size[2])
        tilechunk = load_tilechunk(xmin, xmax, ymin, ymax, zmin, zmax)
        write_x_tiles(tilechunk)
        write_y_tiles(tilechunk)
        write_z_tiles(tilechunk)
        progress_bar.update()
def compute_dyadic_downscaling(info, source_scale_index, downscaler,
                               chunk_reader, chunk_writer):
    # Key is the resolution in micrometres
    old_scale_info = info["scales"][source_scale_index]
    new_scale_info = info["scales"][source_scale_index + 1]
    old_chunk_size = old_scale_info["chunk_sizes"][0]
    new_chunk_size = new_scale_info["chunk_sizes"][0]
    old_key = old_scale_info["key"]
    new_key = new_scale_info["key"]
    old_size = old_scale_info["size"]
    new_size = new_scale_info["size"]
    dtype = np.dtype(info["data_type"]).newbyteorder("<")
    num_channels = info["num_channels"]
    downscaling_factors = [
        1 if os == ns else 2 for os, ns in zip(old_size, new_size)
    ]
    if new_size != [
            ceil_div(os, ds) for os, ds in zip(old_size, downscaling_factors)
    ]:
        raise ValueError("Unsupported downscaling factor between scales "
                         "{} and {} (only 1 and 2 are supported)".format(
                             old_key, new_key))

    downscaler.check_factors(downscaling_factors)

    half_chunk = [
        osz // f for osz, f in zip(old_chunk_size, downscaling_factors)
    ]
    chunk_fetch_factor = [
        nsz // hc for nsz, hc in zip(new_chunk_size, half_chunk)
    ]

    def load_and_downscale_old_chunk(z_idx, y_idx, x_idx):
        xmin = old_chunk_size[0] * x_idx
        xmax = min(old_chunk_size[0] * (x_idx + 1), old_size[0])
        ymin = old_chunk_size[1] * y_idx
        ymax = min(old_chunk_size[1] * (y_idx + 1), old_size[1])
        zmin = old_chunk_size[2] * z_idx
        zmax = min(old_chunk_size[2] * (z_idx + 1), old_size[2])
        old_chunk_coords = (xmin, xmax, ymin, ymax, zmin, zmax)
        chunk = chunk_reader.read_chunk(old_key, old_chunk_coords)
        return downscaler.downscale(chunk, downscaling_factors)

    chunk_range = (ceil_div(new_size[0], new_chunk_size[0]),
                   ceil_div(new_size[1], new_chunk_size[1]),
                   ceil_div(new_size[2], new_chunk_size[2]))
    # TODO how to do progress report correctly with logging?
    for x_idx, y_idx, z_idx in tqdm(
            np.ndindex(chunk_range),
            total=np.prod(chunk_range),
            desc="computing scale {}".format(new_key),
            unit="chunks",
            leave=True):
        xmin = new_chunk_size[0] * x_idx
        xmax = min(new_chunk_size[0] * (x_idx + 1), new_size[0])
        ymin = new_chunk_size[1] * y_idx
        ymax = min(new_chunk_size[1] * (y_idx + 1), new_size[1])
        zmin = new_chunk_size[2] * z_idx
        zmax = min(new_chunk_size[2] * (z_idx + 1), new_size[2])
        new_chunk_coords = (xmin, xmax, ymin, ymax, zmin, zmax)

        new_chunk = np.empty(
            [num_channels, zmax - zmin, ymax - ymin, xmax - xmin],
            dtype=dtype)
        new_chunk[:, :half_chunk[2], :half_chunk[1], :half_chunk[0]] = (
            load_and_downscale_old_chunk(z_idx * chunk_fetch_factor[2],
                                         y_idx * chunk_fetch_factor[1],
                                         x_idx * chunk_fetch_factor[0]))
        if new_chunk.shape[1] > half_chunk[2]:
            new_chunk[:, half_chunk[2]:, :half_chunk[1], :half_chunk[0]] = (
                load_and_downscale_old_chunk(
                    z_idx * chunk_fetch_factor[2] + 1,
                    y_idx * chunk_fetch_factor[1],
                    x_idx * chunk_fetch_factor[0]))
        if new_chunk.shape[2] > half_chunk[1]:
            new_chunk[:, :half_chunk[2], half_chunk[1]:, :half_chunk[0]] = (
                load_and_downscale_old_chunk(
                    z_idx * chunk_fetch_factor[2],
                    y_idx * chunk_fetch_factor[1] + 1,
                    x_idx * chunk_fetch_factor[0]))
        if (new_chunk.shape[1] > half_chunk[2]
                and new_chunk.shape[2] > half_chunk[1]):
            new_chunk[:, half_chunk[2]:, half_chunk[1]:, :half_chunk[0]] = (
                load_and_downscale_old_chunk(
                    z_idx * chunk_fetch_factor[2] + 1,
                    y_idx * chunk_fetch_factor[1] + 1,
                    x_idx * chunk_fetch_factor[0]))
        if new_chunk.shape[3] > half_chunk[0]:
            new_chunk[:, :half_chunk[2], :half_chunk[1], half_chunk[0]:] = (
                load_and_downscale_old_chunk(
                    z_idx * chunk_fetch_factor[2],
                    y_idx * chunk_fetch_factor[1],
                    x_idx * chunk_fetch_factor[0] + 1))
        if (new_chunk.shape[1] > half_chunk[2]
                and new_chunk.shape[3] > half_chunk[0]):
            new_chunk[:, half_chunk[2]:, :half_chunk[1], half_chunk[0]:] = (
                load_and_downscale_old_chunk(
                    z_idx * chunk_fetch_factor[2] + 1,
                    y_idx * chunk_fetch_factor[1],
                    x_idx * chunk_fetch_factor[0] + 1))
        if (new_chunk.shape[2] > half_chunk[1]
                and new_chunk.shape[3] > half_chunk[0]):
            new_chunk[:, :half_chunk[2], half_chunk[1]:, half_chunk[0]:] = (
                load_and_downscale_old_chunk(
                    z_idx * chunk_fetch_factor[2],
                    y_idx * chunk_fetch_factor[1] + 1,
                    x_idx * chunk_fetch_factor[0] + 1))
        if (new_chunk.shape[1] > half_chunk[2]
                and new_chunk.shape[2] > half_chunk[1]
                and new_chunk.shape[3] > half_chunk[0]):
            new_chunk[:, half_chunk[2]:, half_chunk[1]:, half_chunk[0]:] = (
                load_and_downscale_old_chunk(
                    z_idx * chunk_fetch_factor[2] + 1,
                    y_idx * chunk_fetch_factor[1] + 1,
                    x_idx * chunk_fetch_factor[0] + 1))

        chunk_writer.write_chunk(new_chunk.astype(dtype), new_key,
                                 new_chunk_coords)