def compute_scales(vol, mip, shape, axis, factor, chunk_size=None):
  shape = min2(vol.meta.volume_size(mip), shape)

  # sometimes we downsample a base layer of 512x512
  # into underlying chunks of 64x64 which permits more scales
  underlying_mip = (mip + 1) if (mip + 1) in vol.available_mips else mip

  if chunk_size:
    scale_chunk_size = Vec(*chunk_size).astype(np.float32)
  else:
    scale_chunk_size = vol.meta.chunk_size(underlying_mip).astype(np.float32)

  if factor is None:
    factor = axis_to_factor(axis)

  factors = compute_factors(shape, factor, scale_chunk_size, vol.meta.volume_size(mip))
  scales = [ vol.meta.resolution(mip) ]

  precision = max(map(getprecision, vol.meta.resolution(mip)))

  def prec(x):
    if precision == 0:
      return int(x)
    return round(x, precision)

  for factor3 in factors:
    scales.append(
      list(map(prec, Vec(*scales[-1], dtype=np.float32) * Vec(*factor3)))
    )

  return scales[1:]
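# Hedged sketch (not part of the source): the loop above multiplies the previous
# resolution by each factor in turn, so a base resolution of (4, 4, 40) with
# factors [(2,2,1)] * 3 yields the three derived resolutions below. Plain numpy
# stands in for Vec and for the prec() rounding; the helper name is hypothetical.
import numpy as np

def _sketch_compute_scales(base_resolution, factors):
  scales = [ np.array(base_resolution, dtype=np.float32) ]
  for f in factors:
    scales.append(scales[-1] * np.array(f, dtype=np.float32))
  return [ s.astype(int).tolist() for s in scales[1:] ]

assert _sketch_compute_scales((4, 4, 40), [(2, 2, 1)] * 3) == \
  [[8, 8, 40], [16, 16, 40], [32, 32, 40]]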
def test_sharded_image_bits(scale):
  dataset_size = Vec(*scale["size"])
  chunk_size = Vec(*scale["chunk_sizes"][0])

  spec = create_sharded_image_info(
    dataset_size=dataset_size,
    chunk_size=chunk_size,
    encoding=scale["encoding"],
    dtype=np.uint8
  )

  shape = image_shard_shape_from_spec(spec, dataset_size, chunk_size)
  shape = lib.min2(shape, dataset_size)
  dataset_bbox = Bbox.from_vec(dataset_size)
  gpts = list(gridpoints(dataset_bbox, dataset_bbox, chunk_size))
  grid_size = np.ceil(dataset_size / chunk_size).astype(np.int64)

  spec = ShardingSpecification.from_dict(spec)
  reader = ShardReader(None, None, spec)

  morton_codes = compressed_morton_code(gpts, grid_size)
  min_num_shards = prod(dataset_size / shape)
  max_num_shards = prod(np.ceil(dataset_size / shape))

  assert 0 < min_num_shards <= 2 ** spec.shard_bits
  assert 0 < max_num_shards <= 2 ** spec.shard_bits

  real_num_shards = len(set(map(reader.get_filename, morton_codes)))

  assert min_num_shards <= real_num_shards <= max_num_shards
def test_broken_dataset():
  """
  This dataset was previously returning 19 total bits
  when 20 were needed to cover all the morton codes.
  """
  scale = {
    'chunk_sizes': [[128, 128, 20]],
    'encoding': 'raw',
    'key': '16_16_40',
    'resolution': [16, 16, 40],
    'size': [10240, 10240, 990],
    'voxel_offset': [17408, 9216, 4855],
  }
  dataset_size = Vec(*scale["size"])
  chunk_size = Vec(*scale["chunk_sizes"][0])

  spec = create_sharded_image_info(
    dataset_size=dataset_size,
    chunk_size=chunk_size,
    encoding="jpeg",
    dtype=np.uint8
  )
  total_bits = spec["shard_bits"] + spec["minishard_bits"] + spec["preshift_bits"]
  assert total_bits == 20
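# Hedged worked check of the 20-bit figure above, assuming the compressed
# morton code needs ceil(log2(grid_dim)) bits per axis, summed over the axes.
# The helper is illustrative only and not part of the source.
import numpy as np

def _morton_bits_needed(size, chunk_size):
  grid = np.ceil(np.array(size) / np.array(chunk_size))  # chunks per axis
  return int(np.sum(np.ceil(np.log2(grid))))

# grid = (80, 80, 50) chunks -> 7 + 7 + 6 = 20 bits
assert _morton_bits_needed([10240, 10240, 990], [128, 128, 20]) == 20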
def SpatialIndexTask(
  cloudpath: str,
  shape: Tuple[int, int, int],
  offset: Tuple[int, int, int],
  subdir: str,
  precision: int,
  mip: int = 0,
  fill_missing: bool = False,
  compress: Optional[Union[str, bool]] = 'gzip',
) -> None:
  """
  The main way to add a spatial index is to use the MeshTask or SkeletonTask,
  but old datasets or broken datasets may need it to be reconstituted. An
  alternative use is to create the spatial index over a different area size
  than the mesh or skeleton task.
  """
  cv = CloudVolume(
    cloudpath, mip=mip,
    bounded=False, fill_missing=fill_missing
  )
  cf = CloudFiles(cloudpath)

  bounds = Bbox(Vec(*offset), Vec(*shape) + Vec(*offset))
  bounds = Bbox.clamp(bounds, cv.bounds)

  data_bounds = bounds.clone()
  data_bounds.maxpt += 1 # match typical Marching Cubes overlap

  resolution = cv.resolution

  # remap: old img -> img
  img, remap = cv.download(data_bounds, renumber=True)
  img = img[..., 0]
  slcs = find_objects(img)
  del img
  reverse_map = { v: k for k, v in remap.items() } # img -> old img

  bboxes = {}
  for label, slc in enumerate(slcs):
    if slc is None:
      continue
    obj_bounds = Bbox.from_slices(slc)
    obj_bounds += Vec(*offset)
    obj_bounds *= Vec(*resolution, dtype=np.float32)
    bboxes[str(reverse_map[label+1])] = \
      obj_bounds.astype(resolution.dtype).to_list()

  bounds = bounds.astype(resolution.dtype) * resolution
  cf.put_json(
    cf.join(subdir, f"{bounds.to_filename(precision)}.spatial"),
    bboxes,
    compress=compress,
    cache_control=False,
  )
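# Hedged usage sketch: the cloudpath and subdir below are hypothetical, and
# shape/offset would normally come from tiling cv.bounds into task-sized
# blocks rather than being hardcoded. Wrapped in a function so nothing runs
# on import.
def example_spatial_index_task():  # illustrative only, not part of the source
  SpatialIndexTask(
    cloudpath="gs://example-bucket/example-dataset/seg",  # hypothetical path
    shape=(448, 448, 448),
    offset=(0, 0, 0),
    subdir="mesh/spatial_index",  # hypothetical subdirectory
    precision=0,
    mip=0,
    fill_missing=True,
  )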
def add_scale(
  layer_path, mip,
  preserve_chunk_size=True, chunk_size=None,
  encoding=None, factor=None
):
  vol = CloudVolume(layer_path, mip=mip)

  if factor is None:
    factor = (2,2,1)

  new_resolution = vol.resolution * Vec(*factor)
  vol.meta.add_resolution(
    new_resolution, encoding=encoding, chunk_size=chunk_size
  )

  if chunk_size is None:
    if preserve_chunk_size:
      chunk_size = vol.scales[mip]['chunk_sizes']
    else:
      chunk_size = vol.scales[mip + 1]['chunk_sizes']
  else:
    chunk_size = [ chunk_size ]

  if encoding is None:
    encoding = vol.scales[mip]['encoding']

  vol.scales[mip + 1]['chunk_sizes'] = chunk_size

  return vol
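# Hedged usage sketch: layer_path is hypothetical. Each call appends one scale
# whose resolution is the current mip's resolution times `factor`; add_scale
# itself does not commit, so the caller presumably finishes with
# vol.commit_info(). Wrapped in a function so nothing runs on import.
def example_add_scale():  # illustrative only
  vol = add_scale(
    "gs://example-bucket/example-dataset/image",  # hypothetical path
    mip=0, factor=(2, 2, 1), preserve_chunk_size=True
  )
  vol.commit_info()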
def image_shard_shape_from_spec(
  spec: ShardingSpecification,
  dataset_size: ShapeType,
  chunk_size: ShapeType
) -> ShapeType:
  chunk_size = Vec(*chunk_size, dtype=np.uint64)
  dataset_size = Vec(*dataset_size, dtype=np.uint64)
  preshift_bits = np.uint64(spec["preshift_bits"])
  minishard_bits = np.uint64(spec["minishard_bits"])
  shape_bits = preshift_bits + minishard_bits

  grid_size = np.ceil(dataset_size / chunk_size).astype(np.uint64)
  one = np.uint64(1)

  if shape_bits >= 64:
    raise ValueError(
      f"preshift_bits ({preshift_bits}) + minishard_bits ({minishard_bits}) must be < 64. Sum: {shape_bits}"
    )

  def compute_shape_bits():
    shape = Vec(0,0,0, dtype=np.uint64)

    i = 0
    over = [ False, False, False ]
    while i < shape_bits:
      changed = False
      for dim in range(3):
        if 2 ** (shape[dim] + 1) < grid_size[dim] * 2 and not over[dim]:
          if 2 ** (shape[dim] + 1) >= grid_size[dim]:
            over[dim] = True
          shape[dim] += one
          i += 1
          changed = True

        if i >= shape_bits:
          return shape

      if not changed:
        return shape

    return shape

  shape = compute_shape_bits()
  shape = Vec(2 ** shape.x, 2 ** shape.y, 2 ** shape.z, dtype=np.uint64)
  return chunk_size * shape
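# Hedged worked example of the bit-splitting loop above: with 9 preshift bits
# and 6 minishard bits there are 15 "shape bits" handed out round-robin across
# x, y, z (an axis stops growing once it covers its grid dimension). For a
# (80, 80, 50)-chunk grid each axis ends up with 5 bits, so a shard spans
# 32 x 32 x 32 chunks. The spec dict is constructed by hand for the example;
# only the two bit fields are read here.
def example_shard_shape():  # illustrative only
  spec = { "preshift_bits": 9, "minishard_bits": 6 }
  shape = image_shard_shape_from_spec(
    spec, dataset_size=(10240, 10240, 990), chunk_size=(128, 128, 20)
  )
  assert tuple(shape) == (128 * 32, 128 * 32, 20 * 32)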
def test_image_sharding_hash():
  spec = ShardingSpecification(
    type="neuroglancer_uint64_sharded_v1",
    data_encoding="gzip",
    hash="identity",
    minishard_bits=6,
    minishard_index_encoding="gzip",
    preshift_bits=9,
    shard_bits=16,
  )

  point = Vec(144689, 52487, 2829)
  volume_size = Vec(*[248832, 134144, 7063])
  chunk_size = Vec(*[128, 128, 16])

  grid_size = np.ceil(volume_size / chunk_size).astype(np.uint32)
  gridpt = np.ceil(point / chunk_size).astype(np.int32)
  code = compressed_morton_code(gridpt, grid_size)
  loc = spec.compute_shard_location(code)

  assert loc.shard_number == '458d'
def compute_factors(ds_shape, factor, chunk_size):
  grid_size = Vec(*ds_shape, dtype=np.float32) / Vec(*chunk_size, dtype=np.float32)
  # find the dimension which will tolerate the smallest number of downsamples and
  # return the number it will accept. + 0.0001 then truncate to compensate for FP errors
  # that would result in the log e.g. resulting in 1.9999999382 when it should be an
  # exact result.
  # This filtering is to avoid problems with dividing by log(1)
  grid_size = [ g for f, g in zip(factor, grid_size) if f != 1 ]
  grid_size = Vec(*grid_size, dtype=np.float32)
  factor_div = [ f for f in factor if f != 1 ]
  factor_div = Vec(*factor_div, dtype=np.float32)
  if len(factor_div) == 0:
    return []
  N = np.log(grid_size) / np.log(factor_div)
  N += 0.0001
  return [ factor ] * int(min(N))
def compute_factors(ds_shape, factor, chunk_size, volume_size):
  chunk_size = np.array(chunk_size)
  grid_size = Vec(*ds_shape, dtype=np.float32) / Vec(*chunk_size, dtype=np.float32)
  # find the dimension which will tolerate the smallest number of downsamples and
  # return the number it will accept. + 0.0001 then truncate to compensate for FP errors
  # that would result in the log e.g. resulting in 1.9999999382 when it should be an
  # exact result.
  # This filtering is to avoid problems with dividing by log(1)
  grid_size = [ g for f, g in zip(factor, grid_size) if f != 1 ]
  grid_size = Vec(*grid_size, dtype=np.float32)
  factor_div = [ f for f in factor if f != 1 ]
  factor_div = Vec(*factor_div, dtype=np.float32)
  if len(factor_div) == 0:
    return []
  epsilon = 0.0001
  N = np.log(grid_size) / np.log(factor_div)
  N += epsilon
  N = min(N)
  if N < epsilon:
    return []

  dsvol = np.array(volume_size) / (np.array(factor) ** int(np.ceil(N)))
  dsvol = np.array([ dsvol[i] for i, f in enumerate(factor) if f != 1 ])
  chunk_size = np.array([ chunk_size[i] for i, f in enumerate(factor) if f != 1 ])

  N, fract = int(N), (N - float(int(N)))

  # incomplete downsamples are only legal when the
  # volume size is smaller than the chunk size.
  if all(dsvol < chunk_size) and fract > 0.05:
    N += 1

  return [ factor ] * N
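# Hedged worked example for the four-argument compute_factors above: a
# 512x512x64 task shape over 64^3 chunks gives an 8x8 grid in x/y, so exactly
# three (2,2,1) downsamples fit. A 100x100x64 volume shrinks below one chunk
# in x/y after downsampling, so the "incomplete downsample" rule grants one
# extra level. The wrapper name is illustrative only.
def example_compute_factors():  # illustrative only
  assert compute_factors(
    ds_shape=(512, 512, 64), factor=(2, 2, 1),
    chunk_size=(64, 64, 64), volume_size=(512, 512, 64)
  ) == [(2, 2, 1)] * 3
  assert compute_factors(
    ds_shape=(100, 100, 64), factor=(2, 2, 1),
    chunk_size=(64, 64, 64), volume_size=(100, 100, 64)
  ) == [(2, 2, 1)] * 1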
def downsample_shape_from_memory_target(
  data_width, cx, cy, cz,
  factor, byte_target, max_mips=float('inf')
):
  """
  Compute the shape that will give the most downsamples for a
  given memory target (e.g. 3e9 bytes aka 3 GB).

  data_width: byte size of dtype
  cx: chunk size x
  cy: chunk size y
  cz: chunk size z
  factor: (2,2,1) or (2,2,2) are supported
  byte_target: memory used should be less than this

  Returns: Vec3 shape
  """
  # formulas come from solving the following optimization equations:
  #
  # factor (1,1,1)
  # find integers n and m such that
  # |n * cx - m * cy| is (approximately) minimized
  # treat cz as fixed to make things easier.
  # We start with a guess that n = sqrt(byte_target / data_width / cx / cy / cz)
  #
  # factor (2,2,1)
  # 4/3 * data_width * cx^(2^n) * cy^(2^n) * cz < byte_target
  #
  # factor (2,2,2)
  # 8/7 * data_width * cx^(2^n) * cy^(2^n) * cz^(2^n) < byte_target
  #
  # it's possible to solve for an arbitrary factor, but more complicated
  # and we really only need those two as the blowup gets intense.
  if byte_target <= 0:
    raise ValueError(f"Unable to pick a shape for a byte budget <= 0. Got: {byte_target}")

  if cx * cy * cz <= 0:
    raise ValueError(f"Chunk size must have a positive integer volume. Got: <{cx},{cy},{cz}>")

  def n_shape(n, c_):
    num_downsamples = int(math.log2((c_ ** (2*n)) / c_))
    num_downsamples = int(min(num_downsamples, max_mips))
    return c_ * (2 ** num_downsamples)

  if factor == (1,1,1):
    n = int(math.sqrt(byte_target / data_width / cx / cy / cz))
    m = int(n * cx / cy)
    out = Vec(n * cx, m * cy, cz)
  elif factor == (2,2,1):
    if cx * cy == 1:
      size = 2 ** int(math.log2(math.sqrt(byte_target / cz)))
      out = Vec(size, size, cz)
    else:
      n = math.log(3/4 * byte_target / data_width / cz)
      n = n / 2 / math.log(cx * cy)
      shape = lambda c_: n_shape(n, c_)
      out = Vec(shape(cx), shape(cy), cz)
  elif factor == (2,2,2):
    if cx * cy * cz == 1:
      size = 2 ** int(math.log2(round(byte_target ** (1/3), 5)))
      out = Vec(size, size, size)
    else:
      n = math.log(7/8 * byte_target / data_width)
      n = n / 2 / math.log(cx * cy * cz)
      shape = lambda c_: n_shape(n, c_)
      out = Vec(shape(cx), shape(cy), shape(cz))
  else:
    raise ValueError(f"This is now a harder optimization problem. Got: {factor}")

  out = out.astype(int)
  min_shape = Vec(cx,cy,cz)

  if any(out < min_shape):
    raise ValueError(
      f"Too little memory allocated to create a valid task."
      f" Got: {byte_target} Predicted Shape: {out} Minimum Shape: {min_shape}"
    )

  return out
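# Hedged usage sketch: for a uint8 layer (data_width=1) over 128x128x64 chunks
# with a 3 GB budget and factor (2,2,1), the (2,2,1) branch above lands on a
# 4096x4096x64 task shape -- room for five 2x2x1 downsamples while the
# 4/3 * 4096 * 4096 * 64 byte estimate (~1.4 GB) stays under budget. The
# wrapper name is illustrative only.
def example_memory_target_shape():  # illustrative only
  shape = downsample_shape_from_memory_target(
    data_width=1, cx=128, cy=128, cz=64,
    factor=(2, 2, 1), byte_target=int(3e9)
  )
  assert tuple(shape) == (4096, 4096, 64)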