def make_info_file(brain, home_dir, volume_size, type_vol="647", commit=True):
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type="image",  # "image" or "segmentation"
        data_type="uint16",  # 32-bit not necessary for Princeton atlas, but was for Allen atlas
        encoding="raw",  # other options: "jpeg", "compressed_segmentation" (req. uint32 or uint64)
        resolution=[1630, 1630, 3000],  # X,Y,Z values in nanometers (1.63 x 1.63 x 3 micron voxels)
        voxel_offset=[0, 0, 0],  # X,Y,Z values in voxels
        chunk_size=[1024, 1024, 32],  # rechunk of image X,Y,Z in voxels
        volume_size=volume_size,  # X,Y,Z size in voxels
    )

    # If you're using Amazon or the local file system, you can replace "gs" with "s3" or "file"
    vol = CloudVolume("file://" + home_dir + "/" + brain + "/" + type_vol, info=info)
    vol.provenance.description = "TP tracing"
    vol.provenance.owners = ["*****@*****.**"]  # list of contact email addresses
    if commit:
        vol.commit_info()  # generates gs://bucket/dataset/layer/info json file
        vol.commit_provenance()  # generates gs://bucket/dataset/layer/provenance json file
        print("Created CloudVolume info file: ", vol.info_cloudpath)
    return vol
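# A minimal usage sketch for make_info_file above; the sample name, home
# directory, and volume size are hypothetical.
import numpy as np

vol = make_info_file(
    brain="brain_2021_01",             # hypothetical sample name
    home_dir="/home/user/lightsheet",  # hypothetical base directory
    volume_size=[2160, 2560, 687],     # X,Y,Z size in voxels
)
# Upload one z-plane; the dtype must match the info file ("uint16").
plane = np.zeros((2160, 2560, 1), dtype=np.uint16)
vol[:, :, 0] = plane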
def create_watershed_remap_tasks(task_queue, map_path, src_layer_path,
                                 dest_layer_path, shape=Vec(2048, 2048, 64)):
    shape = Vec(*shape)
    vol = CloudVolume(src_layer_path)
    create_downsample_scales(dest_layer_path, mip=0, ds_shape=shape)

    for startpt in tqdm(xyzrange(vol.bounds.minpt, vol.bounds.maxpt, shape),
                        desc="Inserting Remap Tasks"):
        task = WatershedRemapTask(
            map_path=map_path,
            src_path=src_layer_path,
            dest_path=dest_layer_path,
            shape=shape.clone(),
            offset=startpt.clone(),
        )
        task_queue.insert(task)
    task_queue.wait('Uploading Remap Tasks')

    dvol = CloudVolume(dest_layer_path)
    dvol.provenance.processing.append({
        'method': {
            'task': 'WatershedRemapTask',
            'src': src_layer_path,
            'dest': dest_layer_path,
            'remap_file': map_path,
            'shape': list(shape),
        },
        'by': '*****@*****.**',
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dvol.commit_provenance()
def on_finish(self):
    # src_layer_path, dest_layer_path, shape, etc. are captured from the
    # enclosing task-generation scope.
    job_details = {
        'method': {
            'task': 'TransferTask',
            'src': src_layer_path,
            'dest': dest_layer_path,
            'shape': list(map(int, shape)),
            'fill_missing': fill_missing,
            'translate': list(map(int, translate)),
            'skip_downsamples': skip_downsamples,
            'bounds': [bounds.minpt.tolist(), bounds.maxpt.tolist()],
            'mip': mip,
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    }

    dvol = CloudVolume(dest_layer_path)
    dvol.provenance.sources = [src_layer_path]
    dvol.provenance.processing.append(job_details)
    dvol.commit_provenance()

    if vol.path.protocol != 'boss':
        vol.provenance.processing.append(job_details)
        vol.commit_provenance()
def __iter__(self):
    # Note the grid index is consumed in (z, y, x) order when computing the
    # output bounds; the genexp's x rebinds locally and does not clobber the
    # loop variable.
    for x, y, z in xyzrange(grid_size):
        output_bounds = Bbox.from_slices(
            tuple(
                slice(s + x * b, s + x * b + b)
                for (s, x, b) in zip(output_block_start, (z, y, x), output_block_size)))
        yield MaskAffinitymapTask(
            aff_input_layer_path=aff_input_layer_path,
            aff_output_layer_path=aff_output_layer_path,
            aff_mip=aff_mip,
            mask_layer_path=mask_layer_path,
            mask_mip=mask_mip,
            output_bounds=output_bounds,
        )

    vol = CloudVolume(aff_output_layer_path, mip=aff_mip)
    vol.provenance.processing.append({
        'method': {
            'task': 'MaskAffinitymapTask',
            'aff_input_layer_path': aff_input_layer_path,
            'aff_output_layer_path': aff_output_layer_path,
            'aff_mip': aff_mip,
            'mask_layer_path': mask_layer_path,
            'mask_mip': mask_mip,
            'output_block_start': output_block_start,
            'output_block_size': output_block_size,
            'grid_size': grid_size,
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    vol.commit_provenance()
def upload_cloud(cloud_dir, volume, layer_type, dtype, resolution, volume_size,
                 voxel_offset=[0, 0, 0], prov_description=""):
    """
    cloud_dir        : Cloud directory to upload to
    volume           : Volume to upload
    layer_type       : 'image' or 'segmentation'
    dtype            : Must match the volume's data type
    resolution       : Voxel resolution in nanometers
    volume_size      : Volume size in voxels
    voxel_offset     : Volume offset in voxels
    prov_description : Provenance description
    """
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=layer_type,      # 'image' or 'segmentation'
        data_type=dtype,            # can pick any popular uint
        encoding='raw',             # other option: 'jpeg', but it's lossy
        resolution=resolution,      # X,Y,Z values in nanometers
        voxel_offset=voxel_offset,  # X,Y,Z values in voxels
        chunk_size=[128, 128, 1],   # rechunk of image X,Y,Z in voxels
        volume_size=volume_size,    # X,Y,Z size in voxels
    )

    vol = CloudVolume(cloud_dir, parallel=True, progress=True, cdn_cache=False, info=info)
    vol.provenance.description = prov_description
    vol.provenance.owners = ['*****@*****.**']  # list of contact email addresses
    vol.commit_info()        # generates gs://bucket/dataset/layer/info json file
    vol.commit_provenance()  # generates gs://bucket/dataset/layer/provenance json file
    vol[:, :, :] = volume
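# Usage sketch for upload_cloud above with a small in-memory image volume;
# the local destination path is hypothetical, and dtype must match the array.
import numpy as np

img = np.random.randint(0, 2**16, size=(512, 512, 64), dtype=np.uint16)
upload_cloud(
    cloud_dir='file:///tmp/demo_layer',  # hypothetical destination
    volume=img,
    layer_type='image',
    dtype='uint16',
    resolution=[1000, 1000, 1000],       # nm per voxel
    volume_size=list(img.shape),
    prov_description='demo upload',
)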
def create_meshing_tasks(task_queue, layer_path, mip, shape=Vec(512, 512, 512)):
    shape = Vec(*shape)
    max_simplification_error = 40

    vol = CloudVolume(layer_path, mip)

    if 'mesh' not in vol.info:
        vol.info['mesh'] = 'mesh_mip_{}_err_{}'.format(mip, max_simplification_error)
        vol.commit_info()

    for startpt in tqdm(xyzrange(vol.bounds.minpt, vol.bounds.maxpt, shape),
                        desc="Inserting Mesh Tasks"):
        task = MeshTask(
            layer_path=layer_path,
            mip=vol.mip,
            shape=shape.clone(),
            offset=startpt.clone(),
            max_simplification_error=max_simplification_error,
        )
        task_queue.insert(task)
    task_queue.wait('Uploading MeshTasks')

    vol.provenance.processing.append({
        'method': {
            'task': 'MeshTask',
            'layer_path': layer_path,
            'mip': vol.mip,
            'shape': shape.tolist(),
        },
        'by': USER_EMAIL,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    vol.commit_provenance()
def __iter__(self):
    # vol, layer_path, shape, mip, num_mips come from the enclosing scope.
    # The provenance volume gets its own name below: assigning to `vol` here
    # would make it local and break the loop's `vol.bounds` reads.
    for startpt in xyzrange(vol.bounds.minpt, vol.bounds.maxpt, shape):
        bounded_shape = min2(shape, vol.bounds.maxpt - startpt)
        yield DeleteTask(
            layer_path=layer_path,
            shape=bounded_shape.clone(),
            offset=startpt.clone(),
            mip=mip,
            num_mips=num_mips,
        )

    pvol = CloudVolume(layer_path)
    pvol.provenance.processing.append({
        'method': {
            'task': 'DeleteTask',
            'mip': mip,
            'num_mips': num_mips,
            'shape': shape.tolist(),
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    pvol.commit_provenance()
def make_info_file(volume_size, resolution, layer_dir, commit=True):
    """
    ---PURPOSE---
    Make the CloudVolume info file.
    ---INPUT---
    volume_size  [Nx,Ny,Nz] in voxels, e.g. [2160,2560,1271]
    resolution   [x,y,z] voxel size in nanometers, e.g. [5000,5000,10000]
    layer_dir    directory in which to create the precomputed layer
    commit       if True, writes the info/provenance files to disk;
                 if False, just creates them in memory
    """
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='segmentation',  # 'image' or 'segmentation'
        data_type='uint16',
        encoding='raw',  # other options: 'jpeg', 'compressed_segmentation' (req. uint32 or uint64)
        resolution=resolution,  # size of X,Y,Z pixels in nanometers
        voxel_offset=[0, 0, 0],  # X,Y,Z values in voxels
        chunk_size=[1024, 1024, 1],  # rechunk of image X,Y,Z in voxels -- only used for the downsampling task, I think
        volume_size=volume_size,  # X,Y,Z size in voxels
    )

    vol = CloudVolume(f'file://{layer_dir}', info=info)
    vol.provenance.description = "Test on spock for profiling precomputed creation"
    vol.provenance.owners = ['*****@*****.**']  # list of contact email addresses
    if commit:
        vol.commit_info()  # generates info json file
        vol.commit_provenance()  # generates provenance json file
        print("Created CloudVolume info file: ", vol.info_cloudpath)
    return vol
def __iter__(self):
    for startpt in xyzrange(vol.bounds.minpt, vol.bounds.maxpt, shape):
        yield WatershedRemapTask(
            map_path=map_path,
            src_path=src_layer_path,
            dest_path=dest_layer_path,
            shape=shape.clone(),
            offset=startpt.clone(),
        )

    dvol = CloudVolume(dest_layer_path)
    dvol.provenance.processing.append({
        'method': {
            'task': 'WatershedRemapTask',
            'src': src_layer_path,
            'dest': dest_layer_path,
            'remap_file': map_path,
            'shape': list(shape),
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dvol.commit_provenance()
def create_transfer_tasks(task_queue, src_layer_path, dest_layer_path,
                          chunk_size=None, shape=Vec(2048, 2048, 64),
                          fill_missing=False, translate=(0, 0, 0)):
    shape = Vec(*shape)
    translate = Vec(*translate)
    vol = CloudVolume(src_layer_path)

    if not chunk_size:
        chunk_size = vol.info['scales'][0]['chunk_sizes'][0]
    chunk_size = Vec(*chunk_size)

    try:
        dvol = CloudVolume(dest_layer_path)
    except Exception:  # no info file
        info = copy.deepcopy(vol.info)
        dvol = CloudVolume(dest_layer_path, info=info)
        dvol.commit_info()

    dvol.info['scales'] = dvol.info['scales'][:1]
    dvol.info['scales'][0]['chunk_sizes'] = [chunk_size.tolist()]
    dvol.commit_info()

    create_downsample_scales(dest_layer_path, mip=0, ds_shape=shape,
                             preserve_chunk_size=True)

    bounds = vol.bounds.clone()
    for startpt in tqdm(xyzrange(bounds.minpt, bounds.maxpt, shape),
                        desc="Inserting Transfer Tasks"):
        task = TransferTask(
            src_path=src_layer_path,
            dest_path=dest_layer_path,
            shape=shape.clone(),
            offset=startpt.clone(),
            fill_missing=fill_missing,
            translate=translate,
        )
        task_queue.insert(task)
    task_queue.wait('Uploading Transfer Tasks')

    dvol = CloudVolume(dest_layer_path)
    dvol.provenance.processing.append({
        'method': {
            'task': 'TransferTask',
            'src': src_layer_path,
            'dest': dest_layer_path,
            'shape': list(map(int, shape)),
            'fill_missing': fill_missing,
            'translate': list(map(int, translate)),
        },
        'by': USER_EMAIL,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dvol.commit_provenance()
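# Sketch of driving create_transfer_tasks above. The queue type and layer
# paths are assumptions; any object exposing insert()/wait() in the style of
# igneous' TaskQueue should work.
from taskqueue import LocalTaskQueue

tq = LocalTaskQueue(parallel=8)
create_transfer_tasks(
    tq,
    src_layer_path='file:///data/src_layer',    # hypothetical
    dest_layer_path='file:///data/dest_layer',  # hypothetical
    chunk_size=[128, 128, 64],
    fill_missing=True,
)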
def create_downsampling_tasks(task_queue, layer_path, mip=-1, fill_missing=False,
                              axis='z', num_mips=5, preserve_chunk_size=True):
    def ds_shape(mip):
        shape = vol.mip_underlying(mip)[:3]
        shape.x *= 2**num_mips
        shape.y *= 2**num_mips
        return shape

    vol = CloudVolume(layer_path, mip=mip)
    shape = ds_shape(vol.mip)
    vol = create_downsample_scales(layer_path, mip, shape,
                                   preserve_chunk_size=preserve_chunk_size)

    if not preserve_chunk_size:
        shape = ds_shape(vol.mip + 1)

    bounds = vol.bounds.clone()
    for startpt in tqdm(xyzrange(bounds.minpt, bounds.maxpt, shape),
                        desc="Inserting Downsample Tasks"):
        task = DownsampleTask(
            layer_path=layer_path,
            mip=vol.mip,
            shape=shape.clone(),
            offset=startpt.clone(),
            axis=axis,
            fill_missing=fill_missing,
        )
        task_queue.insert(task)
    task_queue.wait('Uploading')

    vol.provenance.processing.append({
        'method': {
            'task': 'DownsampleTask',
            'mip': mip,
            'shape': shape.tolist(),
            'axis': axis,
            'method': 'downsample_with_averaging'
                      if vol.layer_type == 'image' else 'downsample_segmentation',
        },
        'by': '*****@*****.**',
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    vol.commit_provenance()
def on_finish(self):
    job_details = {
        'method': {
            'task': 'TransferTask',
            'src': src_layer_path,
            'dest': dest_layer_path,
            'shape': list(map(int, shape)),
            'fill_missing': fill_missing,
            'translate': list(map(int, translate)),
            'skip_downsamples': skip_downsamples,
            'delete_black_uploads': bool(delete_black_uploads),
            'background_color': background_color,
            'bounds': [dest_bounds.minpt.tolist(), dest_bounds.maxpt.tolist()],
            'mip': mip,
            'agglomerate': bool(agglomerate),
            'timestamp': timestamp,
            'compress': compress,
            'encoding': encoding,
            'memory_target': memory_target,
            'factor': (tuple(factor) if factor else None),
            'sparse': bool(sparse),
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    }

    dest_vol = CloudVolume(dest_layer_path)
    dest_vol.provenance.sources = [src_layer_path]
    dest_vol.provenance.processing.append(job_details)
    dest_vol.commit_provenance()

    if not no_src_update and src_vol.meta.path.protocol in ('gs', 's3', 'file'):
        src_vol.provenance.processing.append(job_details)
        src_vol.commit_provenance()
def on_finish(self):
    vol = CloudVolume(layer_path)
    vol.provenance.processing.append({
        'method': {
            'task': 'DeleteTask',
            'mip': mip,
            'num_mips': num_mips,
            'shape': shape.tolist(),
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    vol.commit_provenance()
def on_finish(self):
    dvol = CloudVolume(dest_layer_path)
    dvol.provenance.processing.append({
        'method': {
            'task': 'WatershedRemapTask',
            'src': src_layer_path,
            'dest': dest_layer_path,
            'remap_file': map_path,
            'shape': list(shape),
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dvol.commit_provenance()
def on_finish(self):
    vol = CloudVolume(layer_path, max_redirects=0)
    vol.provenance.processing.append({
        'method': {
            'task': 'DeleteTask',
            'mip': mip,
            'num_mips': num_mips,
            'shape': shape.tolist(),
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    vol.commit_provenance()
def __iter__(self):
    for x, y, z in xyzrange(grid_size):
        output_offset = tuple(
            s + x * b
            for (s, x, b) in zip(output_block_start, (z, y, x), output_block_size))
        yield InferenceTask(
            image_layer_path=image_layer_path,
            convnet_path=convnet_path,
            mask_layer_path=mask_layer_path,
            output_layer_path=output_layer_path,
            output_offset=output_offset,
            output_shape=output_block_size,
            patch_size=patch_size,
            patch_overlap=patch_overlap,
            cropping_margin_size=cropping_margin_size,
            output_key=output_key,
            num_output_channels=num_output_channels,
            image_mip=image_mip,
            output_mip=output_mip,
            mask_mip=mask_mip,
        )

    vol = CloudVolume(output_layer_path, mip=output_mip)
    vol.provenance.processing.append({
        'method': {
            'task': 'InferenceTask',
            'image_layer_path': image_layer_path,
            'convnet_path': convnet_path,
            'mask_layer_path': mask_layer_path,
            'output_layer_path': output_layer_path,
            'output_offset': output_offset,
            'output_shape': output_block_size,
            'patch_size': patch_size,
            'patch_overlap': patch_overlap,
            'cropping_margin_size': cropping_margin_size,
            'output_key': output_key,
            'num_output_channels': num_output_channels,
            'image_mip': image_mip,
            'output_mip': output_mip,
            'mask_mip': mask_mip,
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    vol.commit_provenance()
def __iter__(self):
    # For a prefix like 100, tasks 1-99 will be missed. Account for them by
    # enumerating them individually with a suffixed ':' to limit matches to
    # only those small numbers.
    for prefix in range(1, start):
        yield UnshardedSkeletonMergeTask(
            cloudpath=layer_path,
            prefix=str(prefix) + ':',
            crop=crop,
            dust_threshold=dust_threshold,
            max_cable_length=max_cable_length,
            tick_threshold=tick_threshold,
            delete_fragments=delete_fragments,
        )

    # enumerate from e.g. 100 to 999
    for prefix in range(start, end):
        yield UnshardedSkeletonMergeTask(
            cloudpath=layer_path,
            prefix=prefix,
            crop=crop,
            dust_threshold=dust_threshold,
            max_cable_length=max_cable_length,
            tick_threshold=tick_threshold,
            delete_fragments=delete_fragments,
        )

    vol = CloudVolume(layer_path)
    vol.provenance.processing.append({
        'method': {
            'task': 'UnshardedSkeletonMergeTask',
            'cloudpath': layer_path,
            'crop': crop,
            'dust_threshold': dust_threshold,
            'tick_threshold': tick_threshold,
            'delete_fragments': delete_fragments,
            'max_cable_length': max_cable_length,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    vol.commit_provenance()
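# Why the two loops above need different treatment: a numeric prefix like
# "100" also matches ids 1000, 10000, etc., but no prefix in
# range(start, end) ever matches ids 1 through start-1, so those are emitted
# individually with a ':' suffix to pin the match to the exact id.
# Illustration of the prefixes generated for start=100, end=1000:
prefixes = [f"{i}:" for i in range(1, 100)] + [str(i) for i in range(100, 1000)]
assert prefixes[0] == "1:" and prefixes[-1] == "999"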
def __iter__(self):
    for z in range(bounds.minpt.z, bounds.maxpt.z + 1):
        zoffset.z = z
        yield LuminanceLevelsTask(
            src_path=layer_path,
            levels_path=levels_path,
            shape=shape,
            offset=zoffset,
            coverage_factor=coverage_factor,
            mip=mip,
        )

    if protocol == 'boss':
        # In a generator, `raise StopIteration()` becomes a RuntimeError under
        # PEP 479 (Python 3.7+); a bare return ends iteration instead.
        return

    # `vol` here is the source volume from the enclosing scope; the provenance
    # volume gets its own name so we don't shadow it with a local assignment.
    if levels_path:
        try:
            levels_vol = CloudVolume(levels_path)
        except cloudvolume.exceptions.InfoUnavailableError:
            levels_vol = CloudVolume(levels_path, info=vol.info)
    else:
        levels_vol = CloudVolume(layer_path, mip=mip)

    levels_vol.provenance.processing.append({
        'method': {
            'task': 'LuminanceLevelsTask',
            'src': layer_path,
            'levels_path': levels_path,
            'shape': Vec(*shape).tolist(),
            'offset': Vec(*offset).tolist(),
            'bounds': [bounds.minpt.tolist(), bounds.maxpt.tolist()],
            'coverage_factor': coverage_factor,
            'mip': mip,
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    levels_vol.commit_provenance()
def create_transfer_tasks(task_queue, src_layer_path, dest_layer_path,
                          shape=Vec(2048, 2048, 64), fill_missing=False,
                          translate=(0, 0, 0)):
    shape = Vec(*shape)
    translate = Vec(*translate)
    vol = CloudVolume(src_layer_path)

    create_downsample_scales(dest_layer_path, mip=0, ds_shape=shape,
                             preserve_chunk_size=True)

    bounds = vol.bounds.clone()
    for startpt in tqdm(xyzrange(bounds.minpt, bounds.maxpt, shape),
                        desc="Inserting Transfer Tasks"):
        task = TransferTask(
            src_path=src_layer_path,
            dest_path=dest_layer_path,
            shape=shape.clone(),
            offset=startpt.clone(),
            fill_missing=fill_missing,
            translate=translate,
        )
        task_queue.insert(task)
    task_queue.wait('Uploading Transfer Tasks')

    dvol = CloudVolume(dest_layer_path)
    dvol.provenance.processing.append({
        'method': {
            'task': 'TransferTask',
            'src': src_layer_path,
            'dest': dest_layer_path,
            'shape': list(map(int, shape)),
        },
        'by': '*****@*****.**',
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dvol.commit_provenance()
def on_finish(self):
    job_details = {
        "method": {
            "task": "ImageShardTransferTask",
            "src": src_layer_path,
            "dest": dst_layer_path,
            "shape": list(map(int, shape)),
            "fill_missing": fill_missing,
            "translate": list(map(int, translate)),
            "bounds": [bounds.minpt.tolist(), bounds.maxpt.tolist()],
            "mip": mip,
        },
        "by": operator_contact(),
        "date": strftime("%Y-%m-%d %H:%M %Z"),
    }

    dvol = CloudVolume(dst_layer_path)
    dvol.provenance.sources = [src_layer_path]
    dvol.provenance.processing.append(job_details)
    dvol.commit_provenance()
def make_info_file():
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='segmentation',  # 'image' or 'segmentation'
        data_type='uint32',  # can pick any popular uint
        encoding='raw',  # other options: 'jpeg', 'compressed_segmentation' (req. uint32 or uint64)
        resolution=[25000, 25000, 25000],  # X,Y,Z values in nanometers, 25 microns in each dim
        voxel_offset=[0, 0, 0],  # X,Y,Z values in voxels
        chunk_size=[1024, 1024, 1],  # rechunk of image X,Y,Z in voxels
        volume_size=[320, 528, 456],  # X,Y,Z size in voxels
    )

    # If you're using amazon or the local file system, you can replace 'gs' with 's3' or 'file'
    vol = CloudVolume('file:///home/ahoag/ngdemo/demo_bucket/atlas/allenatlas_2017', info=info)
    vol.provenance.description = "Segmentation volume for the 3D labeled allen atlas"
    vol.provenance.owners = ['*****@*****.**']  # list of contact email addresses
    vol.commit_info()  # generates gs://bucket/dataset/layer/info json file
    vol.commit_provenance()  # generates gs://bucket/dataset/layer/provenance json file
    print("Created CloudVolume info file: ", vol.info_cloudpath)
    return vol
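# Usage sketch for the zero-argument make_info_file above: create the layer,
# then upload the annotation volume. The array here is a stand-in for the
# real 25 micron Allen atlas labels.
import numpy as np

vol = make_info_file()
atlas = np.zeros((320, 528, 456), dtype=np.uint32)  # placeholder data
vol[:, :, :] = atlas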
def create_luminance_levels_tasks(task_queue, layer_path, coverage_factor=0.01,
                                  shape=None, offset=(0, 0, 0), mip=0):
    vol = CloudVolume(layer_path)

    if shape is None:
        shape = vol.shape.clone()
        shape.z = 1

    offset = Vec(*offset)

    for z in range(vol.bounds.minpt.z, vol.bounds.maxpt.z + 1):
        offset.z = z
        task = LuminanceLevelsTask(
            src_path=layer_path,
            shape=shape,
            offset=offset,
            coverage_factor=coverage_factor,
            mip=mip,
        )
        task_queue.insert(task)
    task_queue.wait('Uploading Luminance Levels Tasks')

    vol.provenance.processing.append({
        'method': {
            'task': 'LuminanceLevelsTask',
            'src': layer_path,
            'shape': Vec(*shape).tolist(),
            'offset': Vec(*offset).tolist(),
            'coverage_factor': coverage_factor,
            'mip': mip,
        },
        'by': USER_EMAIL,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    vol.commit_provenance()
def __iter__(self):
    for startpt in xyzrange(bounds.minpt, bounds.maxpt, shape):
        task_shape = min2(shape.clone(), dvol_bounds.maxpt - startpt)
        yield TransferTask(
            src_path=src_layer_path,
            dest_path=dest_layer_path,
            shape=task_shape,
            offset=startpt.clone(),
            fill_missing=fill_missing,
            translate=translate,
            mip=mip,
        )

    job_details = {
        'method': {
            'task': 'TransferTask',
            'src': src_layer_path,
            'dest': dest_layer_path,
            'shape': list(map(int, shape)),
            'fill_missing': fill_missing,
            'translate': list(map(int, translate)),
            'bounds': [bounds.minpt.tolist(), bounds.maxpt.tolist()],
            'mip': mip,
        },
        'by': OPERATOR_CONTACT,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    }

    dvol = CloudVolume(dest_layer_path)
    dvol.provenance.sources = [src_layer_path]
    dvol.provenance.processing.append(job_details)
    dvol.commit_provenance()

    if vol.path.protocol != 'boss':
        vol.provenance.processing.append(job_details)
        vol.commit_provenance()
def transfer_to(self, cloudpath, bbox, mip, block_size=None, compress=True,
                compress_level=None):
    """
    Transfer files from one storage location to another, bypassing
    volume painting. This enables using a single CloudVolume instance
    to transfer big volumes. In some cases, gsutil or aws s3 cli tools
    may be more appropriate. This method is provided for convenience. It
    may be optimized for better performance over time as demand requires.

    cloudpath (str): path to storage layer
    bbox (Bbox object): ROI to transfer
    mip (int): resolution level
    block_size (int): number of file chunks to transfer per I/O batch.
    compress (bool): Set to False to upload as uncompressed
    """
    from cloudvolume import CloudVolume

    if mip is None:
        mip = self.config.mip

    bbox = Bbox.create(bbox, self.meta.bounds(mip))
    realized_bbox = bbox.expand_to_chunk_size(
        self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
    realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

    if bbox != realized_bbox:
        raise exceptions.AlignmentError(
            "Unable to transfer non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
            .format(bbox, realized_bbox))

    default_block_size_MB = 50  # MB
    chunk_MB = self.meta.chunk_size(mip).rectVolume() * np.dtype(
        self.meta.dtype).itemsize * self.meta.num_channels

    if self.meta.layer_type == 'image':
        # kind of an average guess for some EM datasets, have seen up to 1.9x and as low as 1.1
        # affinities are also images, but have very different compression ratios. e.g. 3x for kempressed
        chunk_MB /= 1.3
    else:  # segmentation
        chunk_MB /= 100.0  # compression ratios between 80 and 800....
    chunk_MB /= 1024.0 * 1024.0

    if block_size:
        step = block_size
    else:
        step = int(default_block_size_MB // chunk_MB) + 1

    try:
        destvol = CloudVolume(cloudpath, mip=mip)
    except exceptions.InfoUnavailableError:
        destvol = CloudVolume(cloudpath,
                              mip=mip,
                              info=self.meta.info,
                              provenance=self.meta.provenance.serialize())
        destvol.commit_info()
        destvol.commit_provenance()
    except exceptions.ScaleUnavailableError:
        destvol = CloudVolume(cloudpath)
        for i in range(len(destvol.scales) + 1, len(self.meta.scales)):
            destvol.scales.append(self.meta.scales[i])
        destvol.commit_info()
        destvol.commit_provenance()

    num_blocks = np.ceil(
        self.meta.bounds(mip).volume() /
        self.meta.chunk_size(mip).rectVolume()) / step
    num_blocks = int(np.ceil(num_blocks))

    cloudpaths = chunknames(bbox,
                            self.meta.bounds(mip),
                            self.meta.key(mip),
                            self.meta.chunk_size(mip),
                            protocol=self.meta.path.protocol)

    pbar = tqdm(
        desc='Transferring Blocks of {} Chunks'.format(step),
        unit='blocks',
        disable=(not self.config.progress),
        total=num_blocks,
    )

    with pbar:
        with Storage(self.meta.cloudpath) as src_stor:
            with Storage(cloudpath) as dest_stor:
                for _ in range(num_blocks, 0, -1):
                    srcpaths = list(itertools.islice(cloudpaths, step))
                    files = src_stor.get_files(srcpaths)
                    files = [(f['filename'], f['content']) for f in files]
                    dest_stor.put_files(
                        files=files,
                        compress=compress,
                        compress_level=compress_level,
                        content_type=tx.content_type(destvol),
                    )
                    pbar.update()
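# Sketch of calling transfer_to (paths are hypothetical; reaching the method
# as vol.image.transfer_to is an assumption about how this class is mounted
# on CloudVolume). The ROI must be chunk-aligned at the chosen mip or
# AlignmentError is raised.
from cloudvolume import CloudVolume, Bbox

src = CloudVolume('gs://bucket/dataset/layer')  # hypothetical source
roi = Bbox(src.voxel_offset, src.voxel_offset + src.chunk_size * 4)
src.image.transfer_to('file:///data/backup/layer', roi, mip=0)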
def create_sharded_multires_mesh_tasks(
    cloudpath: str,
    shard_index_bytes=2**13,
    minishard_index_bytes=2**15,
    num_lod: int = 1,
    draco_compression_level: int = 1,
    vertex_quantization_bits: int = 16,
    minishard_index_encoding="gzip",
    mesh_dir: Optional[str] = None,
    spatial_index_db: Optional[str] = None
) -> Iterator[MultiResShardedMeshMergeTask]:
    configure_multires_info(cloudpath, vertex_quantization_bits, mesh_dir)

    # rebuild b/c sharding changes the mesh source class
    cv = CloudVolume(cloudpath, progress=True, spatial_index_db=spatial_index_db)
    cv.mip = cv.mesh.meta.mip

    # 17 sec to download for pinky100
    all_labels = cv.mesh.spatial_index.query(cv.bounds * cv.resolution)

    (shard_bits, minishard_bits, preshift_bits) = \
        compute_shard_params_for_hashed(
            num_labels=len(all_labels),
            shard_index_bytes=int(shard_index_bytes),
            minishard_index_bytes=int(minishard_index_bytes),
        )

    spec = ShardingSpecification(
        type='neuroglancer_uint64_sharded_v1',
        preshift_bits=preshift_bits,
        hash='murmurhash3_x86_128',
        minishard_bits=minishard_bits,
        shard_bits=shard_bits,
        minishard_index_encoding=minishard_index_encoding,
        data_encoding="raw",  # draco encoded meshes
    )

    cv.mesh.meta.info['sharding'] = spec.to_dict()
    cv.mesh.meta.commit_info()

    cv = CloudVolume(cloudpath)

    # perf: ~66.5k hashes/sec on M1 ARM64
    shardfn = lambda lbl: cv.mesh.reader.spec.compute_shard_location(lbl).shard_number

    shard_labels = defaultdict(list)
    for label in tqdm(all_labels, desc="Hashes"):
        shard_labels[shardfn(label)].append(label)
    del all_labels

    # the shard label listings belong in the mesh layer path
    cf = CloudFiles(cv.mesh.meta.layerpath, progress=True)
    files = ((str(shardno) + '.labels', labels)
             for shardno, labels in shard_labels.items())
    cf.put_jsons(files, compress="gzip", cache_control="no-cache",
                 total=len(shard_labels))

    cv.provenance.processing.append({
        'method': {
            'task': 'MultiResShardedMeshMergeTask',
            'cloudpath': cloudpath,
            'mip': cv.mesh.meta.mip,
            'num_lod': num_lod,
            'vertex_quantization_bits': vertex_quantization_bits,
            'preshift_bits': preshift_bits,
            'minishard_bits': minishard_bits,
            'shard_bits': shard_bits,
            'mesh_dir': mesh_dir,
            'draco_compression_level': draco_compression_level,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    cv.commit_provenance()

    return [
        partial(MultiResShardedMeshMergeTask, cloudpath, shard_no,
                num_lod=num_lod,
                mesh_dir=mesh_dir,
                spatial_index_db=spatial_index_db,
                draco_compression_level=draco_compression_level)
        for shard_no in shard_labels.keys()
    ]
def transfer_to(self, cloudpath, bbox, mip, block_size=None, compress=True,
                compress_level=None):
    """
    Transfer files from one storage location to another, bypassing
    volume painting. This enables using a single CloudVolume instance
    to transfer big volumes. In some cases, gsutil or aws s3 cli tools
    may be more appropriate. This method is provided for convenience. It
    may be optimized for better performance over time as demand requires.

    cloudpath (str): path to storage layer
    bbox (Bbox object): ROI to transfer
    mip (int): resolution level
    block_size (int): number of file chunks to transfer per I/O batch.
    compress (bool): Set to False to upload as uncompressed
    """
    from cloudvolume import CloudVolume

    if mip is None:
        mip = self.config.mip

    if self.is_sharded(mip):
        raise exceptions.UnsupportedFormatError(
            f"Sharded sources are not supported. got: {self.meta.cloudpath}")

    bbox = Bbox.create(bbox, self.meta.bounds(mip))
    realized_bbox = bbox.expand_to_chunk_size(
        self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
    realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

    if bbox != realized_bbox:
        raise exceptions.AlignmentError(
            "Unable to transfer non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
            .format(bbox, realized_bbox))

    default_block_size_MB = 50  # MB
    chunk_MB = self.meta.chunk_size(mip).rectVolume() * np.dtype(
        self.meta.dtype).itemsize * self.meta.num_channels

    if self.meta.layer_type == 'image':
        # kind of an average guess for some EM datasets, have seen up to 1.9x and as low as 1.1
        # affinities are also images, but have very different compression ratios. e.g. 3x for kempressed
        chunk_MB /= 1.3
    else:  # segmentation
        chunk_MB /= 100.0  # compression ratios between 80 and 800....
    chunk_MB /= 1024.0 * 1024.0

    if block_size:
        step = block_size
    else:
        step = int(default_block_size_MB // chunk_MB) + 1

    try:
        destvol = CloudVolume(cloudpath, mip=mip)
    except exceptions.InfoUnavailableError:
        destvol = CloudVolume(cloudpath,
                              mip=mip,
                              info=self.meta.info,
                              provenance=self.meta.provenance.serialize())
        destvol.commit_info()
        destvol.commit_provenance()
    except exceptions.ScaleUnavailableError:
        destvol = CloudVolume(cloudpath)
        for i in range(len(destvol.scales) + 1, len(self.meta.scales)):
            destvol.scales.append(self.meta.scales[i])
        destvol.commit_info()
        destvol.commit_provenance()

    if destvol.image.is_sharded(mip):
        raise exceptions.UnsupportedFormatError(
            f"Sharded destinations are not supported. got: {destvol.cloudpath}")

    num_blocks = np.ceil(
        self.meta.bounds(mip).volume() /
        self.meta.chunk_size(mip).rectVolume()) / step
    num_blocks = int(np.ceil(num_blocks))

    cloudpaths = chunknames(bbox,
                            self.meta.bounds(mip),
                            self.meta.key(mip),
                            self.meta.chunk_size(mip),
                            protocol=self.meta.path.protocol)

    pbar = tqdm(
        desc='Transferring Blocks of {} Chunks'.format(step),
        unit='blocks',
        disable=(not self.config.progress),
        total=num_blocks,
    )

    cfsrc = CloudFiles(self.meta.cloudpath, secrets=self.config.secrets)
    cfdest = CloudFiles(cloudpath)

    def check(files):
        errors = [
            file for file in files
            if file['content'] is None or file['error'] is not None
        ]
        if errors:
            error_paths = [f['path'] for f in errors]
            raise exceptions.EmptyFileException(
                "{} were empty or had IO errors.".format(", ".join(error_paths)))
        return files

    with pbar:
        for srcpaths in sip(cloudpaths, step):
            files = check(cfsrc.get(srcpaths, raw=True))
            cfdest.puts(
                compression.transcode(files,
                                      encoding=compress,
                                      level=compress_level,
                                      in_place=True),
                compress=compress,
                content_type=tx.content_type(destvol),
                raw=True,
            )
            pbar.update()
def segment(args):
    """Run segmentation on a contiguous block of affinities from CV

    Args:
        args: ArgParse object from main
    """
    bbox_start = Vec(*args.bbox_start)
    bbox_size = Vec(*args.bbox_size)
    chunk_size = Vec(*args.chunk_size)
    bbox = Bbox(bbox_start, bbox_start + bbox_size)

    src_cv = CloudVolume(args.src_path, fill_missing=True, parallel=args.parallel)
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='segmentation',
        data_type='uint64',
        encoding='raw',
        resolution=src_cv.info['scales'][args.mip]['resolution'],
        voxel_offset=bbox_start,
        chunk_size=chunk_size,
        volume_size=bbox_size,
        mesh='mesh_mip_{}_err_{}'.format(args.mip, args.max_simplification_error))

    dst_cv = CloudVolume(args.dst_path, info=info, parallel=args.parallel)
    dst_cv.provenance.description = 'ws+agg using waterz'
    dst_cv.provenance.processing.append({
        'method': {
            'task': 'watershed+agglomeration',
            'src_path': args.src_path,
            'dst_path': args.dst_path,
            'mip': args.mip,
            'shape': bbox_size.tolist(),
            'bounds': [
                bbox.minpt.tolist(),
                bbox.maxpt.tolist(),
            ],
        },
        'by': args.owner,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dst_cv.provenance.owners = [args.owner]
    dst_cv.commit_info()
    dst_cv.commit_provenance()

    if args.segment:
        print('Downloading affinities')
        aff = src_cv[bbox.to_slices()]
        aff = np.transpose(aff, (3, 0, 1, 2))
        aff = np.ascontiguousarray(aff, dtype=np.float32)
        thresholds = [args.threshold]
        print('Starting ws+agg')
        seg_gen = waterz.agglomerate(aff, thresholds)
        seg = next(seg_gen)
        print('Deleting affinities')
        del aff
        print('Uploading segmentation')
        dst_cv[bbox.to_slices()] = seg

    if args.mesh:
        print('Starting meshing')
        with LocalTaskQueue(parallel=args.parallel) as tq:
            tasks = tc.create_meshing_tasks(
                layer_path=args.dst_path,
                mip=args.mip,
                shape=args.chunk_size,
                simplification=True,
                max_simplification_error=args.max_simplification_error,
                progress=True)
            tq.insert_all(tasks)
            tasks = tc.create_mesh_manifest_tasks(layer_path=args.dst_path,
                                                  magnitude=args.magnitude)
            tq.insert_all(tasks)
        print("Meshing complete")
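# Sketch of the argparse namespace segment() above expects; the flag names
# are inferred from the attribute accesses and the defaults are illustrative.
import argparse

parser = argparse.ArgumentParser(description='waterz ws+agg segmentation')
parser.add_argument('--src_path')
parser.add_argument('--dst_path')
parser.add_argument('--bbox_start', nargs=3, type=int)
parser.add_argument('--bbox_size', nargs=3, type=int)
parser.add_argument('--chunk_size', nargs=3, type=int, default=[128, 128, 64])
parser.add_argument('--mip', type=int, default=0)
parser.add_argument('--parallel', type=int, default=1)
parser.add_argument('--threshold', type=float, default=0.7)
parser.add_argument('--max_simplification_error', type=int, default=40)
parser.add_argument('--magnitude', type=int, default=3)
parser.add_argument('--owner', default='*****@*****.**')
parser.add_argument('--segment', action='store_true')
parser.add_argument('--mesh', action='store_true')
args = parser.parse_args()
segment(args)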
def create_sharded_skeleton_merge_tasks(layer_path, dust_threshold, tick_threshold,
                                        shard_index_bytes=2**13,
                                        minishard_index_bytes=2**15,
                                        minishard_index_encoding='gzip',
                                        data_encoding='gzip',
                                        max_cable_length=None,
                                        spatial_index_db=None):
    cv = CloudVolume(layer_path, progress=True, spatial_index_db=spatial_index_db)
    cv.mip = cv.skeleton.meta.mip

    # 17 sec to download for pinky100
    all_labels = cv.skeleton.spatial_index.query(cv.bounds * cv.resolution)

    (shard_bits, minishard_bits, preshift_bits) = \
        compute_shard_params_for_hashed(
            num_labels=len(all_labels),
            shard_index_bytes=int(shard_index_bytes),
            minishard_index_bytes=int(minishard_index_bytes),
        )

    spec = ShardingSpecification(
        type='neuroglancer_uint64_sharded_v1',
        preshift_bits=preshift_bits,
        hash='murmurhash3_x86_128',
        minishard_bits=minishard_bits,
        shard_bits=shard_bits,
        minishard_index_encoding=minishard_index_encoding,
        data_encoding=data_encoding,
    )
    cv.skeleton.meta.info['sharding'] = spec.to_dict()
    cv.skeleton.meta.commit_info()

    # rebuild b/c sharding changes the skeleton source
    cv = CloudVolume(layer_path, progress=True, spatial_index_db=spatial_index_db)
    cv.mip = cv.skeleton.meta.mip

    # perf: ~36k hashes/sec
    shardfn = lambda lbl: cv.skeleton.reader.spec.compute_shard_location(lbl).shard_number

    shard_labels = defaultdict(list)
    for label in tqdm(all_labels, desc="Hashes"):
        shard_labels[shardfn(label)].append(label)

    cf = CloudFiles(cv.skeleton.meta.layerpath, progress=True)
    files = ((str(shardno) + '.labels', labels)
             for shardno, labels in shard_labels.items())
    cf.put_jsons(files, compress="gzip", cache_control="no-cache",
                 total=len(shard_labels))

    cv.provenance.processing.append({
        'method': {
            'task': 'ShardedSkeletonMergeTask',
            'cloudpath': layer_path,
            'mip': cv.skeleton.meta.mip,
            'dust_threshold': dust_threshold,
            'tick_threshold': tick_threshold,
            'max_cable_length': max_cable_length,
            'preshift_bits': preshift_bits,
            'minishard_bits': minishard_bits,
            'shard_bits': shard_bits,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    cv.commit_provenance()

    return [
        ShardedSkeletonMergeTask(layer_path, shard_no, dust_threshold,
                                 tick_threshold,
                                 max_cable_length=max_cable_length)
        for shard_no in shard_labels.keys()
    ]
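# create_sharded_skeleton_merge_tasks returns the task objects rather than
# inserting them; a sketch of executing them locally (the queue class and
# layer path are assumptions):
from taskqueue import LocalTaskQueue

tasks = create_sharded_skeleton_merge_tasks(
    'file:///data/segmentation_layer',  # hypothetical layer
    dust_threshold=4000,
    tick_threshold=6000,
)
with LocalTaskQueue(parallel=4) as tq:
    tq.insert_all(tasks)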
def ingest(args):
    """
    Ingest an HDF file to a CloudVolume bucket
    """
    if args.local_hdf_path:
        hdf_file = h5py.File(args.local_hdf_path, "r")
    else:
        with Storage(args.cloud_src_path) as storage:
            hdf_file = h5py.File(storage.get_file(args.cloud_hdf_filename), "r")

    cur_hdf_group = hdf_file
    for group_name in args.hdf_keys_to_dataset:
        cur_hdf_group = cur_hdf_group[group_name]
    hdf_dataset = cur_hdf_group

    if args.zyx:
        dataset_shape = np.array(
            [hdf_dataset.shape[2], hdf_dataset.shape[1], hdf_dataset.shape[0]])
    else:
        dataset_shape = np.array([*hdf_dataset.shape])

    if args.layer_type == "image":
        data_type = "uint8"
    else:
        data_type = "uint64"

    voxel_offset = args.voxel_offset
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type=args.layer_type,
        data_type=data_type,
        encoding="raw",
        resolution=args.resolution,
        voxel_offset=voxel_offset,
        chunk_size=args.chunk_size,
        volume_size=dataset_shape,
    )
    provenance = {
        "description": args.provenance_description,
        "owners": [args.owner]
    }
    vol = CloudVolume(args.dst_path, info=info, provenance=provenance)
    vol.commit_info()
    vol.commit_provenance()

    all_files = set()
    for x in np.arange(voxel_offset[0], voxel_offset[0] + dataset_shape[0],
                       args.chunk_size[0]):
        for y in np.arange(voxel_offset[1], voxel_offset[1] + dataset_shape[1],
                           args.chunk_size[1]):
            for z in np.arange(voxel_offset[2], voxel_offset[2] + dataset_shape[2],
                               args.chunk_size[2]):
                all_files.add(tuple((x, y, z)))

    progress_dir = mkdir("progress/")  # unlike os.mkdir, doesn't crash on preexisting
    done_files = set()
    for done_file in os.listdir(progress_dir):
        # progress files are named "x,y,z"; parse back to the chunk tuple
        done_files.add(tuple(int(c) for c in done_file.split(",")))
    to_upload = all_files.difference(done_files)

    for chunk_start_tuple in to_upload:
        chunk_start = np.array(list(chunk_start_tuple))
        end_of_dataset = np.array(voxel_offset) + dataset_shape
        chunk_end = chunk_start + np.array(args.chunk_size)
        chunk_end = Vec(*chunk_end)
        chunk_end = Vec.clamp(chunk_end, Vec(0, 0, 0), end_of_dataset)
        chunk_hdf_start = chunk_start - voxel_offset
        chunk_hdf_end = chunk_end - voxel_offset
        if args.zyx:
            chunk = hdf_dataset[chunk_hdf_start[2]:chunk_hdf_end[2],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[0]:chunk_hdf_end[0], ]
            chunk = chunk.T
        else:
            chunk = hdf_dataset[chunk_hdf_start[0]:chunk_hdf_end[0],
                                chunk_hdf_start[1]:chunk_hdf_end[1],
                                chunk_hdf_start[2]:chunk_hdf_end[2], ]
        print("Processing ", chunk_start_tuple)
        array = np.array(chunk, dtype=np.dtype(data_type), order="F")
        vol[chunk_start[0]:chunk_end[0],
            chunk_start[1]:chunk_end[1],
            chunk_start[2]:chunk_end[2], ] = array
        # name the progress file "x,y,z" so it round-trips with the parser above
        touch(os.path.join(progress_dir, ",".join(map(str, chunk_start_tuple))))
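# Round-trip check for the progress-file naming used in ingest() above
# (pure illustration): the filename "x,y,z" parses back to the chunk tuple.
key = (64, 128, 0)
name = ",".join(map(str, key))  # "64,128,0"
assert tuple(int(c) for c in name.split(",")) == key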
class NumpyToNeuroglancer():
    viewer = None

    def __init__(self, animal, volume, scales, layer_type, data_type,
                 num_channels=1, chunk_size=[256, 256, 128], offset=[0, 0, 0]):
        self.volume = volume
        self.scales = scales
        self.layer_type = layer_type
        self.data_type = data_type
        self.chunk_size = chunk_size
        self.precomputed_vol = None
        self.offset = offset
        self.starting_points = None
        self.animal = animal
        self.num_channels = num_channels

    def add_annotation_point(self):
        ...

    def init_precomputed(self, path, volume_size, starting_points=None, progress_id=None):
        info = CloudVolume.create_new_info(
            num_channels=self.num_channels,
            layer_type=self.layer_type,  # 'image' or 'segmentation'
            data_type=self.data_type,
            encoding='raw',  # other options: 'jpeg', 'compressed_segmentation' (req. uint32 or uint64)
            resolution=self.scales,  # size of X,Y,Z pixels in nanometers
            voxel_offset=self.offset,  # X,Y,Z values in voxels
            chunk_size=self.chunk_size,  # rechunk of image X,Y,Z in voxels
            volume_size=volume_size,  # X,Y,Z size in voxels
        )
        self.starting_points = starting_points
        self.progress_id = progress_id
        self.precomputed_vol = CloudVolume(f'file://{path}', mip=0, info=info,
                                           compress=True, progress=False)
        self.precomputed_vol.commit_info()
        self.precomputed_vol.commit_provenance()

    def init_volume(self, path):
        info = CloudVolume.create_new_info(
            num_channels=self.volume.shape[2] if len(self.volume.shape) > 2 else 1,
            layer_type=self.layer_type,
            data_type=self.data_type,  # str(self.volume.dtype); channel images might be 'uint8'
            encoding='raw',  # raw, jpeg, compressed_segmentation, fpzip, kempressed
            resolution=self.scales,  # voxel scaling, units are in nanometers
            voxel_offset=self.offset,  # x,y,z offset in voxels from the origin
            chunk_size=self.chunk_size,  # units are voxels
            volume_size=self.volume.shape[:3],  # e.g. a cubic millimeter dataset
        )
        self.precomputed_vol = CloudVolume(f'file://{path}', mip=0, info=info,
                                           compress=True, progress=False)
        self.precomputed_vol.commit_info()
        #self.precomputed_vol[:, :, :] = self.volume[:, :, :]

    def add_segment_properties(self, segment_properties):
        if self.precomputed_vol is None:
            raise NotImplementedError(
                'You have to call init_precomputed before calling this function.')

        self.precomputed_vol.info['segment_properties'] = 'names'
        self.precomputed_vol.commit_info()

        segment_properties_path = os.path.join(
            self.precomputed_vol.layer_cloudpath.replace('file://', ''), 'names')
        os.makedirs(segment_properties_path, exist_ok=True)

        info = {
            "@type": "neuroglancer_segment_properties",
            "inline": {
                "ids": [str(number) for number, label in segment_properties],
                "properties": [{
                    "id": "label",
                    "type": "label",
                    "values": [str(label) for number, label in segment_properties]
                }]
            }
        }
        with open(os.path.join(segment_properties_path, 'info'), 'w') as file:
            json.dump(info, file, indent=2)

    def add_rechunking(self, outpath, downsample, chunks=None):
        if self.precomputed_vol is None:
            raise NotImplementedError(
                'You have to call init_precomputed before calling this function.')
        cpus, _ = get_cpus()
        tq = LocalTaskQueue(parallel=cpus)
        outpath = f'file://{outpath}'
        if chunks is None:
            chunks = calculate_chunks(downsample, 0)
        tasks = tc.create_transfer_tasks(self.precomputed_vol.layer_cloudpath,
                                         dest_layer_path=outpath,
                                         chunk_size=chunks,
                                         skip_downsamples=True)
        tq.insert(tasks)
        tq.execute()

    def add_downsampled_volumes(self, chunk_size=[128, 128, 64], num_mips=4):
        if self.precomputed_vol is None:
            raise NotImplementedError(
                'You have to call init_precomputed before calling this function.')
        _, cpus = get_cpus()
        tq = LocalTaskQueue(parallel=cpus)
        tasks = tc.create_downsampling_tasks(
            self.precomputed_vol.layer_cloudpath,
            preserve_chunk_size=False,
            num_mips=num_mips,
            chunk_size=chunk_size,
            compress=True)
        tq.insert(tasks)
        tq.execute()

    def add_segmentation_mesh(self, shape=[448, 448, 448], mip=0):
        if self.precomputed_vol is None:
            raise NotImplementedError(
                'You have to call init_precomputed before calling this function.')
        _, cpus = get_cpus()
        tq = LocalTaskQueue(parallel=cpus)
        tasks = tc.create_meshing_tasks(
            self.precomputed_vol.layer_cloudpath,
            mip=mip,
            max_simplification_error=40,
            shape=shape,
            compress=True)  # the first phase of creating the mesh
        tq.insert(tasks)
        tq.execute()
        # It should be possible to incorporate the manifest tasks into the
        # meshing tasks above, but doing so triggers a strange bug whose cause
        # is unknown, so they run as a second phase.
        tasks = tc.create_mesh_manifest_tasks(self.precomputed_vol.layer_cloudpath)
        tq.insert(tasks)
        tq.execute()

    def process_simple_slice(self, file_key):
        index, infile = file_key
        print(index, infile)
        try:
            image = Image.open(infile)
        except Exception:
            print('Could not open', infile)
            return  # without this, the code below would hit an unbound `image`
        width, height = image.size
        array = np.array(image, dtype=self.data_type, order='F')
        array = array.reshape((1, height, width)).T
        self.precomputed_vol[:, :, index] = array
        touchfile = os.path.join(self.progress_dir, os.path.basename(infile))
        touch(touchfile)
        image.close()
        return

    def process_mesh(self, file_key):
        index, infile = file_key
        if os.path.exists(os.path.join(self.progress_dir, os.path.basename(infile))):
            print(f"Section {index} already processed, skipping ")
            return
        img = io.imread(infile)
        labels = [[v - 8, v - 1] for v in range(9, 256, 8)]
        arr = np.copy(img)
        for label in labels:
            mask = (arr >= label[0]) & (arr <= label[1])
            arr[mask] = label[1]
        arr[arr > 248] = 255
        img = arr.T
        del arr
        self.precomputed_vol[:, :, index] = img.reshape(img.shape[0], img.shape[1], 1)
        touchfile = os.path.join(self.progress_dir, os.path.basename(infile))
        touch(touchfile)
        del img
        return

    def process_coronal_slice(self, file_key):
        index, infile = file_key
        if os.path.exists(os.path.join(self.progress_dir, os.path.basename(infile))):
            print(f"Slice {index} already processed, skipping ")
            return
        img = io.imread(infile)
        starty, endy, startx, endx = self.starting_points
        #img = np.rot90(img, 2)
        #img = np.flip(img)
        img = img[starty:endy, startx:endx]
        img = img.reshape(img.shape[0], img.shape[1], 1)
        #print(index, infile, img.shape, img.dtype, self.precomputed_vol.dtype, self.precomputed_vol.shape)
        self.precomputed_vol[:, :, index] = img
        touchfile = os.path.join(self.progress_dir, os.path.basename(infile))
        touch(touchfile)
        del img
        return

    def process_image(self, file_key):
        index, infile = file_key
        basefile = os.path.basename(infile)
        #completed = file_processed(self.animal, self.progress_id, basefile)
        completed = False
        if completed:
            print(f"Section {index} already processed, skipping ")
            return
        img = io.imread(infile, img_num=0)
        img = img.reshape(self.num_channels, img.shape[0], img.shape[1]).T
        self.precomputed_vol[:, :, index] = img
        #set_file_completed(self.animal, self.progress_id, basefile)
        del img
        return

    def process_3channel(self, file_key):
        index, infile = file_key
        basefile = os.path.basename(infile)
        completed = file_processed(self.animal, self.progress_id, basefile)
        if completed:
            print(f"Section {index} already processed, skipping ")
            return
        img = io.imread(infile, img_num=0)
        img = img.reshape(img.shape[0], img.shape[1], 1, img.shape[2])
        img = np.rot90(img, 1)
        img = np.flipud(img)
        self.precomputed_vol[:, :, index] = img
        set_file_completed(self.animal, self.progress_id, basefile)
        del img
        return

    def add_volume(self, volume, layer_name=None, clear_layer=False):
        if self.viewer is None:
            self.viewer = neuroglancer.Viewer()
        if layer_name is None:
            layer_name = f'{self.layer_type}_{self.scales}'
        source = neuroglancer.LocalVolume(
            data=volume,
            dimensions=neuroglancer.CoordinateSpace(names=['x', 'y', 'z'],
                                                    units='nm',
                                                    scales=self.scales),
            voxel_offset=self.offset)
        if self.layer_type == 'segmentation':
            layer = neuroglancer.SegmentationLayer(source=source)
        else:
            layer = neuroglancer.ImageLayer(source=source)
        with self.viewer.txn() as s:
            if clear_layer:
                s.layers.clear()
            s.layers[layer_name] = layer
        print(f'A new layer named {layer_name} is added to:')
        print(self.viewer)

    def preview(self, layer_name=None, clear_layer=False):
        self.add_volume(self.volume, layer_name=layer_name, clear_layer=clear_layer)
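# Minimal usage sketch for NumpyToNeuroglancer; the animal id, scales, and
# data are illustrative. preview() starts a local neuroglancer viewer and
# prints its URL.
import numpy as np

stack = np.random.randint(0, 255, size=(100, 100, 10), dtype=np.uint8)
ng = NumpyToNeuroglancer(
    animal='DK52',                 # hypothetical animal id
    volume=stack,
    scales=[10000, 10000, 20000],  # nm per voxel
    layer_type='image',
    data_type=np.uint8,
)
ng.preview()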