x_scale_nm, y_scale_nm, z_scale_nm = 5000, 5000, 10000 """ Handle the different steps """ if step == 'step0': print("step 0") volume_size = (x_dim, y_dim, z_dim) resolution = (x_scale_nm, y_scale_nm, z_scale_nm) vol = make_info_file(volume_size=volume_size, layer_dir=layer_dir, resolution=resolution, atlas_type='Princeton') elif step == 'step1': print("step 1") # Find the individual z planes in the full_sizedatafld - these are the blended images at the raw resolution sorted_files = sorted(all_slices) vol = CloudVolume(f'file://{layer_dir}') done_files = set([int(z) for z in os.listdir(progress_dir)]) all_files = set(range(vol.bounds.minpt.z, vol.bounds.maxpt.z + 1)) to_upload = [int(z) for z in list(all_files.difference(done_files))] to_upload.sort() print(f"Have {len(to_upload)} planes to upload") with ProcessPoolExecutor(max_workers=16) as executor: executor.map(process_slice, to_upload) elif step == 'step2': # downsampling print("step 2") vol = CloudVolume(f'file://{layer_dir}') tasks = make_downsample_tasks(vol, mip_start=0, num_mips=4) with LocalTaskQueue(parallel=18) as tq: tq.insert_all(tasks)
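# Hedged sketch of the `process_slice` worker that step 1 maps over the missing z
# indices; the real implementation is not shown in this script. It assumes
# `layer_dir`, `progress_dir`, and `sorted_files` (the sorted full-resolution
# planes) from the surrounding code.
def process_slice(z):
    import os
    import tifffile
    from cloudvolume import CloudVolume
    vol = CloudVolume(f'file://{layer_dir}', progress=False)
    img = tifffile.imread(sorted_files[z])  # (rows, cols) plane at raw resolution
    img = img.T                             # CloudVolume expects x, y order
    vol[:, :, z] = img.reshape(img.shape[0], img.shape[1], 1)
    # touch a progress marker named by z so an interrupted run can resume
    open(os.path.join(progress_dir, str(z)), 'w').close()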
def DeleteSkeletonFilesTask(cloudpath: str, prefix: str, skel_dir: Optional[str] = None):
    cv = CloudVolume(cloudpath, skel_dir=skel_dir)
    cf = CloudFiles(cv.skeleton.meta.layerpath)
    cf.delete(cf.list(prefix=prefix))
class ReadPrecomputedOperator(OperatorBase): def __init__(self, volume_path: str, mip: int = 0, expand_margin_size=(0, 0, 0), fill_missing: bool = False, validate_mip: int = None, blackout_sections: bool = None, dry_run: bool = False, name: str = 'cutout'): super().__init__(name=name) self.volume_path = volume_path self.mip = mip self.expand_margin_size = expand_margin_size self.fill_missing = fill_missing self.validate_mip = validate_mip self.blackout_sections = blackout_sections self.dry_run = dry_run if blackout_sections: with Storage(volume_path) as stor: self.blackout_section_ids = stor.get_json( 'blackout_section_ids.json')['section_ids'] verbose = (logging.getLogger().getEffectiveLevel() <= 30) self.vol = CloudVolume(self.volume_path, bounded=False, fill_missing=self.fill_missing, progress=verbose, mip=self.mip, cache=False, green_threads=True) def __call__(self, output_bbox): chunk_slices = tuple( slice(s.start - m, s.stop + m) for s, m in zip(output_bbox.to_slices(), self.expand_margin_size)) if self.dry_run: input_bbox = Bbox.from_slices(chunk_slices) return Chunk.from_bbox(input_bbox) logging.info('cutout {} from {}'.format(chunk_slices[::-1], self.volume_path)) # always reverse the indexes since cloudvolume use x,y,z indexing chunk = self.vol[chunk_slices[::-1]] chunk = np.asarray(chunk) # the cutout is fortran ordered, so need to transpose and make it C order chunk = chunk.transpose() # we can delay this transpose later # actually we do not need to make it contiguous # chunk = np.ascontiguousarray(chunk) # if the channel number is 1, squeeze it as 3d array # this should not be neccessary # TODO: remove this step and use 4D array all over this package. # always use 4D array will simplify some operations voxel_offset = tuple(s.start for s in chunk_slices) if chunk.shape[0] == 1: chunk = np.squeeze(chunk, axis=0) else: voxel_offset = (0, ) + voxel_offset chunk = Chunk(chunk, voxel_offset=voxel_offset, voxel_size=tuple(self.vol.resolution[::-1])) if self.blackout_sections: chunk = self._blackout_sections(chunk) if self.validate_mip: self._validate_chunk(chunk) return chunk def _blackout_sections(self, chunk): """ make some sections black. this was normally used for the section with bad alignment. The ConvNet was supposed to handle them better with black image. TODO: make this function as a separate operator """ # current code only works with 3d image assert chunk.ndim == 3, "current code assumes that the chunk is 3D image." for z in self.blackout_section_ids: z0 = z - chunk.voxel_offset[0] if z0 >= 0 and z0 < chunk.shape[0]: chunk[z0, :, :] = 0 return chunk def _validate_chunk(self, chunk): """ check that all the input voxels was downloaded without black region We have found some black regions in previous inference run, so hopefully this will solve the problem. 
""" if chunk.ndim == 4 and chunk.shape[0] > 1: chunk = chunk[0, :, :, :] validate_vol = CloudVolume(self.volume_path, bounded=False, fill_missing=self.fill_missing, progress=False, mip=self.validate_mip, cache=False, green_threads=True) chunk_mip = self.mip logging.info('validate chunk in mip {}'.format(self.validate_mip)) assert self.validate_mip >= chunk_mip # only use the region corresponds to higher mip level # clamp the surrounding regions in XY plane # this assumes that the input dataset was downsampled starting from the # beginning offset in the info file voxel_offset = chunk.voxel_offset # factor3 follows xyz order in CloudVolume factor3 = np.array([ 2**(self.validate_mip - chunk_mip), 2 **(self.validate_mip - chunk_mip), 1 ], dtype=np.int32) clamped_offset = tuple(go + f - (go - vo) % f for go, vo, f in zip( voxel_offset[::-1], self.vol.voxel_offset, factor3)) clamped_stop = tuple( go + s - (go + s - vo) % f for go, s, vo, f in zip(voxel_offset[::-1], chunk.shape[::-1], vol.voxel_offset, factor3)) clamped_slices = tuple( slice(o, s) for o, s in zip(clamped_offset, clamped_stop)) clamped_bbox = Bbox.from_slices(clamped_slices) clamped_input = chunk.cutout(clamped_slices[::-1]) # transform to xyz order clamped_input = np.transpose(clamped_input) # get the corresponding bounding box for validation validate_bbox = self.vol.bbox_to_mip(clamped_bbox, mip=chunk_mip, to_mip=self.validate_mip) #validate_bbox = clamped_bbox // factor3 # downsample the input using avaraging # keep the z as it is since the mip only applies to xy plane # recursivly downsample the input # if we do it directly, the downsampled input will not be the same with the recursive one # because of the rounding error of integer division for _ in range(self.validate_mip - chunk_mip): clamped_input = downsample_with_averaging(clamped_input, (2, 2, 1)) # validation by template matching assert validate_by_template_matching(clamped_input) validate_input = validate_vol[validate_bbox.to_slices()] if validate_input.shape[3] == 1: validate_input = np.squeeze(validate_input, axis=3) # use the validate input to check the downloaded input assert np.alltrue(validate_input == clamped_input)
def uploadskeletons(skelsource, skelseglist, skelnamelist, path): """Upload skeleton (of cloudvolume class) to a local server. Parameters ---------- skelsource : List containing cloud volume skeletons skelseglist : List containing the segids(skid) skelnamelist : List containing the names of skeletons path : path to the local data server Returns ------- cv : cloudvolume class object """ info = { "@type": "neuroglancer_skeletons", "transform": skelsource[0].transform.flatten(), "vertex_attributes": [{ "id": "radius", "data_type": "float32", "num_components": 1 }], "scales": "um" } path = 'file://' + path + '/precomputed' cv = CloudVolume(path, info=info) # prepare for info file cv.skeleton.meta.info['@type'] = 'neuroglancer_skeletons' cv.skeleton.meta.info['transform'] = skelsource[0].transform.flatten() cv.skeleton.meta.info['vertex_attributes'] = [{ 'id': 'radius', 'data_type': 'float32', 'num_components': 1 }] del cv.skeleton.meta.info['sharding'] del cv.skeleton.meta.info['spatial_index'] cv.skeleton.meta.info['segment_properties'] = 'seg_props' cv.skeleton.meta.commit_info() files = [ os.path.join(cv.skeleton.meta.skeleton_path, str(skel.id)) for skel in skelsource ] for fileidx in range(len(files)): fullfilepath = files[fileidx] fullfilepath = os.path.join(cv.basepath, os.path.basename(path), fullfilepath) uploadskel = Skeleton(vertices=skelsource[fileidx].vertices, edges=skelsource[fileidx].edges) print(fullfilepath) with open(fullfilepath, 'wb') as f: f.write(uploadskel.to_precomputed()) segfilepath = os.path.join(cv.basepath, os.path.basename(path), cv.skeleton.meta.skeleton_path, 'seg_props') if not os.path.exists(segfilepath): os.makedirs(segfilepath) print('creating:', segfilepath) allsegproplist = [] for segid in skelseglist: segpropdict = {} segpropdict['id'] = segid segpropdict['type'] = 'label' segpropdict['values'] = skelnamelist allsegproplist.append(segpropdict) seginfo = { "@type": "neuroglancer_segment_properties", "inline": { "ids": skelseglist, "properties": allsegproplist } } segfile = os.path.join(segfilepath, 'info') with open(segfile, 'w') as segfile: json.dump(seginfo, segfile) return cv
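# Hypothetical use of uploadskeletons(): wrap one cloud-volume Skeleton, upload it to
# a local directory, then serve that directory over HTTP so Neuroglancer can read the
# precomputed skeleton source. Vertices, ids, names, and the path are placeholders;
# the Skeleton's default identity transform is used.
import numpy as np
from cloudvolume import Skeleton

skel = Skeleton(
    vertices=np.array([[0, 0, 0], [100, 0, 0], [100, 100, 0]], dtype=np.float32),
    edges=np.array([[0, 1], [1, 2]], dtype=np.uint32),
    segid=1,
)
cv = uploadskeletons([skel], [1], ['example_neuron'], '/tmp/skeldata')
# serve /tmp/skeldata (e.g. `python -m http.server 8000`) and add a Neuroglancer
# layer with source precomputed://http://localhost:8000/precomputed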
def segment(args):
    """Run segmentation on a contiguous block of affinities from a CloudVolume.

    Args:
        args: ArgParse object from main
    """
    bbox_start = Vec(*args.bbox_start)
    bbox_size = Vec(*args.bbox_size)
    chunk_size = Vec(*args.chunk_size)
    bbox = Bbox(bbox_start, bbox_start + bbox_size)
    src_cv = CloudVolume(args.src_path, fill_missing=True, parallel=args.parallel)
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='segmentation',
        data_type='uint64',
        encoding='raw',
        resolution=src_cv.info['scales'][args.mip]['resolution'],
        voxel_offset=bbox_start,
        chunk_size=chunk_size,
        volume_size=bbox_size,
        mesh='mesh_mip_{}_err_{}'.format(args.mip, args.max_simplification_error))
    dst_cv = CloudVolume(args.dst_path, info=info, parallel=args.parallel)
    dst_cv.provenance.description = 'ws+agg using waterz'
    dst_cv.provenance.processing.append({
        'method': {
            'task': 'watershed+agglomeration',
            'src_path': args.src_path,
            'dst_path': args.dst_path,
            'mip': args.mip,
            'shape': bbox_size.tolist(),
            'bounds': [
                bbox.minpt.tolist(),
                bbox.maxpt.tolist(),
            ],
        },
        'by': args.owner,
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    dst_cv.provenance.owners = [args.owner]
    dst_cv.commit_info()
    dst_cv.commit_provenance()
    if args.segment:
        print('Downloading affinities')
        aff = src_cv[bbox.to_slices()]
        aff = np.transpose(aff, (3, 0, 1, 2))
        aff = np.ascontiguousarray(aff, dtype=np.float32)
        thresholds = [args.threshold]
        print('Starting ws+agg')
        seg_gen = waterz.agglomerate(aff, thresholds)
        seg = next(seg_gen)
        print('Deleting affinities')
        del aff
        print('Uploading segmentation')
        dst_cv[bbox.to_slices()] = seg
    if args.mesh:
        print('Starting meshing')
        with LocalTaskQueue(parallel=args.parallel) as tq:
            tasks = tc.create_meshing_tasks(
                layer_path=args.dst_path,
                mip=args.mip,
                shape=args.chunk_size,
                simplification=True,
                max_simplification_error=args.max_simplification_error,
                progress=True)
            tq.insert_all(tasks)
            tasks = tc.create_mesh_manifest_tasks(
                layer_path=args.dst_path, magnitude=args.magnitude)
            tq.insert_all(tasks)
        print("Meshing complete")
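# A plausible CLI wiring for segment(); the flag names simply mirror the attributes
# the function reads (args.src_path, args.bbox_start, ...) and are not taken from the
# project's actual argument parser. Defaults here are illustrative only.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='waterz watershed+agglomeration over a CloudVolume cutout')
    parser.add_argument('--src_path', required=True)   # affinity layer
    parser.add_argument('--dst_path', required=True)   # segmentation layer to create
    parser.add_argument('--mip', type=int, default=0)
    parser.add_argument('--bbox_start', type=int, nargs=3, required=True)
    parser.add_argument('--bbox_size', type=int, nargs=3, required=True)
    parser.add_argument('--chunk_size', type=int, nargs=3, default=[128, 128, 64])
    parser.add_argument('--threshold', type=float, default=0.7)
    parser.add_argument('--max_simplification_error', type=int, default=40)
    parser.add_argument('--magnitude', type=int, default=3)
    parser.add_argument('--owner', default='')
    parser.add_argument('--parallel', type=int, default=1)
    parser.add_argument('--segment', action='store_true')
    parser.add_argument('--mesh', action='store_true')
    segment(parser.parse_args())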
app.logger.error('Error: {}'.format(e)) return {'error': str(e)} app.logger.debug('Locations queried: {}'.format(str(locs))) if locs.shape[0] > config.MaxLocations: err = { 'error': 'Max number of locations ({}) exceeded'.format(config.MaxLocations) } return make_response(jsonify(err), 400) try: seg_ids = process.get_multiple_ids(locs, vol, max_workers=config.MaxWorkers) except BaseException: tb = traceback.format_exc() app.logger.error('Error: {}'.format(tb)) return make_response(jsonify({'error': str(tb)}), 500) return jsonify(seg_ids.tolist()) started = datetime.datetime.now() vol = CloudVolume(**config.CloudVolumeKwargs) if __name__ == "__main__": app.run(host='0.0.0.0')
def create_skeleton_layer(s3_bucket, skel_res, img_dims, num_res=7): """Creates segmentation layer for skeletons Arguments: s3_bucket {str} -- path to precomputed skeleton destination skel_res {list} -- x,y,z dimensions of highest res voxel size (nm) img_dims {list} -- x,y,z voxel dimensions of tiff images Keyword Arguments: num_res {int} -- number of image resolutions to be downsampled Returns: vol {cloudvolume.CloudVolume} -- CloudVolume to upload skeletons to """ # create cloudvolume info info = CloudVolume.create_new_info( num_channels=1, layer_type="segmentation", data_type="uint64", # Channel images might be 'uint8' encoding="raw", # raw, jpeg, compressed_segmentation, fpzip, kempressed # Voxel scaling, units are in nanometers resolution=skel_res, voxel_offset=[0, 0, 0], # x,y,z offset in voxels from the origin # Pick a convenient size for your underlying chunk representation # Powers of two are recommended, doesn't need to cover image exactly chunk_size=[int(i / 4) for i in img_dims], # chunk_size=[128, 128, 64], # units are voxels volume_size=[i * 2**(num_res - 1) for i in img_dims], # units are voxels skeletons="skeletons", ) skel_info = { "@type": "neuroglancer_skeletons", "transform": [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0], "vertex_attributes": [ { "id": "radius", "data_type": "float32", "num_components": 1 }, { "id": "vertex_types", "data_type": "float32", "num_components": 1 }, { "id": "vertex_color", "data_type": "float32", "num_components": 4 }, ], } # get cloudvolume info vol = CloudVolume(s3_bucket, info=info, parallel=True) [vol.add_scale((2**i, 2**i, 2**i)) for i in range(num_res)] # num_res - 1 vol.commit_info() # upload skeleton info to /skeletons/ dir with storage.SimpleStorage(vol.cloudpath) as stor: stor.put_json(str(Path("skeletons") / "info"), skel_info) return vol
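# Hypothetical call to create_skeleton_layer(): a 10x10x10 nm isotropic dataset whose
# raw tiles are 1024x1024x512 voxels, downsampled over 7 mip levels. The bucket path
# is a placeholder.
vol = create_skeleton_layer(
    "s3://example-bucket/skeleton_layer",
    skel_res=[10, 10, 10],
    img_dims=[1024, 1024, 512],
    num_res=7,
)
print([s["resolution"] for s in vol.info["scales"]])
# each added scale doubles x, y, and z, e.g. [10,10,10], [20,20,20], [40,40,40], ...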
def analyze_synapse( segmentation_vol, vc_vol, sj_vol, mask_vol, mask_mip, output_dir, chunk_size, overlap, offset, size): ''' segmentaion, vc, sj are by default at same mip level ''' vc_thresh = 5 sj_thresh = 5 chunk_size = np.array(chunk_size) overlap = np.array(overlap) if mpi_rank == 0: os.makedirs(output_dir, exist_ok=True) cv_args = dict(progress=False, parallel=False, fill_missing=True, bounded=False) seg_cv = CloudVolume('file://%s' % segmentation_vol, mip=0, **cv_args) vc_cv = CloudVolume('file://%s' % vc_vol, mip=0, **cv_args) sj_cv = CloudVolume('file://%s' % sj_vol, mip=0, **cv_args) mask_cv = CloudVolume('file://%s' % mask_vol, mip=mask_mip, **cv_args) if offset is None or size is None: union_bb = Bbox.intersection(seg_cv.meta.bounds(0), vc_cv.meta.bounds(0)) offset = union_bb.minpt size = union_bb.size3() offset = np.array(offset) size = np.array(size) print(offset, size) union_bb = Bbox(offset, offset + size) print(union_bb) bbs = get_chunk_bboxes(union_bb, chunk_size, overlap) print(len(bbs)) all_inds = np.arange(len(bbs)) np.random.shuffle(all_inds) sub_inds = np.array_split(all_inds, mpi_size) # sub_bbs = np.array_split(bbs, mpi_size) else: seg_cv = None vc_cv = None sj_cv = None mask_cv = None bbs = None sub_inds = None # sub_bbs = None seg_cv = mpi_comm.bcast(seg_cv, 0) vc_cv = mpi_comm.bcast(vc_cv, 0) sj_cv = mpi_comm.bcast(sj_cv, 0) mask_cv = mpi_comm.bcast(mask_cv, 0) # sub_bbs = mpi_comm.scatter(sub_bbs, 0) bbs = mpi_comm.bcast(bbs, 0) sub_inds = mpi_comm.scatter(sub_inds, 0) padding = overlap // 2 all_vc_dfs = [] all_syn_dfs = [] # for ind, bb in tqdm(enumerate(sub_bbs), total=len(sub_bbs), desc='iterate bbs'): for ind in tqdm(sub_inds, total=len(sub_inds), desc='iterate bbs'): bb = bbs[ind] bb = Bbox(bb.minpt + padding, bb.maxpt - padding) offset = bb.minpt seg_chunk = np.array(seg_cv[bb])[..., 0] vc_chunk = np.array(vc_cv[bb])[..., 0] sj_chunk = np.array(sj_cv[bb])[..., 0] mask_chunk = np.array(mask_cv[bb])[..., 0] if np.logical_or.reduce(seg_chunk.ravel()) == 0: continue vc_df, vc_labels = find_vc_fast( seg_chunk, mask_chunk, vc_chunk, offset, vc_thresh=vc_thresh, size_thresh=100) if vc_df is None: continue all_vc_dfs.append(vc_df) pre_synapse_df, sj_labels = find_sj( vc_df, seg_chunk, vc_labels, mask_chunk, sj_chunk, offset, pad=(3, 3, 2), border_thickness=(3, 3, 2), min_sj_size=60, max_neighbor_count=3) if pre_synapse_df is None: continue synapse_df, sj_psd_annos = find_post_syn(pre_synapse_df, seg_chunk, sj_labels, offset, rad=(20, 20, 3), max_angle=60.0, border_thickness=(3, 3, 2)) if len(synapse_df): cube_df_path = os.path.join(output_dir, 'synapse_%d_%d_%d.csv' % (offset[0], offset[1], offset[2])) synapse_df = synapse_df.set_index(['pre_seg_id', 'post_seg_id']) synapse_df.to_csv(cube_df_path) all_syn_dfs.append(synapse_df) mpi_comm.barrier() logging.warning('rank %d reached', mpi_rank) all_vc_dfs = mpi_comm.reduce(all_vc_dfs, MPI.SUM, 0) all_syn_dfs = mpi_comm.reduce(all_syn_dfs, MPI.SUM, 0) if mpi_rank == 0: all_vc_df = pd.concat(all_vc_dfs) vc_out_path = os.path.join(output_dir, 'vc.csv') all_vc_df.to_csv(vc_out_path) all_syn_df = pd.concat(all_syn_dfs) syn_out_path = os.path.join(output_dir, 'synapse.csv') all_syn_df.to_csv(syn_out_path)
import shutil import gzip import json from cloudvolume import CloudVolume, chunks, Storage, PrecomputedSkeleton from cloudvolume.storage import SimpleStorage from cloudvolume.lib import mkdir, Bbox, Vec from cloudvolume.skeletonservice import SkeletonDecodeError info = CloudVolume.create_new_info( num_channels=1, # Increase this number when we add more tests for RGB layer_type='segmentation', data_type='uint16', encoding='raw', resolution=[1,1,1], voxel_offset=(0,0,0), skeletons=True, volume_size=(100, 100, 100), chunk_size=(64, 64, 64), ) def test_skeletons(): # Skeleton of my initials # z=0: W ; z=1 S vertices = np.array([ [ 0, 1, 0 ], [ 1, 0, 0 ], [ 1, 1, 0 ],
def test_cache_validity(): image = np.zeros(shape=(128,128,128,1), dtype=np.uint8) dirpath = '/tmp/cloudvolume/caching-validity-' + str(TEST_NUMBER) layer_path = 'file://' + dirpath vol = create_volume_from_image( image=image, offset=(1,1,1), layer_path=layer_path, layer_type='image', resolution=(1,1,1), encoding='raw' ) vol.cache.enabled = True vol.cache.flush() vol.commit_info() def test_with_mock_cache_info(info, shoulderror): finfo = os.path.join(vol.cache.path, 'info') with open(finfo, 'w') as f: f.write(json.dumps(info)) if shoulderror: try: CloudVolume(vol.layer_cloudpath, cache=True) except ValueError: pass else: assert False else: CloudVolume(vol.layer_cloudpath, cache=True) test_with_mock_cache_info(vol.info, shoulderror=False) info = vol.info.copy() info['scales'][0]['size'][0] = 666 test_with_mock_cache_info(info, shoulderror=False) test_with_mock_cache_info({ 'zomg': 'wow' }, shoulderror=True) def tiny_change(key, val): info = vol.info.copy() info[key] = val test_with_mock_cache_info(info, shoulderror=True) tiny_change('type', 'zoolander') tiny_change('data_type', 'uint32') tiny_change('num_channels', 2) tiny_change('mesh', 'mesh') def scale_change(key, val, mip=0): info = vol.info.copy() info['scales'][mip][key] = val test_with_mock_cache_info(info, shoulderror=True) scale_change('voxel_offset', [ 1, 2, 3 ]) scale_change('resolution', [ 1, 2, 3 ]) scale_change('encoding', 'npz') vol.cache.flush() # Test no info file at all CloudVolume(vol.layer_cloudpath, cache=True) vol.cache.flush()
def myfunction(arg1):
    return arg1

import networkx as nx

# pip install numpy tifffile cloud-volume
import numpy as np
import tifffile
from cloudvolume import CloudVolume

vol = CloudVolume("s3://open-neurodata/kasthuri/kasthuri11/image", mip=0, use_https=True)

# load data into numpy array
cutout = vol[11264:11776, 13312:13824, 912:928]

# save cutout as TIFF
tifffile.imwrite("data.tiff", data=np.transpose(cutout))

print(nx.info(graph))
graph2 = nx.Graph(graph)
nx.average_clustering(graph2)

# global efficiency
ugraph = nx.to_undirected(graph)
nx.global_efficiency(ugraph)
def test_cloud_access():
    vol = CloudVolume('gs://seunglab-test/test_v0/image')
    vol = CloudVolume('s3://seunglab-test/test_dataset/image')
def test_boss_download():
    vol = CloudVolume('gs://seunglab-test/test_v0/image')
    bossvol = CloudVolume('boss://automated_testing/test_v0/image')

    vimg = vol[:,:,:5]
    bimg = bossvol[:,:,:5]

    assert np.all(bimg == vimg)
    assert bimg.dtype == vimg.dtype

    vol.bounded = False
    vol.fill_missing = True
    bossvol.bounded = False
    bossvol.fill_missing = True

    assert np.all(vol[-100:100,-100:100,-10:10] == bossvol[-100:100,-100:100,-10:10])

    # BOSS uses a different algorithm for creating downsamples,
    # so it's hard to compare 1:1 with pixels.
    bossvol.bounded = True
    bossvol.fill_missing = False
    bossvol.mip = 1
    bimg = bossvol[:,:,5:6]
    assert np.any(bimg > 0)
def _serial_processing( self, seg_ids, ngl, num_verts, start_vert, include_neighborhood, write=False, batch_size=None, file_path=None, ): """Core code which actually extracts features.""" voxel_dict = {} counter = 0 batch_id = 0 for seg_id in seg_ids: if self.segment_url is None: segment = ngl.cv.skeleton.get(seg_id) else: cv_skel = CloudVolume(self.segment_url) segment = cv_skel.skeleton.get(seg_id) if num_verts is not None and num_verts <= len(segment.vertices): if num_verts <= len(segment.vertices): verts = segment.vertices[start_vert:num_verts] else: warnings.warn( UserWarning( f"Number of vertices {num_verts} greater than total vertices {len(segment.vertices)}. Defaulting to max len." ) ) verts = segment.vertices[start_vert:] else: verts = segment.vertices[start_vert:] start_vert = 0 for v_id, vertex in enumerate(verts): start = time.time() img, bounds, voxel = ngl.pull_voxel(seg_id, v_id, self.size) img_off = ngl.pull_bounds_img(bounds + self.offset) end = time.time() self.download_time += end - start start = time.time() features = self._convert_to_features(img) features_off = self._convert_to_features(img_off) end = time.time() self.conversion_time += end - start voxel_dict[counter] = { **{"Segment": int(seg_id), "Vertex": int(v_id), "Label": 1}, **features, } counter += 1 voxel_dict[counter] = { **{"Segment": int(seg_id), "Vertex": int(v_id), "Label": 0}, **features_off, } counter += 1 if write: if counter % batch_size == 0 or (counter + 1) % batch_size == 0: df = pd.DataFrame.from_dict(voxel_dict, "index") path = ( file_path + str(batch_id * batch_size) + "_" + str((batch_id + 1) * batch_size) + "_" + str(seg_id) + "_" + str(v_id) + ".feather" ) start = time.time() feather.write_dataframe(df, path) end = time.time() self.write_time += end - start voxel_dict = {} batch_id += 1 if file_path is None: if write: if not (counter % batch_size == 0 or (counter + 1) % batch_size == 0): df = pd.DataFrame.from_dict(voxel_dict, "index") path = ( file_path + str(batch_id * batch_size) + "_" + str(counter) + "_" + str(seg_id) + "_" + str(v_id) + ".feather" ) feather.write_dataframe(df, path) else: df = pd.DataFrame.from_dict(voxel_dict, "index") return df
def execute(self):
    vol = CloudVolume(self.cloudpath, self.mip,
                      non_aligned_writes=self.non_aligned_writes)
    bounds = Bbox(self.offset, self.shape + self.offset)
    bounds = Bbox.clamp(bounds, vol.bounds)
    img = np.zeros(bounds.size3(), dtype=vol.dtype) + self.value
    vol[bounds] = img
def test_skeletons(): # Skeleton of my initials # z=0: W ; z=1 S vertices = np.array([ [ 0, 1, 0 ], [ 1, 0, 0 ], [ 1, 1, 0 ], [ 2, 0, 0 ], [ 2, 1, 0 ], [ 0, 0, 1 ], [ 1, 0, 1 ], [ 1, 1, 1 ], [ 0, 1, 1 ], [ 0, 2, 1 ], [ 1, 2, 1 ], ], np.float32) edges = np.array([ [0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [7, 8], [8, 9], [9, 10], [10, 11], ], dtype=np.uint32) radii = np.array([ 1.0, 2.5, 3.0, 4.1, 1.2, 5.6, 2000.123123, 15.33332221, 8128.124, -1, 1824.03 ], dtype=np.float32) vol = CloudVolume('file:///tmp/cloudvolume/test-skeletons', info=info) vol.skeleton.upload(segid=1, vertices=vertices, edges=edges, radii=radii) skel = vol.skeleton.get(1) assert skel.id == 1 assert np.all(skel.vertices == vertices) assert np.all(skel.edges == edges) assert np.all(skel.radii == radii) assert np.all(skel.vertex_types == 0) assert vol.skeleton.path == 'skeletons' with SimpleStorage('file:///tmp/cloudvolume/test-skeletons/') as stor: rawskel = stor.get_file('skeletons/1') assert len(rawskel) == 8 + 11 * (12 + 8 + 4 + 1) stor.delete_file('skeletons/1') try: vol.skeleton.get(5) assert False except SkeletonDecodeError: pass
def execute(self):
    # This could be made more sophisticated using exists
    vol = CloudVolume(self.cloudpath, self.mip, fill_missing=False)
    bounds = Bbox(self.offset, self.shape + self.offset)
    bounds = Bbox.clamp(bounds, vol.bounds)
    image = vol[bounds]
def large_local_to_cloud(data_path, cloud_path, begin=None, end=None, dtype=None, multi=False, z_step=64, layer_type=None, chunk_size=(64, 64, 64), resolution=None, scale=0): ''' when data is a tiffstack above RAM limit layer_type: 'image' or 'segmentation' resolution: tuple of 3 ''' if not begin and not end: S, L = check_stack_len(data_path) # start and length else: S, L = begin, end - begin print(S, L) first_slice = dxchange.read_tiff(data_path) X, Y = first_slice.shape #volume_size = (L,X,Y) volume_size = (X, Y, L) if not dtype: data_type = first_slice.dtype else: data_type = dtype data_generator = large_data_generator(data_path, S, S + L, z_step, data_type, multi) if not os.path.exists(cloud_path): os.makedirs(cloud_path) info = CloudVolume.create_new_info( 1, layer_type=layer_type, data_type=str(data_type), encoding='raw', chunk_size=chunk_size, resolution=list(resolution), voxel_offset=(0, 0, 0), volume_size=volume_size, ) info = build_pyramid_info(info, scale) if layer_type == 'segmentation': info['mesh'] = 'mesh' pprint(info) with open(os.path.join(cloud_path, 'info'), 'w') as f: json.dump(info, f) print('>>', L, z_step) for i, data in tqdm(data_generator, total=L // z_step): curr_z_start = i curr_z_step = z_step for j in range(0, scale): vol = CloudVolume('file://' + cloud_path, mip=j, compress='') # Basic Example x, y, z = vol.volume_size if j == 0 and i + curr_z_step >= z: curr_z_step = z - curr_z_start if j > 0: data = data[::2, ::2, ::2] data = data[0:x, 0:y, 0:z] vol[:, :, curr_z_start:curr_z_start + curr_z_step] = data[:, :, :curr_z_step] curr_z_start //= 2 curr_z_step //= 2 return
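# Hedged sketch of the `large_data_generator` helper consumed above; the real
# implementation (and its dxchange-based reading) is not shown here. This version
# walks a multipage TIFF in z_step-sized blocks and yields (start_index, block) with
# the block in (X, Y, z) order to match the CloudVolume write. `multi` is ignored.
import numpy as np
import tifffile

def large_data_generator(data_path, z_begin, z_end, z_step, dtype, multi=False):
    for z0 in range(z_begin, z_end, z_step):
        z1 = min(z0 + z_step, z_end)
        pages = tifffile.imread(data_path, key=range(z0, z1))  # (n, X, Y)
        if pages.ndim == 2:
            pages = pages[None, ...]
        yield z0, np.moveaxis(pages, 0, -1).astype(dtype)       # (X, Y, n)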
def remap_segmentation(cv, chunk_x, chunk_y, chunk_z, mip=2, overlap_vx=1, time_stamp=None, progress=False): ws_cv = CloudVolume(cv.meta.cloudpath, mip=mip, progress=progress, fill_missing=cv.fill_missing) mip_diff = mip - cv.meta.watershed_mip mip_chunk_size = np.array(cv.meta.graph_chunk_size, dtype=np.int) / np.array( [2**mip_diff, 2**mip_diff, 1]) mip_chunk_size = mip_chunk_size.astype(np.int) offset = Vec(chunk_x, chunk_y, chunk_z) * mip_chunk_size bbx = Bbox(offset, offset + mip_chunk_size + overlap_vx) if cv.meta.chunks_start_at_voxel_offset: bbx += ws_cv.voxel_offset bbx = Bbox.clamp(bbx, ws_cv.bounds) seg = ws_cv[bbx][..., 0] if not np.any(seg): return seg sv_remapping, unsafe_dict = get_lx_overlapping_remappings( cv, chunk_x, chunk_y, chunk_z, time_stamp=time_stamp, progress=progress) seg = fastremap.mask_except(seg, list(sv_remapping.keys()), in_place=True) fastremap.remap(seg, sv_remapping, preserve_missing_labels=True, in_place=True) for unsafe_root_id in tqdm(unsafe_dict.keys(), desc="Unsafe Relabel", disable=(not progress)): bin_seg = seg == unsafe_root_id if np.sum(bin_seg) == 0: continue l2_edges = [] cc_seg = cc3d.connected_components(bin_seg) for i_cc in range(1, np.max(cc_seg) + 1): bin_cc_seg = cc_seg == i_cc overlaps = [] overlaps.extend(np.unique(seg[-2, :, :][bin_cc_seg[-1, :, :]])) overlaps.extend(np.unique(seg[:, -2, :][bin_cc_seg[:, -1, :]])) overlaps.extend(np.unique(seg[:, :, -2][bin_cc_seg[:, :, -1]])) overlaps = np.unique(overlaps) linked_l2_ids = overlaps[np.in1d(overlaps, unsafe_dict[unsafe_root_id])] if len(linked_l2_ids) == 0: seg[bin_cc_seg] = 0 elif len(linked_l2_ids) == 1: seg[bin_cc_seg] = linked_l2_ids[0] else: seg[bin_cc_seg] = linked_l2_ids[0] for i_l2_id in range(len(linked_l2_ids) - 1): for j_l2_id in range(i_l2_id + 1, len(linked_l2_ids)): l2_edges.append( [linked_l2_ids[i_l2_id], linked_l2_ids[j_l2_id]]) if len(l2_edges) > 0: g = nx.Graph() g.add_edges_from(l2_edges) ccs = nx.connected_components(g) for cc in ccs: cc_ids = np.sort(list(cc)) seg[np.in1d(seg, cc_ids[1:]).reshape(seg.shape)] = cc_ids[0] return seg
def create_sharded_skeleton_merge_tasks(layer_path, dust_threshold, tick_threshold, shard_index_bytes=2**13, minishard_index_bytes=2**15, minishard_index_encoding='gzip', data_encoding='gzip', max_cable_length=None, spatial_index_db=None): cv = CloudVolume(layer_path, progress=True, spatial_index_db=spatial_index_db) cv.mip = cv.skeleton.meta.mip # 17 sec to download for pinky100 all_labels = cv.skeleton.spatial_index.query(cv.bounds * cv.resolution) (shard_bits, minishard_bits, preshift_bits) = \ compute_shard_params_for_hashed( num_labels=len(all_labels), shard_index_bytes=int(shard_index_bytes), minishard_index_bytes=int(minishard_index_bytes), ) spec = ShardingSpecification( type='neuroglancer_uint64_sharded_v1', preshift_bits=preshift_bits, hash='murmurhash3_x86_128', minishard_bits=minishard_bits, shard_bits=shard_bits, minishard_index_encoding=minishard_index_encoding, data_encoding=data_encoding, ) cv.skeleton.meta.info['sharding'] = spec.to_dict() cv.skeleton.meta.commit_info() # rebuild b/c sharding changes the skeleton source cv = CloudVolume(layer_path, progress=True, spatial_index_db=spatial_index_db) cv.mip = cv.skeleton.meta.mip # perf: ~36k hashes/sec shardfn = lambda lbl: cv.skeleton.reader.spec.compute_shard_location( lbl).shard_number shard_labels = defaultdict(list) for label in tqdm(all_labels, desc="Hashes"): shard_labels[shardfn(label)].append(label) cf = CloudFiles(cv.skeleton.meta.layerpath, progress=True) files = ((str(shardno) + '.labels', labels) for shardno, labels in shard_labels.items()) cf.put_jsons(files, compress="gzip", cache_control="no-cache", total=len(shard_labels)) cv.provenance.processing.append({ 'method': { 'task': 'ShardedSkeletonMergeTask', 'cloudpath': layer_path, 'mip': cv.skeleton.meta.mip, 'dust_threshold': dust_threshold, 'tick_threshold': tick_threshold, 'max_cable_length': max_cable_length, 'preshift_bits': preshift_bits, 'minishard_bits': minishard_bits, 'shard_bits': shard_bits, }, 'by': operator_contact(), 'date': strftime('%Y-%m-%d %H:%M %Z'), }) cv.commit_provenance() return [ ShardedSkeletonMergeTask(layer_path, shard_no, dust_threshold, tick_threshold, max_cable_length=max_cable_length) for shard_no in shard_labels.keys() ]
class NumpyToNeuroglancer(): viewer = None def __init__(self, animal, volume, scales, layer_type, data_type, num_channels=1, chunk_size=[256, 256, 128], offset=[0, 0, 0]): self.volume = volume self.scales = scales self.layer_type = layer_type self.data_type = data_type self.chunk_size = chunk_size self.precomputed_vol = None self.offset = offset self.starting_points = None self.animal = animal self.num_channels = num_channels def add_annotation_point(): ... def init_precomputed(self, path, volume_size, starting_points=None, progress_id=None): info = CloudVolume.create_new_info( num_channels=self.num_channels, layer_type=self.layer_type, # 'image' or 'segmentation' data_type=self.data_type, # encoding= 'raw', # other options: 'jpeg', 'compressed_segmentation' (req. uint32 or uint64) resolution=self.scales, # Size of X,Y,Z pixels in nanometers, voxel_offset=self.offset, # values X,Y,Z values in voxels chunk_size=self.chunk_size, # rechunk of image X,Y,Z in voxels volume_size=volume_size, # X,Y,Z size in voxels ) self.starting_points = starting_points self.progress_id = progress_id self.precomputed_vol = CloudVolume(f'file://{path}', mip=0, info=info, compress=True, progress=False) self.precomputed_vol.commit_info() self.precomputed_vol.commit_provenance() def init_volume(self, path): info = CloudVolume.create_new_info( num_channels=self.volume.shape[2] if len(self.volume.shape) > 2 else 1, layer_type=self.layer_type, data_type=self. data_type, # str(self.volume.dtype), # Channel images might be 'uint8' encoding= 'raw', # raw, jpeg, compressed_segmentation, fpzip, kempressed resolution=self.scales, # Voxel scaling, units are in nanometers voxel_offset=self.offset, # x,y,z offset in voxels from the origin chunk_size=self.chunk_size, # units are voxels volume_size=self.volume. shape[:3], # e.g. a cubic millimeter dataset ) self.precomputed_vol = CloudVolume(f'file://{path}', mip=0, info=info, compress=True, progress=False) self.precomputed_vol.commit_info() #self.precomputed_vol[:, :, :] = self.volume[:, :, :] def add_segment_properties(self, segment_properties): if self.precomputed_vol is None: raise NotImplementedError( 'You have to call init_precomputed before calling this function.' ) self.precomputed_vol.info['segment_properties'] = 'names' self.precomputed_vol.commit_info() segment_properties_path = os.path.join( self.precomputed_vol.layer_cloudpath.replace('file://', ''), 'names') os.makedirs(segment_properties_path, exist_ok=True) info = { "@type": "neuroglancer_segment_properties", "inline": { "ids": [str(number) for number, label in segment_properties], "properties": [{ "id": "label", "type": "label", "values": [str(label) for number, label in segment_properties] }] } } with open(os.path.join(segment_properties_path, 'info'), 'w') as file: json.dump(info, file, indent=2) def add_rechunking(self, outpath, downsample, chunks=None): if self.precomputed_vol is None: raise NotImplementedError( 'You have to call init_precomputed before calling this function.' ) cpus, _ = get_cpus() tq = LocalTaskQueue(parallel=cpus) outpath = f'file://{outpath}' if chunks is None: chunks = calculate_chunks(downsample, 0) tasks = tc.create_transfer_tasks(self.precomputed_vol.layer_cloudpath, dest_layer_path=outpath, chunk_size=chunks, skip_downsamples=True) tq.insert(tasks) tq.execute() def add_downsampled_volumes(self, chunk_size=[128, 128, 64], num_mips=4): if self.precomputed_vol is None: raise NotImplementedError( 'You have to call init_precomputed before calling this function.' 
) _, cpus = get_cpus() tq = LocalTaskQueue(parallel=cpus) tasks = tc.create_downsampling_tasks( self.precomputed_vol.layer_cloudpath, preserve_chunk_size=False, num_mips=num_mips, chunk_size=chunk_size, compress=True) tq.insert(tasks) tq.execute() def add_segmentation_mesh(self, shape=[448, 448, 448], mip=0): if self.precomputed_vol is None: raise NotImplementedError( 'You have to call init_precomputed before calling this function.' ) _, cpus = get_cpus() tq = LocalTaskQueue(parallel=cpus) tasks = tc.create_meshing_tasks( self.precomputed_vol.layer_cloudpath, mip=mip, max_simplification_error=40, shape=shape, compress=True) # The first phase of creating mesh tq.insert(tasks) tq.execute() # It should be able to incoporated to above tasks, but it will give a weird bug. Don't know the reason tasks = tc.create_mesh_manifest_tasks( self.precomputed_vol.layer_cloudpath ) # The second phase of creating mesh tq.insert(tasks) tq.execute() def process_simple_slice(self, file_key): index, infile = file_key print(index, infile) try: image = Image.open(infile) except: print('Could not open', infile) width, height = image.size array = np.array(image, dtype=self.data_type, order='F') array = array.reshape((1, height, width)).T self.precomputed_vol[:, :, index] = array touchfile = os.path.join(self.progress_dir, os.path.basename(infile)) touch(touchfile) image.close() return def process_mesh(self, file_key): index, infile = file_key if os.path.exists( os.path.join(self.progress_dir, os.path.basename(infile))): print(f"Section {index} already processed, skipping ") return img = io.imread(infile) labels = [[v - 8, v - 1] for v in range(9, 256, 8)] arr = np.copy(img) for label in labels: mask = (arr >= label[0]) & (arr <= label[1]) arr[mask] = label[1] arr[arr > 248] = 255 img = arr.T del arr self.precomputed_vol[:, :, index] = img.reshape(img.shape[0], img.shape[1], 1) touchfile = os.path.join(self.progress_dir, os.path.basename(infile)) touch(touchfile) del img return def process_coronal_slice(self, file_key): index, infile = file_key if os.path.exists( os.path.join(self.progress_dir, os.path.basename(infile))): print(f"Slice {index} already processed, skipping ") return img = io.imread(infile) starty, endy, startx, endx = self.starting_points #img = np.rot90(img, 2) #img = np.flip(img) img = img[starty:endy, startx:endx] img = img.reshape(img.shape[0], img.shape[1], 1) #print(index, infile, img.shape, img.dtype, self.precomputed_vol.dtype, self.precomputed_vol.shape) self.precomputed_vol[:, :, index] = img touchfile = os.path.join(self.progress_dir, os.path.basename(infile)) touch(touchfile) del img return def process_image(self, file_key): index, infile = file_key basefile = os.path.basename(infile) #completed = file_processed(self.animal, self.progress_id, basefile) completed = False if completed: print(f"Section {index} already processed, skipping ") return img = io.imread(infile, img_num=0) img = img.reshape(self.num_channels, img.shape[0], img.shape[1]).T self.precomputed_vol[:, :, index] = img #set_file_completed(self.animal, self.progress_id, basefile) del img return def process_3channel(self, file_key): index, infile = file_key basefile = os.path.basename(infile) completed = file_processed(self.animal, self.progress_id, basefile) if completed: print(f"Section {index} already processed, skipping ") return img = io.imread(infile, img_num=0) img = img.reshape(img.shape[0], img.shape[1], 1, img.shape[2]) img = np.rot90(img, 1) img = np.flipud(img) self.precomputed_vol[:, :, index] = img 
set_file_completed(self.animal, self.progress_id, basefile) del img return def add_volume(self, volume, layer_name=None, clear_layer=False): if self.viewer is None: self.viewer = neuroglancer.Viewer() if layer_name is None: layer_name = f'{self.layer_type}_{self.scales}' source = neuroglancer.LocalVolume( data=volume, dimensions=neuroglancer.CoordinateSpace(names=['x', 'y', 'z'], units='nm', scales=self.scales), voxel_offset=self.offset) if self.layer_type == 'segmentation': layer = neuroglancer.SegmentationLayer(source=source) else: layer = neuroglancer.ImageLayer(source=source) with self.viewer.txn() as s: if clear_layer: s.layers.clear() s.layers[layer_name] = layer print(f'A new layer named {layer_name} is added to:') print(self.viewer) def preview(self, layer_name=None, clear_layer=False): self.add_volume(self.volume, layer_name=layer_name, clear_layer=clear_layer)
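# Hypothetical end-to-end use of NumpyToNeuroglancer: create the precomputed layer,
# upload one section per file in parallel with process_image, then downsample.
# The animal name, paths, scales, and volume_size are placeholders; a z chunk size
# of 1 keeps the single-section writes chunk aligned.
from concurrent.futures import ProcessPoolExecutor
from glob import glob

ng = NumpyToNeuroglancer(
    animal='AnimalX', volume=None, scales=(325, 325, 20000),
    layer_type='image', data_type='uint16',
    chunk_size=[256, 256, 1],
)
ng.init_precomputed('/data/AnimalX/neuroglancer_data/C1', volume_size=(60000, 34000, 480))
file_keys = list(enumerate(sorted(glob('/data/AnimalX/aligned/*.tif'))))
with ProcessPoolExecutor(max_workers=4) as executor:
    executor.map(ng.process_image, file_keys)
ng.add_downsampled_volumes()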
def create_skeletonizing_tasks(cloudpath, mip, shape=Vec(512, 512, 512), teasar_params={ 'scale': 10, 'const': 10 }, info=None, object_ids=None, mask_ids=None, fix_branching=True, fix_borders=True, fix_avocados=False, fill_holes=False, dust_threshold=1000, progress=False, parallel=1, fill_missing=False, sharded=False, spatial_index=True, synapses=None, num_synapses=None): """ Assign tasks with one voxel overlap in a regular grid to be densely skeletonized. The default shape (512,512,512) was designed to work within 6 GB of RAM on average at parallel=1 but can exceed this amount for certain objects such as glia. 4 GB is usually OK. When this run completes, you'll follow up with create_skeleton_merge_tasks to postprocess the generated fragments into single skeletons. WARNING: If you are processing hundreds of millions of labels or more and are using Cloud Storage this can get expensive ($8 per million labels typically accounting for fragment generation and postprocessing)! This scale is when the experimental sharded format generator becomes crucial to use. cloudpath: cloudvolume path mip: which mip level to skeletonize For a 4x4x40 dataset, mip 3 is good. Mip 4 starts introducing artifacts like snaking skeletons along the edge of thin objects. teasar_params: NOTE: see github.com/seung-lab/kimimaro for an updated list see https://github.com/seung-lab/kimimaro/wiki/Intuition-for-Setting-Parameters-const-and-scale for help with setting these parameters. NOTE: DBF = Distance from Boundary Field (i.e. euclidean distance transform) scale: float, multiply invalidation radius by distance from boundary const: float, add this physical distance to the invalidation radius soma_detection_threshold: if object has a DBF value larger than this, root will be placed at largest DBF value and special one time invalidation will be run over that root location (see soma_invalidation scale) expressed in chosen physical units (i.e. nm) pdrf_scale: scale factor in front of dbf, used to weight DBF over euclidean distance (higher to pay more attention to dbf) pdrf_exponent: exponent in dbf formula on distance from edge, faster if factor of 2 (default 16) soma_invalidation_scale: the 'scale' factor used in the one time soma root invalidation (default .5) soma_invalidation_const: the 'const' factor used in the one time soma root invalidation (default 0) (units in chosen physical units (i.e. nm)) info: supply your own info file object_ids: mask out all but these ids if specified mask_ids: mask out these ids if specified fix_branching: Trades speed for quality of branching at forks. You'll almost always want this set to True. fix_borders: Allows trivial merging of single overlap tasks. You'll only want to set this to false if you're working on single or non-overlapping volumes. dust_threshold: don't skeletonize labels smaller than this number of voxels as seen by a single task. progress: show a progress bar parallel: number of processes to deploy against a single task. parallelizes over labels, it won't speed up a single complex label. You can be slightly more memory efficient using a single big task with parallel than with seperate tasks that add up to the same volume. Unless you know what you're doing, stick with parallel=1 for cloud deployments. fill_missing: passthrough to CloudVolume, fill missing image tiles with zeros instead of throwing an error if True. sharded: (bool) if true, output a single mapbuffer dict containing all skeletons in a task, which will serve as input to a sharded format generator. 
You don't want this unless you know what you're doing. If False, generate a skeleton fragment file per a label for later agglomeration using the SkeletonMergeTask. spatial_index: (bool) Concurrently generate a json file that describes which labels were skeletonized in a given task. This makes it possible to query for skeletons by bounding box later on using CloudVolume. synapses: If provided, after skeletonization of a label is complete, draw additional paths to one of the nearest voxels to synapse centroids. (x,y,z) centroid is specified in physical coordinates. Iterable yielding ((x,y,z),segid,swc_label) num_synapses: If synapses is an iterator, you must provide the total number of synapses. """ shape = Vec(*shape) vol = CloudVolume(cloudpath, mip=mip, info=info) kdtree, labelsmap = None, None if synapses: centroids, kdtree, labelsmap = synapses_in_space(synapses, N=num_synapses) if not 'skeletons' in vol.info: vol.info['skeletons'] = 'skeletons_mip_{}'.format(mip) vol.commit_info() if spatial_index: if 'spatial_index' not in vol.skeleton.meta.info or not vol.skeleton.meta.info[ 'spatial_index']: vol.skeleton.meta.info['spatial_index'] = {} vol.skeleton.meta.info['@type'] = 'neuroglancer_skeletons' vol.skeleton.meta.info['spatial_index']['resolution'] = tuple( vol.resolution) vol.skeleton.meta.info['spatial_index']['chunk_size'] = tuple( shape * vol.resolution) vol.skeleton.meta.info['mip'] = int(mip) vol.skeleton.meta.info['vertex_attributes'] = vol.skeleton.meta.info[ 'vertex_attributes'][:1] vol.skeleton.meta.commit_info() will_postprocess = bool(np.any(vol.bounds.size3() > shape)) bounds = vol.bounds.clone() class SkeletonTaskIterator(FinelyDividedTaskIterator): def task(self, shape, offset): bbox_synapses = None if synapses: bbox_synapses = self.synapses_for_bbox(shape, offset) return SkeletonTask( cloudpath=cloudpath, shape=(shape + 1).clone(), # 1px overlap on the right hand side offset=offset.clone(), mip=mip, teasar_params=teasar_params, will_postprocess=will_postprocess, info=info, object_ids=object_ids, mask_ids=mask_ids, fix_branching=fix_branching, fix_borders=fix_borders, fix_avocados=fix_avocados, dust_threshold=dust_threshold, progress=progress, parallel=parallel, fill_missing=bool(fill_missing), sharded=bool(sharded), spatial_index=bool(spatial_index), spatial_grid_shape=shape.clone( ), # used for writing index filenames synapses=bbox_synapses, ) def synapses_for_bbox(self, shape, offset): """ Returns { seigd: [ ((x,y,z), swc_label), ... ] where x,y,z are in voxel coordinates with the origin set to the bottom left corner of this cutout. 
""" bbox = Bbox(offset, shape + offset) * vol.resolution center = bbox.center() diagonal = Vec(*((bbox.maxpt - center))) pts = [ centroids[i, :] for i in kdtree.query_ball_point(center, diagonal.length()) ] pts = [ tuple(Vec(*pt, dtype=int)) for pt in pts if bbox.contains(pt) ] synapses = defaultdict(list) for pt in pts: for label, swc_label in labelsmap[pt]: voxel_pt = Vec(*pt, dtype=np.float32) / vol.resolution - offset synapses[label].append( (tuple(voxel_pt.astype(int)), swc_label)) return synapses def on_finish(self): vol.provenance.processing.append({ 'method': { 'task': 'SkeletonTask', 'cloudpath': cloudpath, 'mip': mip, 'shape': shape.tolist(), 'dust_threshold': dust_threshold, 'teasar_params': teasar_params, 'object_ids': object_ids, 'mask_ids': mask_ids, 'will_postprocess': will_postprocess, 'fix_branching': fix_branching, 'fix_borders': fix_borders, 'fix_avocados': fix_avocados, 'progress': progress, 'parallel': parallel, 'fill_missing': bool(fill_missing), 'sharded': bool(sharded), 'spatial_index': bool(spatial_index), 'synapses': bool(synapses), }, 'by': operator_contact(), 'date': strftime('%Y-%m-%d %H:%M %Z'), }) vol.commit_provenance() return SkeletonTaskIterator(bounds, shape)
def uploadshardedskeletons(skelsource, skelseglist, skelnamelist, path): """Upload sharded skeletons to a local server. Parameters ---------- skelsource : List containing cloud volume skeletons skelseglist : List containing the segids(skid) skelnamelist : List containing the names of skeletons path : path to the local data server Returns ------- cv : cloudvolume class object """ info = { "@type": "neuroglancer_skeletons", "transform": skelsource[0].transform.flatten(), "vertex_attributes": [{ "id": "radius", "data_type": "float32", "num_components": 1 }], "scales": "um" } path = 'file://' + path + '/precomputed' cv = CloudVolume(path, info=info) # prepare for info file cv.skeleton.meta.info['@type'] = 'neuroglancer_skeletons' cv.skeleton.meta.info['transform'] = skelsource[0].transform.flatten() cv.skeleton.meta.info['vertex_attributes'] = [{ 'id': 'radius', 'data_type': 'float32', 'num_components': 1 }] # prepare sharding info spec = ShardingSpecification( 'neuroglancer_uint64_sharded_v1', preshift_bits=9, hash='murmurhash3_x86_128', minishard_bits=6, shard_bits=15, minishard_index_encoding='raw', data_encoding='raw', ) cv.skeleton.meta.info['sharding'] = spec.to_dict() cv.skeleton.meta.info['segment_properties'] = 'seg_props' cv.skeleton.meta.commit_info() precomputedskels = {} for skelidx in range(len(skelsource)): skelid = int(skelsource[skelidx].id) skel = Skeleton(skelsource[skelidx].vertices, edges=skelsource[skelidx].edges, segid=skelid, extra_attributes=[{ "id": "radius", "data_type": "float32", "num_components": 1, }]).physical_space() precomputedskels[skelid] = skel.to_precomputed() shardfiles = spec.synthesize_shards(precomputedskels) shardedfilepath = os.path.join(cv.basepath, os.path.basename(path), cv.skeleton.meta.skeleton_path) for fname in shardfiles.keys(): with open(shardedfilepath + '/' + fname, 'wb') as f: f.write(shardfiles[fname]) segfilepath = os.path.join(cv.basepath, os.path.basename(path), cv.skeleton.meta.skeleton_path, 'seg_props') if not os.path.exists(segfilepath): os.makedirs(segfilepath) print('creating:', segfilepath) allsegproplist = [] for segid in skelseglist: segpropdict = {} segpropdict['id'] = segid segpropdict['type'] = 'label' segpropdict['values'] = skelnamelist allsegproplist.append(segpropdict) seginfo = { "@type": "neuroglancer_segment_properties", "inline": { "ids": skelseglist, "properties": allsegproplist } } segfile = os.path.join(segfilepath, 'info') with open(segfile, 'w') as segfile: json.dump(seginfo, segfile) return cv
sliceArgs.append(slice(startSlice, startSlice + 1)) else: sliceArgs.append(slice(None)) sliceArgs = tuple(sliceArgs) return arr[sliceArgs] def generateThumbnailsVolume(volume, x_out, y_out, z_out, isColor=False): sliceX = obtainCenterSlice(volume, 0, isColor) sliceY = obtainCenterSlice(volume, 1, isColor) sliceZ = obtainCenterSlice(volume, 2, isColor) generateThumbnailsSlices(sliceX[0], sliceY[:, 0], sliceZ[:, :, 0], x_out, y_out, z_out) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("input_path") parser.add_argument("x_out") parser.add_argument("y_out") parser.add_argument("z_out") p = parser.parse_args() volume = CloudVolume( urlPrefix + str(p.input_path), progress=True, fill_missing=True ) #TODO: fill_missing = True? Or allow thumbnails to crash? #We should probably reduce the thumbnail dimensions rather than fill_missing. generateThumbnailsVolume(volume, p.x_out, p.y_out, p.z_out)
def execute(self): vol = CloudVolume( self.cloudpath, mip=self.mip, info=self.info, cdn_cache=False, parallel=self.parallel, fill_missing=self.fill_missing, ) bbox = Bbox.clamp(self.bounds, vol.bounds) index_bbox = Bbox.clamp(self.index_bounds, vol.bounds) path = skeldir(self.cloudpath) path = os.path.join(self.cloudpath, path) all_labels = vol[bbox.to_slices()] all_labels = all_labels[:, :, :, 0] if self.mask_ids: all_labels = fastremap.mask(all_labels, self.mask_ids) extra_targets_after = {} if self.synapses: extra_targets_after = kimimaro.synapses_to_targets( all_labels, self.synapses) skeletons = kimimaro.skeletonize( all_labels, self.teasar_params, object_ids=self.object_ids, anisotropy=vol.resolution, dust_threshold=self.dust_threshold, progress=self.progress, fix_branching=self.fix_branching, fix_borders=self.fix_borders, fix_avocados=self.fix_avocados, fill_holes=self.fill_holes, parallel=self.parallel, extra_targets_after=extra_targets_after.keys(), ) for segid, skel in skeletons.items(): skel.vertices[:] += bbox.minpt * vol.resolution if self.synapses: for segid, skel in skeletons.items(): terminal_nodes = skel.vertices[skel.terminals()] for i, vert in enumerate(terminal_nodes): vert = vert / vol.resolution - self.bounds.minpt vert = tuple(np.round(vert).astype(int)) if vert in extra_targets_after.keys(): skel.vertex_types[i] = extra_targets_after[vert] # neuroglancer doesn't support int attributes strip_integer_attributes(skeletons.values()) if self.sharded: self.upload_batch(vol, path, index_bbox, skeletons) else: self.upload_individuals(vol, path, bbox, skeletons) if self.spatial_index: self.upload_spatial_index(vol, path, index_bbox, skeletons)
def transfer_to(self, cloudpath, bbox, mip, block_size=None, compress=True, compress_level=None): """ Transfer files from one storage location to another, bypassing volume painting. This enables using a single CloudVolume instance to transfer big volumes. In some cases, gsutil or aws s3 cli tools may be more appropriate. This method is provided for convenience. It may be optimized for better performance over time as demand requires. cloudpath (str): path to storage layer bbox (Bbox object): ROI to transfer mip (int): resolution level block_size (int): number of file chunks to transfer per I/O batch. compress (bool): Set to False to upload as uncompressed """ from cloudvolume import CloudVolume if mip is None: mip = self.config.mip bbox = Bbox.create(bbox, self.meta.bounds(mip)) realized_bbox = bbox.expand_to_chunk_size( self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip)) realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip)) if bbox != realized_bbox: raise exceptions.AlignmentError( "Unable to transfer non-chunk aligned bounding boxes. Requested: {}, Realized: {}" .format(bbox, realized_bbox)) default_block_size_MB = 50 # MB chunk_MB = self.meta.chunk_size(mip).rectVolume() * np.dtype( self.meta.dtype).itemsize * self.meta.num_channels if self.meta.layer_type == 'image': # kind of an average guess for some EM datasets, have seen up to 1.9x and as low as 1.1 # affinites are also images, but have very different compression ratios. e.g. 3x for kempressed chunk_MB /= 1.3 else: # segmentation chunk_MB /= 100.0 # compression ratios between 80 and 800.... chunk_MB /= 1024.0 * 1024.0 if block_size: step = block_size else: step = int(default_block_size_MB // chunk_MB) + 1 try: destvol = CloudVolume(cloudpath, mip=mip) except exceptions.InfoUnavailableError: destvol = CloudVolume(cloudpath, mip=mip, info=self.meta.info, provenance=self.meta.provenance.serialize()) destvol.commit_info() destvol.commit_provenance() except exceptions.ScaleUnavailableError: destvol = CloudVolume(cloudpath) for i in range(len(destvol.scales) + 1, len(self.meta.scales)): destvol.scales.append(self.meta.scales[i]) destvol.commit_info() destvol.commit_provenance() num_blocks = np.ceil( self.meta.bounds(mip).volume() / self.meta.chunk_size(mip).rectVolume()) / step num_blocks = int(np.ceil(num_blocks)) cloudpaths = chunknames(bbox, self.meta.bounds(mip), self.meta.key(mip), self.meta.chunk_size(mip), protocol=self.meta.path.protocol) pbar = tqdm( desc='Transferring Blocks of {} Chunks'.format(step), unit='blocks', disable=(not self.config.progress), total=num_blocks, ) with pbar: with Storage(self.meta.cloudpath) as src_stor: with Storage(cloudpath) as dest_stor: for _ in range(num_blocks, 0, -1): srcpaths = list(itertools.islice(cloudpaths, step)) files = src_stor.get_files(srcpaths) files = [(f['filename'], f['content']) for f in files] dest_stor.put_files( files=files, compress=compress, compress_level=compress_level, content_type=tx.content_type(destvol), ) pbar.update()
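# Hypothetical use of transfer_to(): mirror one chunk-aligned region of an image layer
# into another bucket without decoding and re-encoding the chunks. Paths and bounds
# are placeholders; a non-chunk-aligned bbox raises AlignmentError as described above.
from cloudvolume import CloudVolume
from cloudvolume.lib import Bbox

src = CloudVolume('gs://example-bucket/image', mip=0)
src.transfer_to(
    'gs://example-backup/image',
    Bbox((0, 0, 0), (2048, 2048, 256)),  # must align to chunk boundaries at this mip
    0,                                   # mip
)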
def _upload_output(self):
    vol = CloudVolume(self.output_layer_path,
                      compress='gzip',
                      fill_missing=True,
                      bounded=False,
                      autocrop=True,
                      mip=self.image_mip,
                      progress=True)
    output_slices = self.output_bounds.to_slices()
    self.output = np.transpose(self.output)
    vol[output_slices[::-1] + (slice(0, self.output.shape[-1]),)] = self.output
def __init__(self, output_path: str, output_format: str, mip: int = None, voxel_size: tuple = None, simplification_factor: int = 100, max_simplification_error: int = 8, dust_threshold: int = None, ids: set = None, manifest: bool = False, name: str = 'meshing', verbose: bool = True): """ Parameters ------------ output_path: path to store mesh files output_format: format of output {'ply', 'obj', 'precomputed'} voxel_size: size of voxels simplification_factor: mesh simplification factor. max_simplification_error: maximum tolerance error of meshing. dust_threshold: do not mesh tiny objects with voxel number less than threshold ids: only mesh the selected segmentation ids, other segments will not be meshed. manifest: create manifest files or not. This should not be True if you are only doing meshing for a segmentation chunk. name: operator name. verbose: print out informations or not. Note that some functions are adopted from igneous. """ super().__init__(name=name, verbose=verbose) self.simplification_factor = simplification_factor self.max_simplification_error = max_simplification_error # zmesh use fortran order, translate zyx to xyz self.output_path = output_path self.output_format = output_format self.dust_threshold = dust_threshold self.ids = ids self.manifest = manifest if manifest: assert output_format == 'precomputed' mesh_path = output_path if output_format == 'precomputed': # adjust the mesh path according to info vol = CloudVolume(self.output_path, mip) info = vol.info if 'mesh' not in info: # add mesh to info and update it info['mesh'] = 'mesh_err_{}'.format(max_simplification_error) vol.info = info vol.commit_info() mesh_path = os.path.join(output_path, info['mesh']) self.voxel_size = vol.resolution[::-1] self.mesher = Mesher(vol.resolution) else: self.mesher = Mesher(voxel_size[::-1]) self.storage = Storage(mesh_path)
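# How the zmesh Mesher configured above is typically driven; this mirrors the
# documented zmesh API rather than this operator's own __call__, so treat the loop
# as an illustrative sketch. The label array here is synthetic.
import numpy as np
from zmesh import Mesher

mesher = Mesher((4, 4, 40))                   # voxel size in x, y, z (nm)
labels = np.zeros((128, 128, 64), dtype=np.uint64)
labels[32:96, 32:96, 16:48] = 7               # one toy object
mesher.mesh(labels)
for obj_id in mesher.ids():
    mesh = mesher.get_mesh(
        obj_id,
        normals=False,
        simplification_factor=100,            # cf. simplification_factor above
        max_simplification_error=8,           # cf. max_simplification_error above
    )
    mesher.erase(obj_id)                      # free memory as objects are consumed
mesher.clear()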
def merge_graph(seg_map, merge_output, global_merge_dict, gid=None): resolution = list(seg_map.values())[0]['resolution'] chunk_size = list(seg_map.values())[0]['chunk_size'] bbox_list = [v['bbox'] for v in seg_map.values()] minpt = np.min(np.stack([np.array(b.minpt) for b in bbox_list], 0), axis=0) maxpt = np.max(np.stack([np.array(b.maxpt) for b in bbox_list], 0), axis=0) union_offset = minpt union_size = maxpt - minpt union_bbox = Bbox(minpt, maxpt) #print(union_bbox) # create new canvas cv_merge_path = '%s/precomputed-%d_%d_%d_%d_%d_%d/' % ( merge_output, union_offset[0], union_offset[1], union_offset[2], union_size[0], union_size[1], union_size[2]) #print(cv_merge_path) cv_merge = prepare_precomputed(cv_merge_path, offset=union_offset, size=union_size, resolution=resolution, chunk_size=chunk_size) #print(cv_merge.shape) # Pre paint the cv with 0 cv_merge[union_bbox] = np.zeros((union_size), dtype=np.uint32) cv_args = dict(bounded=True, fill_missing=True, autocrop=False, cache=False, compress_cache=None, cdn_cache=False, progress=False, provenance=None, compress=True, non_aligned_writes=True, parallel=False) #val_dict = dict() #print('>>>>rank: %d, map_keys %s' % (mpi_rank, str(seg_map.keys()))) pbar = tqdm(seg_map.items(), desc='merging') for seg_key, seg in pbar: bb = seg['bbox'] cv = CloudVolume('file://' + seg['output'], mip=0, **cv_args) # val = cv.download_to_shared_memory(np.s_[:], str(i)) val = cv[...] # print('keys: %s <-> %s' % (seg_key, global_merge_dict.keys())) if seg_key in global_merge_dict: val = perform_remap(val, global_merge_dict[seg_key]) #logging.error('rank %d val_shape: %s, bbox %s', mpi_rank, val.shape, bb) #val_dict[bb] = val curr_val = cv_merge[bb][:] non_zeros = curr_val != 0 val[non_zeros] = curr_val[non_zeros] cv_merge[bb] = val return { gid: dict( bbox=union_bbox, output=cv_merge_path, resolution=resolution, chunk_size=chunk_size, ) }
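# Hedged sketch of the prepare_precomputed() helper used by merge_graph() above; the
# real helper is not shown. It creates (or reopens) a uint32 segmentation layer at the
# requested offset/size and returns a handle with the same write-friendly flags the
# merge loop relies on.
from cloudvolume import CloudVolume

def prepare_precomputed(cv_path, offset, size, resolution, chunk_size):
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type='segmentation',
        data_type='uint32',
        encoding='raw',
        resolution=list(resolution),
        voxel_offset=list(offset),
        chunk_size=list(chunk_size),
        volume_size=list(size),
    )
    cv = CloudVolume('file://' + cv_path, mip=0, info=info,
                     bounded=True, fill_missing=True, non_aligned_writes=True)
    cv.commit_info()
    return cv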