def execute(self):
  """Build the skeleton shard for this task's labels and upload it.

  Reads labels and their spatial-index locations from the volume,
  fuses them into skeletons, synthesizes exactly one shard file, and
  uploads it to the skeleton layer path. Raises ValueError if shard
  synthesis unexpectedly produces more than one file.
  """
  # cache is necessary for local computation, but on GCE download
  # is very fast so cache isn't necessary.
  cv = CloudVolume(self.cloudpath, cache=False, progress=self.progress)

  labels = self.labels_for_shard(cv)
  locations = self.locations_for_labels(labels, cv)
  skeletons = self.process_skeletons(locations, cv)

  if len(skeletons) == 0:
    return

  shard_files = synthesize_shard_files(cv.skeleton.reader.spec, skeletons)

  if len(shard_files) != 1:
    raise ValueError(
      "Only one shard file should be generated per task. Expected: {} Got: {} "
      .format(str(self.shard_no), ", ".join(shard_files.keys()))
    )

  # dict.items() already yields (filename, data) pairs; rebuilding
  # them with a comprehension (C416) was redundant.
  uploadable = list(shard_files.items())
  with Storage(cv.skeleton.meta.layerpath, progress=self.progress) as stor:
    stor.put_files(
      files=uploadable,
      compress=False,
      content_type='application/octet-stream',
      cache_control='no-cache',
    )
def MultiResShardedMeshMergeTask(
  cloudpath: str, shard_no: str,
  draco_compression_level: int = 1,
  mesh_dir: Optional[str] = None,
  num_lod: int = 1,
  spatial_index_db: Optional[str] = None,
  progress: bool = False
):
  """Merge mesh fragments for one shard into a multi-resolution shard file.

  Args:
    cloudpath: Precomputed volume path.
    shard_no: Which shard of the spatial index to process.
    draco_compression_level: Draco level passed to mesh processing.
    mesh_dir: Mesh directory; defaults to the volume's 'mesh' info key.
    num_lod: Number of levels of detail to generate.
    spatial_index_db: Optional local spatial index database.
    progress: Display progress bars.
  """
  cv = CloudVolume(cloudpath, spatial_index_db=spatial_index_db)
  cv.mip = cv.mesh.meta.mip
  if mesh_dir is None and 'mesh' in cv.info:
    mesh_dir = cv.info['mesh']

  # This looks messy because we are trying to avoid retaining
  # unnecessary memory. In the original skeleton iteration, this was
  # using 50 GB+ memory on minnie65. So it makes sense to be just
  # as careful with a heavier type of object.
  locations = locations_for_labels(cv, labels_for_shard(cv, shard_no))
  filenames = set(itertools.chain(*locations.values()))
  labels = set(locations.keys())
  del locations
  meshes = collect_mesh_fragments(cv, labels, filenames, mesh_dir, progress)
  del labels
  del filenames

  # Delegate LOD processing, manifest packing, and shard synthesis to
  # create_mesh_shard — this code previously duplicated it inline.
  fname, shard = create_mesh_shard(
    cv, meshes, num_lod, draco_compression_level, progress, shard_no
  )
  del meshes

  if fname is None:
    return

  cf = CloudFiles(cv.mesh.meta.layerpath, progress=progress)
  cf.put(
    fname, shard,
    compress=False,
    content_type='application/octet-stream',
    cache_control='no-cache',
  )
def ShardedFromUnshardedSkeletonMergeTask(
  src: str,
  dest: str,
  shard_no: str,
  cache_control: bool = False,
  skel_dir: Optional[str] = None,
  progress: bool = False,
):
  """Re-encode unsharded skeletons from src into one shard file at dest.

  Args:
    src: Cloudpath of the volume holding unsharded skeletons.
    dest: Cloudpath of the destination volume with a sharded skeleton spec.
    shard_no: Which shard of the destination's index to generate.
    cache_control: Unused here; retained for interface compatibility.
    skel_dir: Skeleton directory; defaults to src's 'skeletons' info key.
    progress: Display progress bars.

  Raises:
    ValueError: If shard synthesis produces more than one file.
  """
  cv_src = CloudVolume(src)

  # BUG FIX: this previously read `cv.info`, but no `cv` exists in this
  # scope (NameError). The skeleton dir default comes from the source.
  if skel_dir is None and 'skeletons' in cv_src.info:
    skel_dir = cv_src.info['skeletons']

  cv_dest = CloudVolume(dest, skel_dir=skel_dir, progress=progress)

  labels = labels_for_shard(cv_dest, shard_no, progress)
  skeletons = cv_src.skeleton.get(labels)
  del labels

  if len(skeletons) == 0:
    return

  skeletons = strip_integer_attributes(skeletons)
  skeletons = {skel.id: skel.to_precomputed() for skel in skeletons}
  shard_files = synthesize_shard_files(cv_dest.skeleton.reader.spec, skeletons)

  if len(shard_files) != 1:
    raise ValueError(
      "Only one shard file should be generated per task. Expected: {} Got: {} "
      .format(str(shard_no), ", ".join(shard_files.keys()))
    )

  cf = CloudFiles(cv_dest.skeleton.meta.layerpath, progress=progress)
  cf.puts(
    ((fname, data) for fname, data in shard_files.items()),
    compress=False,
    content_type='application/octet-stream',
    cache_control='no-cache',
  )
def execute(self):
  """Fuse skeleton fragments for one shard and upload the shard file."""
  # cache is necessary for local computation, but on GCE download is
  # very fast so cache isn't necessary.
  cv = CloudVolume(
    self.cloudpath,
    progress=self.progress,
    spatial_index_db=self.spatial_index_db
  )

  # Memory discipline: drop each intermediate as soon as it is
  # consumed. The original iteration of this code used 50 GB+ on
  # minnie65; combined with the spatial_index changes this keeps the
  # footprint reasonable.
  locations = self.locations_for_labels(
    labels_for_shard(cv, self.shard_no, self.progress), cv
  )
  filenames = set(itertools.chain(*locations.values()))
  labels = set(locations.keys())
  del locations

  skeletons = self.get_unfused(labels, filenames, cv)
  del labels
  del filenames

  skeletons = self.process_skeletons(skeletons, in_place=True)
  if not skeletons:
    return

  shard_files = synthesize_shard_files(cv.skeleton.reader.spec, skeletons)
  if len(shard_files) != 1:
    raise ValueError(
      "Only one shard file should be generated per task. Expected: {} Got: {} "
      .format(str(self.shard_no), ", ".join(shard_files.keys()))
    )

  uploads = ((fname, data) for fname, data in shard_files.items())
  CloudFiles(cv.skeleton.meta.layerpath, progress=self.progress).puts(
    uploads,
    compress=False,
    content_type='application/octet-stream',
    cache_control='no-cache',
  )
def create_mesh_shard(
  cv:CloudVolume, meshes:dict,
  num_lod:int, draco_compression_level:int,
  progress:bool, shard_no:str
):
  """Process mesh fragments into a single precomputed shard file.

  Each label's fragments are fused into a multi-resolution mesh with a
  manifest; the binary layout is mesh bytes followed by the manifest,
  with data_offset recording where the manifest begins.

  Returns:
    (filename, shard_bytes), or (None, None) when there are no meshes.

  Raises:
    ValueError: If shard synthesis yields more than one file.
  """
  processed = {}
  for label, frags in tqdm(meshes.items(), disable=(not progress)):
    processed[label] = process_mesh(
      cv, label, frags, num_lod, draco_compression_level
    )

  data_offset = {}
  packed = {}
  for label, (manifest, mesh) in processed.items():
    data_offset[label] = len(manifest)
    packed[label] = mesh + manifest.to_binary()

  if not packed:
    return None, None

  shard_files = synthesize_shard_files(
    cv.mesh.reader.spec, packed, data_offset
  )

  if len(shard_files) != 1:
    raise ValueError(
      "Only one shard file should be generated per task. "
      "Expected: {} Got: {} ".format(
        str(shard_no), ", ".join(shard_files.keys())
      ))

  filename = first(shard_files.keys())
  return filename, shard_files[filename]
pool = pathos.pools.ProcessPool(parallel) for skel in pool.uimap(complex_merge, skeletons.values()): merged_skeletons[skel.id] = skel.to_precomputed() pbar.update(1) pool.close() pool.join() pool.clear() return merged_skeletons if has_checkpoint('complex-merge'): merged_skeletons = load_checkpoint('complex-merge') else: skeletons = checkpoint('simple-merge', load_raw_skeletons) postprocessfn = lambda: postprocess(skeletons) merged_skeletons = checkpoint('complex-merge', postprocessfn) del skeletons del postprocessfn shard_files = synthesize_shard_files(spec, merged_skeletons, progress=True) uploadable = [(fname, data) for fname, data in shard_files.items()] with Storage(cv.skeleton.meta.layerpath) as stor: stor.put_files( files=uploadable, compress=False, content_type='application/octet-stream', cache_control='no-cache', )