def test_downsample_with_offset():
    delete_layer()
    storage, data = create_layer(size=(512,512,128,1), offset=(3,7,11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 4
    assert len(cv.available_mips) == 4
    cv.commit_info()

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=3)

    cv.refresh_info()

    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [ 512, 512, 128 ])
    assert np.array_equal(cv.mip_volume_size(1), [ 256, 256, 128 ])
    assert np.array_equal(cv.mip_volume_size(2), [ 128, 128, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [ 64, 64, 128 ])

    assert np.all(cv.mip_voxel_offset(3) == (0,0,11))

    cv.mip = 0
    assert np.all(cv[3:67, 7:71, 11:75] == data[0:64, 0:64, 0:64])

    data_ds1 = downsample.downsample_with_averaging(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[1:33, 3:35, 11:75] == data_ds1[0:32, 0:32, 0:64])

    data_ds2 = downsample.downsample_with_averaging(data_ds1, factor=[2, 2, 1, 1])
    cv.mip = 2
    assert np.all(cv[0:16, 1:17, 11:75] == data_ds2[0:16, 0:16, 0:64])

    data_ds3 = downsample.downsample_with_averaging(data_ds2, factor=[2, 2, 1, 1])
    cv.mip = 3
    assert np.all(cv[0:8, 0:8, 11:75] == data_ds3[0:8, 0:8, 0:64])
def test_downsample_w_missing():
    delete_layer()
    storage, data = create_layer(size=(512,512,128,1), offset=(3,7,11))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 4
    assert len(cv.available_mips) == 4
    delete_layer()
    cv.commit_info()

    try:
        create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=3, fill_missing=False)
    except EmptyVolumeException:
        pass

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=3, fill_missing=True)

    cv.refresh_info()

    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [ 512, 512, 128 ])
    assert np.array_equal(cv.mip_volume_size(1), [ 256, 256, 128 ])
    assert np.array_equal(cv.mip_volume_size(2), [ 128, 128, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [ 64, 64, 128 ])

    assert np.all(cv.mip_voxel_offset(3) == (0,0,11))

    cv.mip = 0
    cv.fill_missing = True
    assert np.count_nonzero(cv[3:67, 7:71, 11:75]) == 0
def make_downsample_tasks(vol, mip_start=0, num_mips=3):
    """
    ---PURPOSE---
    Make downsamples of the precomputed data
    ---INPUT---
    vol        The cloudvolume.Cloudvolume() object
    mip_start  The mip level to start at with the downsamples
    num_mips   The number of mip levels to create, starting from mip_start
    """
    # cloudpath = 'file:///home/ahoag/ngdemo/demo_bucket/m61467_demons_20190702/190821_647'
    cloudpath = vol.cloudpath
    # with LocalTaskQueue(parallel=8) as tq:
    tasks = tc.create_downsampling_tasks(
        cloudpath,
        mip=mip_start,  # Start downsampling from this mip level (writes to next level up)
        fill_missing=False,  # Ignore missing chunks and fill them with black
        axis='z',
        num_mips=num_mips,  # number of downsamples to produce. Downloaded shape is chunk_size * 2^num_mip
        chunk_size=[ 128, 128, 64 ],  # manually set chunk size of next scales, overrides preserve_chunk_size
        preserve_chunk_size=True,  # use existing chunk size, don't halve to get more downsamples
    )
    return tasks
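# A minimal usage sketch for the function above (not part of the original snippet):
# make_downsample_tasks only builds the task list, so the caller still has to run
# the tasks through a queue. The layer path below is a hypothetical placeholder.
vol = CloudVolume('file:///path/to/layer')  # hypothetical layer path
tasks = make_downsample_tasks(vol, mip_start=0, num_mips=3)
with LocalTaskQueue(parallel=8) as tq:
    tq.insert_all(tasks)  # execute the downsampling tasks locally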
def test_downsample_higher_mip():
    delete_layer()
    storage, data = create_layer(size=(512,512,64,1), offset=(3,7,11))
    cv = CloudVolume(storage.layer_path)
    cv.info['scales'] = cv.info['scales'][:1]
    cv.commit_info()

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=2)
    cv.refresh_info()
    assert len(cv.available_mips) == 3

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=1, num_mips=2)
    cv.refresh_info()
    assert len(cv.available_mips) == 4

    cv.mip = 3
    assert cv[:,:,:].shape == (64,64,64,1)
def downsample(opt):
    gs_path = opt.gs_output

    # Downsample
    if opt.downsample:
        with LocalTaskQueue(parallel=opt.parallel) as tq:
            tasks = tc.create_downsampling_tasks(gs_path, mip=0, fill_missing=True)
            tq.insert_all(tasks)
def test_downsample_higher_mip():
    delete_layer()
    cf, data = create_layer(size=(512,512,64,1), offset=(3,7,11))
    cv = CloudVolume(cf.cloudpath)
    cv.info['scales'] = cv.info['scales'][:1]
    tq = MockTaskQueue()
    cv.commit_info()

    tasks = create_downsampling_tasks(cf.cloudpath, mip=0, num_mips=2)
    tq.insert_all(tasks)
    cv.refresh_info()
    assert len(cv.available_mips) == 3

    tasks = create_downsampling_tasks(cf.cloudpath, mip=1, num_mips=2)
    tq.insert_all(tasks)
    cv.refresh_info()
    assert len(cv.available_mips) == 4

    cv.mip = 3
    assert cv[:,:,:].shape == (64,64,64,1)
def test_downsample_no_offset():
    delete_layer()
    storage, data = create_layer(size=(1024,1024,128,1), offset=(0,0,0))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 5
    assert len(cv.available_mips) == 5
    cv.commit_info()

    create_downsampling_tasks(MockTaskQueue(), storage.layer_path, mip=0, num_mips=4)

    cv.refresh_info()

    assert len(cv.available_mips) == 5
    assert np.array_equal(cv.mip_volume_size(0), [ 1024, 1024, 128 ])
    assert np.array_equal(cv.mip_volume_size(1), [ 512, 512, 128 ])
    assert np.array_equal(cv.mip_volume_size(2), [ 256, 256, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [ 128, 128, 128 ])
    assert np.array_equal(cv.mip_volume_size(4), [ 64, 64, 128 ])

    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    data_ds1 = downsample.downsample_with_averaging(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2 = downsample.downsample_with_averaging(data_ds1, factor=[2, 2, 1, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])

    data_ds3 = downsample.downsample_with_averaging(data_ds2, factor=[2, 2, 1, 1])
    cv.mip = 3
    assert np.all(cv[slice64] == data_ds3[slice64])

    data_ds4 = downsample.downsample_with_averaging(data_ds3, factor=[2, 2, 1, 1])
    cv.mip = 4
    assert np.all(cv[slice64] == data_ds4[slice64])
def downsample_dataset(dataset_name, from_mip=-1, num_mips=1, local=False,
                       n_download_workers=1, n_threads=32):
    if dataset_name == "pinky":
        ws_path = "gs://neuroglancer/svenmd/pinky40_v11/watershed/"
    elif dataset_name == "basil":
        ws_path = "gs://neuroglancer/svenmd/basil_4k_oldnet_cg/watershed/"
    elif dataset_name == "pinky100":
        ws_path = "gs://neuroglancer/nkem/pinky100_v0/ws/lost_no-random/bbox1_0/"
    else:
        raise Exception("Dataset unknown")

    if local:
        if n_threads == 1:
            with MockTaskQueue() as task_queue:
                tc.create_downsampling_tasks(task_queue, ws_path, mip=from_mip,
                                             fill_missing=True, num_mips=num_mips,
                                             n_download_workers=n_download_workers,
                                             preserve_chunk_size=True)
        else:
            with LocalTaskQueue(parallel=n_threads) as task_queue:
                tc.create_downsampling_tasks(task_queue, ws_path, mip=from_mip,
                                             fill_missing=True, num_mips=num_mips,
                                             n_download_workers=n_download_workers,
                                             preserve_chunk_size=True)
    else:
        with TaskQueue(queue_server='sqs',
                       qurl="https://sqs.us-east-1.amazonaws.com/098703261575/nkem-igneous") as task_queue:
            tc.create_downsampling_tasks(task_queue, ws_path, mip=from_mip,
                                         fill_missing=True, num_mips=num_mips,
                                         n_download_workers=n_download_workers,
                                         preserve_chunk_size=True)
def create_downsamples(animal, channel, suffix, downsample):
    fileLocationManager = FileLocationManager(animal)
    channel_outdir = f'C{channel}'
    first_chunk = calculate_chunks(downsample, 0)
    mips = [0, 1, 2, 3, 4, 5, 6, 7]

    if downsample:
        channel_outdir += 'T'
        mips = [0, 1]

    outpath = os.path.join(fileLocationManager.neuroglancer_data, f'{channel_outdir}')
    outpath = f'file://{outpath}'
    if suffix is not None:
        outpath += suffix

    channel_outdir += "_rechunkme"
    INPUT_DIR = os.path.join(fileLocationManager.neuroglancer_data, f'{channel_outdir}')

    if not os.path.exists(INPUT_DIR):
        print(f'DIR {INPUT_DIR} does not exist, exiting.')
        sys.exit()

    cloudpath = f"file://{INPUT_DIR}"
    _, workers = get_cpus()
    tq = LocalTaskQueue(parallel=workers)

    tasks = tc.create_transfer_tasks(cloudpath, dest_layer_path=outpath,
                                     chunk_size=first_chunk, mip=0, skip_downsamples=True)
    tq.insert(tasks)
    tq.execute()

    # mips = 7 shows good results in neuroglancer
    for mip in mips:
        cv = CloudVolume(outpath, mip)
        chunks = calculate_chunks(downsample, mip)
        factors = calculate_factors(downsample, mip)
        tasks = tc.create_downsampling_tasks(cv.layer_cloudpath, mip=mip, num_mips=1,
                                             factor=factors, preserve_chunk_size=False,
                                             compress=True, chunk_size=chunks)
        tq.insert(tasks)
        tq.execute()

    print("Done!")
def make_demo_downsample(type_vol="647", mip_start=0, num_mips=3):
    cloudpath = "file://" + home_dir + "/" + brain + "/" + type_vol
    with LocalTaskQueue(parallel=8) as tq:
        tasks = tc.create_downsampling_tasks(
            cloudpath,
            mip=mip_start,  # Start downsampling from this mip level (writes to next level up)
            fill_missing=False,  # Ignore missing chunks and fill them with black
            axis="z",
            num_mips=num_mips,  # number of downsamples to produce. Downloaded shape is chunk_size * 2^num_mip
            chunk_size=[ 128, 128, 32 ],  # manually set chunk size of next scales, overrides preserve_chunk_size
            preserve_chunk_size=True,  # use existing chunk size, don't halve to get more downsamples
        )
        tq.insert_all(tasks)
    print("Done!")
def add_downsampled_volumes(self, chunk_size=[128, 128, 64], num_mips=4):
    if self.precomputed_vol is None:
        raise NotImplementedError(
            'You have to call init_precomputed before calling this function.'
        )

    _, cpus = get_cpus()
    tq = LocalTaskQueue(parallel=cpus)
    tasks = tc.create_downsampling_tasks(
        self.precomputed_vol.layer_cloudpath,
        preserve_chunk_size=False,
        num_mips=num_mips,
        chunk_size=chunk_size,
        compress=True)
    tq.insert(tasks)
    tq.execute()
def test_downsample_no_offset(compression_method):
    delete_layer()
    storage, data = create_layer(size=(1024, 1024, 128, 1), offset=(0, 0, 0))
    cv = CloudVolume(storage.layer_path)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1
    cv.commit_info()

    tq = MockTaskQueue()
    tasks = create_downsampling_tasks(storage.layer_path, mip=0, num_mips=4,
                                      compress=compression_method)
    tq.insert_all(tasks)

    cv.refresh_info()

    assert len(cv.available_mips) == 5
    assert np.array_equal(cv.mip_volume_size(0), [1024, 1024, 128])
    assert np.array_equal(cv.mip_volume_size(1), [512, 512, 128])
    assert np.array_equal(cv.mip_volume_size(2), [256, 256, 128])
    assert np.array_equal(cv.mip_volume_size(3), [128, 128, 128])
    assert np.array_equal(cv.mip_volume_size(4), [64, 64, 128])

    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    data_ds1, = tinybrain.downsample_with_averaging(data, factor=[2, 2, 1, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2, = tinybrain.downsample_with_averaging(data, factor=[4, 4, 1, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])

    data_ds3, = tinybrain.downsample_with_averaging(data, factor=[8, 8, 1, 1])
    cv.mip = 3
    assert np.all(cv[slice64] == data_ds3[slice64])

    data_ds4, = tinybrain.downsample_with_averaging(data, factor=[16, 16, 1, 1])
    cv.mip = 4
    assert np.all(cv[slice64] == data_ds4[slice64])
def test_downsample_no_offset_2x2x2():
    delete_layer()
    cf, data = create_layer(size=(512,512,512,1), offset=(0,0,0))
    cv = CloudVolume(cf.cloudpath)
    assert len(cv.scales) == 1
    assert len(cv.available_mips) == 1
    cv.commit_info()

    tq = MockTaskQueue()
    tasks = create_downsampling_tasks(
        cf.cloudpath, mip=0, num_mips=3, compress=None, factor=(2,2,2)
    )
    tq.insert_all(tasks)

    cv.refresh_info()

    assert len(cv.available_mips) == 4
    assert np.array_equal(cv.mip_volume_size(0), [ 512, 512, 512 ])
    assert np.array_equal(cv.mip_volume_size(1), [ 256, 256, 256 ])
    assert np.array_equal(cv.mip_volume_size(2), [ 128, 128, 128 ])
    assert np.array_equal(cv.mip_volume_size(3), [ 64, 64, 64 ])

    slice64 = np.s_[0:64, 0:64, 0:64]

    cv.mip = 0
    assert np.all(cv[slice64] == data[slice64])

    data_ds1, = tinybrain.downsample_with_averaging(data, factor=[2, 2, 2, 1])
    cv.mip = 1
    assert np.all(cv[slice64] == data_ds1[slice64])

    data_ds2, = tinybrain.downsample_with_averaging(data, factor=[4, 4, 4, 1])
    cv.mip = 2
    assert np.all(cv[slice64] == data_ds2[slice64])

    data_ds3, = tinybrain.downsample_with_averaging(data, factor=[8, 8, 8, 1])
    cv.mip = 3
    assert np.all(cv[slice64] == data_ds3[slice64])
def downsample(
    ctx, path, queue, mip, fill_missing,
    num_mips, cseg, sparse,
    chunk_size, compress, volumetric,
    delete_bg, bg_color
):
    """
    Create an image pyramid for grayscale or labeled images.
    By default, we use 2x2x1 downsampling.

    The levels of the pyramid are called "mips" (from the fake latin
    "Multum in Parvo" or "many in small"). The base of the pyramid,
    the highest resolution layer, is mip 0. Each level of the pyramid
    is one mip level higher.

    The general strategy is to downsample starting from mip 0. This
    builds several levels. Once that job is complete, pass in the
    current top mip level of the pyramid. This builds it even taller
    (referred to as "superdownsampling").
    """
    encoding = ("compressed_segmentation" if cseg else None)

    factor = (2,2,1)
    if volumetric:
        factor = (2,2,2)

    tasks = tc.create_downsampling_tasks(
        path, mip=mip, fill_missing=fill_missing,
        num_mips=num_mips, sparse=sparse,
        chunk_size=chunk_size, encoding=encoding,
        delete_black_uploads=delete_bg,
        background_color=bg_color,
        compress=compress, factor=factor
    )

    parallel = int(ctx.obj.get("parallel", 1))
    tq = TaskQueue(normalize_path(queue))
    tq.insert(tasks, parallel=parallel)
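# Sketch of the "superdownsampling" strategy described in the docstring above,
# using create_downsampling_tasks directly (the layer path and mip numbers are
# illustrative placeholders, not taken from the snippet). The first pass builds
# mips 1-4 from mip 0; the second pass is seeded with the current top of the
# pyramid to extend it further.
tasks = tc.create_downsampling_tasks('file:///data/layer', mip=0, num_mips=4)
tq = LocalTaskQueue(parallel=8)
tq.insert(tasks)
tq.execute()

tasks = tc.create_downsampling_tasks('file:///data/layer', mip=4, num_mips=4)
tq.insert(tasks)
tq.execute()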
def downsample(ctx, path, queue, mip, fill_missing, num_mips, cseg, compresso,
               sparse, chunk_size, compress, volumetric, delete_bg, bg_color,
               sharded, memory, xrange, yrange, zrange):
    """
    Create an image pyramid for grayscale or labeled images.
    By default, we use 2x2x1 downsampling.

    The levels of the pyramid are called "mips" (from the fake latin
    "Multum in Parvo" or "many in small"). The base of the pyramid,
    the highest resolution layer, is mip 0. Each level of the pyramid
    is one mip level higher.

    The general strategy is to downsample starting from mip 0. This
    builds several levels. Once that job is complete, pass in the
    current top mip level of the pyramid. This builds it even taller
    (referred to as "superdownsampling").
    """
    path = cloudfiles.paths.normalize(path)

    if cseg and compresso:
        print("igneous: must choose one of --cseg or --compresso")
        return

    if sharded and num_mips != 1:
        print("igneous: sharded downsamples only support producing one mip at a time.")
        return

    encoding = None
    if cseg:
        encoding = "compressed_segmentation"
    elif compresso:
        encoding = "compresso"

    factor = (2, 2, 1)
    if volumetric:
        factor = (2, 2, 2)

    bounds = None
    if xrange or yrange or zrange:
        bounds = CloudVolume(path).meta.bounds(mip)

    if xrange:
        bounds.minpt.x = xrange[0]
        bounds.maxpt.x = xrange[1]
    if yrange:
        bounds.minpt.y = yrange[0]
        bounds.maxpt.y = yrange[1]
    if zrange:
        bounds.minpt.z = zrange[0]
        bounds.maxpt.z = zrange[1]

    if sharded:
        tasks = tc.create_image_shard_downsample_tasks(
            path, mip=mip, fill_missing=fill_missing, sparse=sparse,
            chunk_size=chunk_size, encoding=encoding, memory_target=memory,
            factor=factor, bounds=bounds)
    else:
        tasks = tc.create_downsampling_tasks(
            path, mip=mip, fill_missing=fill_missing, num_mips=num_mips,
            sparse=sparse, chunk_size=chunk_size, encoding=encoding,
            delete_black_uploads=delete_bg, background_color=bg_color,
            compress=compress, factor=factor, bounds=bounds)

    parallel = int(ctx.obj.get("parallel", 1))
    tq = TaskQueue(normalize_path(queue))
    tq.insert(tasks, parallel=parallel)
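# Because the sharded branch above only produces one mip per invocation, one
# plausible workaround is to loop over source mips and build the pyramid one
# level at a time. This is an illustrative sketch only; the layer path and the
# mip range are placeholders, and the keyword arguments are kept minimal.
for src_mip in range(0, 4):
    tasks = tc.create_image_shard_downsample_tasks('file:///data/layer', mip=src_mip)
    tq = LocalTaskQueue(parallel=8)
    tq.insert(tasks)
    tq.execute()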
def upload_seg(
    meta: PreviewMeta,
    data: ndarray,
    slack_response: SlackResponse,
    transpose: bool = False,
):
    from numpy import transpose as np_transpose

    em = CloudVolume(meta.em_layer, mip=meta.dst_mip)
    output_layer = f"{environ['GT_BUCKET_PATH']}/{meta.author}/preview/{token_hex(8)}"
    info = CloudVolume.create_new_info(
        num_channels=1,
        layer_type="segmentation",
        data_type="uint32",
        encoding="raw",
        resolution=em.resolution,
        voxel_offset=meta.dst_bbox.minpt,
        volume_size=meta.dst_bbox.size3(),
        mesh=f"mesh_mip_{meta.dst_mip}_err_0",
        chunk_size=(64, 64, 8),
    )

    dst_cv = CloudVolume(output_layer, info=info, mip=0, cdn_cache=False)
    dst_cv.provenance.description = "Image directory ingest"
    dst_cv.provenance.processing.append({
        "method": {
            "task": "ingest",
            "image_path": meta.em_layer,
        },
        "date": str(datetime.today()),
        "script": "cloud_bot",
    })
    dst_cv.provenance.owners = [meta.author]
    dst_cv.commit_info()
    dst_cv.commit_provenance()

    checkpoint_notify("Processing data.", slack_response)

    crop_bbox = meta.dst_bbox - meta.src_bbox.minpt
    data = data[crop_bbox.to_slices()]
    dst_cv[meta.dst_bbox.to_slices()] = (np_transpose(data, (1, 0, 2))
                                         if transpose else data)

    with LocalTaskQueue(parallel=16) as tq:
        tasks = tc.create_downsampling_tasks(output_layer, mip=0, fill_missing=True,
                                             preserve_chunk_size=True)
        tq.insert_all(tasks)

        checkpoint_notify("Creating meshing tasks.", slack_response)

        tasks = tc.create_meshing_tasks(
            output_layer,
            mip=meta.dst_mip,
            simplification=False,
            shape=(320, 320, 40),
            max_simplification_error=0,
        )
        tq.insert_all(tasks)

        tasks = tc.create_mesh_manifest_tasks(output_layer, magnitude=1)
        tq.insert_all(tasks)

    return output_layer
def create_layer(animal, id, start, debug):
    """
    This is the important method called from main. This does all the work.
    Args:
        animal: string to identify the animal/stack
    Returns:
        Nothing, creates a directory of the precomputed volume. Copy this directory
        somewhere apache can read it, e.g.,
        /net/birdstore/Active_Atlas_Data/data_root/pipeline_data/
    """
    # Set all relevant directories
    INPUT = '/net/birdstore/Active_Atlas_Data/data_root/pipeline_data/DK52/preps/CH3/thumbnail_aligned'
    OUTPUT = '/net/birdstore/Active_Atlas_Data/data_root/pipeline_data/DK52/preps/CH3/shapes'
    PRECOMPUTE_PATH = f'/net/birdstore/Active_Atlas_Data/data_root/pipeline_data/{animal}/neuroglancer_data/shapes'
    ATLAS_DIR = '/net/birdstore/Active_Atlas_Data/data_root/atlas_data'
    outpath = os.path.join(ATLAS_DIR, 'shapes', animal)
    os.makedirs(OUTPUT, exist_ok=True)
    os.makedirs(outpath, exist_ok=True)

    files = os.listdir(INPUT)
    num_sections = len(files)
    midpoint = num_sections // 2
    midfilepath = os.path.join(INPUT, files[midpoint])
    midfile = io.imread(midfilepath, img_num=0)
    height = midfile.shape[0]
    width = midfile.shape[1]
    structures = set()
    colors = {'infrahypoglossal': 200, 'perifacial': 210, 'suprahypoglossal': 220}

    aligned_shape = np.array((width, height))
    section_structure_vertices = defaultdict(dict)

    with connection.cursor() as cursor:
        sql = """select el.frame + %s as section, el.points, elab.name
                 from engine_labeledshape el
                 inner join engine_job ej on el.job_id = ej.id
                 inner join engine_label elab on el.label_id = elab.id
                 where elab.task_id = %s
                 order by elab.name, el.frame"""
        cursor.execute(sql, [start, id])
        rows = cursor.fetchall()

    for row in rows:
        section = row[0]
        pts = row[1]
        structure = row[2]
        structures.add(structure)
        pts = np.array([tuple(map(float, x.split())) for x in pts.strip().split(',')])
        vertices = pts.reshape(pts.shape[0]//2, 2).astype(np.float64)
        addme = vertices[0].reshape(1,2)
        vertices = np.concatenate((vertices, addme), axis=0)
        lp = vertices.shape[0]
        if lp > 2:
            new_len = max(lp, 100)
            vertices = interpolate(vertices, new_len)
        section_structure_vertices[section][structure] = vertices

    ##### Alignment of annotation coordinates
    volume = np.zeros((aligned_shape[1], aligned_shape[0], num_sections), dtype=np.uint8)
    # for section in section_structure_vertices:
    for section, file in enumerate(files):
        template = np.zeros((aligned_shape[1], aligned_shape[0]), dtype=np.uint8)
        for structure in section_structure_vertices[section]:
            points = section_structure_vertices[section][structure]
            print(section, structure, points.shape, np.amax(points), np.amin(points))
            cv2.fillPoly(template, [points.astype(np.int32)], colors[structure])
        outfile = str(section).zfill(3) + ".tif"
        imgpath = os.path.join(OUTPUT, outfile)
        cv2.imwrite(imgpath, template)
        volume[:, :, section - 1] = template

    print(colors)
    sys.exit()

    volume_filepath = os.path.join(outpath, f'{animal}_shapes.npy')
    volume = np.swapaxes(volume, 0, 1)
    print('Saving:', volume_filepath, 'with shape', volume.shape)
    # with open(volume_filepath, 'wb') as file:
    #     np.save(file, volume)

    # now use 9-1 notebook to convert to a precomputed.
    # Voxel resolution in nanometer (how much nanometer each element in numpy array represent)
    resol = (14464, 14464, 20000)
    # Voxel offset
    offset = (0, 0, 0)
    # Layer type
    layer_type = 'segmentation'
    # number of channels
    num_channels = 1
    # segmentation properties in the format of [(number1, label1), (number2, label2) ...]
    # where number is an integer that is in the volume and label is a string that describes that segmentation
    segmentation_properties = [(len(structures) + index + 1, structure)
                               for index, structure in enumerate(structures)]

    cloudpath = f'file://{PRECOMPUTE_PATH}'
    info = CloudVolume.create_new_info(
        num_channels = num_channels,
        layer_type = layer_type,
        data_type = str(volume.dtype),  # Channel images might be 'uint8'
        encoding = 'raw',  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution = resol,  # Voxel scaling, units are in nanometers
        voxel_offset = offset,  # x,y,z offset in voxels from the origin
        chunk_size = [64, 64, 64],  # units are voxels
        volume_size = volume.shape,  # e.g. a cubic millimeter dataset
    )
    vol = CloudVolume(cloudpath, mip=0, info=info, compress=True)
    vol.commit_info()
    vol[:, :, :] = volume[:, :, :]

    vol.info['segment_properties'] = 'names'
    vol.commit_info()

    segment_properties_path = os.path.join(PRECOMPUTE_PATH, 'names')
    os.makedirs(segment_properties_path, exist_ok=True)

    info = {
        "@type": "neuroglancer_segment_properties",
        "inline": {
            "ids": [str(number) for number, label in segmentation_properties],
            "properties": [{
                "id": "label",
                "description": "Name of structures",
                "type": "label",
                "values": [str(label) for number, label in segmentation_properties]
            }]
        }
    }
    print('Creating names in', segment_properties_path)
    with open(os.path.join(segment_properties_path, 'info'), 'w') as file:
        json.dump(info, file, indent=2)

    # Setting parallel to a number > 1 hangs the script. It still runs fast with parallel=1
    tq = LocalTaskQueue(parallel=1)
    tasks = tc.create_downsampling_tasks(cloudpath, compress=True)  # Downsample the volumes
    tq.insert(tasks)
    tq.execute()
    print('Finished')
def ingest(data, opt, tag=None):
    # Neuroglancer format
    data = py_utils.to_tensor(data)
    data = data.transpose((3,2,1,0))
    num_channels = data.shape[-1]
    shape = data.shape[:-1]

    # Offset
    if opt.offset is None:
        opt.offset = opt.begin

    # MIP level correction
    if opt.gs_input and opt.in_mip > 0:
        o = opt.offset
        p = pow(2, opt.in_mip)
        offset = (o[0]//p, o[1]//p, o[2])
    else:
        offset = opt.offset

    # Patch offset correction (when output patch is smaller than input patch)
    patch_offset = (np.array(opt.inputsz) - np.array(opt.outputsz)) // 2
    offset = tuple(np.array(offset) + np.flip(patch_offset, 0))

    # Create info
    info = make_info(num_channels, 'image', str(data.dtype), shape, opt.resolution,
                     offset=offset, chunk_size=opt.chunk_size)
    print(info)

    gs_path = opt.gs_output
    if '{}' in opt.gs_output:
        if opt.keywords:
            gs_path = gs_path.format(*opt.keywords)
        else:
            if opt.center is not None:
                coord = "x{}_y{}_z{}".format(*opt.center)
                coord += "_s{}-{}-{}".format(*opt.size)
            else:
                coord = '_'.join(['{}-{}'.format(b,e) for b,e in zip(opt.begin, opt.end)])
            gs_path = gs_path.format(coord)

    # Tagging
    if tag is not None:
        if gs_path[-1] == '/':
            gs_path += tag
        else:
            gs_path += ('/' + tag)

    print("gs_output:\n{}".format(gs_path))
    cvol = cv.CloudVolume(gs_path, mip=0, info=info, parallel=opt.parallel)
    cvol[:,:,:,:] = data
    cvol.commit_info()

    # Downsample
    if opt.downsample:
        import igneous
        from igneous.task_creation import create_downsampling_tasks

        with LocalTaskQueue(parallel=opt.parallel) as tq:
            # create_downsampling_tasks(tq, gs_path, mip=0, fill_missing=True)
            tasks = create_downsampling_tasks(gs_path, mip=0, fill_missing=True)
            tq.insert_all(tasks)
        dest_layer_path=rechunked_cloudpath,
        chunk_size=first_chunk,
        mip=0,
        skip_downsamples=True)
    print(len(tasks))
    tq.insert(tasks)
    tq.execute()
elif step == 'step3':  # downsampling
    print("step 3, downsampling")
    tq = LocalTaskQueue(parallel=cpus)
    downsample = "full"
    mips = [0, 1, 2, 3, 4]
    for mip in mips:
        print(f"Mip: {mip}")
        cv = CloudVolume(rechunked_cloudpath, mip)
        chunks = calculate_chunks(downsample, mip)
        factors = calculate_factors(downsample, mip)
        print(f"Chunk size: {chunks}")
        print(f"Downsample factors: {factors}")
        tasks = tc.create_downsampling_tasks(cv.layer_cloudpath, mip=mip, num_mips=1,
                                             factor=factors, preserve_chunk_size=False,
                                             compress=True, chunk_size=chunks)
        tq.insert(tasks)
        tq.execute()
        print()
def create_structures(animal):
    """
    This is the important method called from main. This does all the work.
    Args:
        animal: string to identify the animal/stack
    Returns:
        Nothing, creates a directory of the precomputed volume. Copy this directory
        somewhere apache can read it, e.g.,
        /net/birdstore/Active_Atlas_Data/data_root/pipeline_data/
    """
    sqlController = SqlController(animal)
    fileLocationManager = FileLocationManager(animal)

    # Set all relevant directories
    THUMBNAIL_PATH = os.path.join(fileLocationManager.prep, 'CH1', 'thumbnail')
    CSV_PATH = '/net/birdstore/Active_Atlas_Data/data_root/atlas_data/foundation_brain_annotations'
    CLEANED = os.path.join(fileLocationManager.prep, 'CH1', 'thumbnail_cleaned')
    PRECOMPUTE_PATH = f'/net/birdstore/Active_Atlas_Data/data_root/atlas_data/foundation_brain_annotations/{animal}'

    width = sqlController.scan_run.width
    height = sqlController.scan_run.height
    width = int(width * SCALING_FACTOR)
    height = int(height * SCALING_FACTOR)
    aligned_shape = np.array((width, height))

    THUMBNAILS = sorted(os.listdir(THUMBNAIL_PATH))
    num_section = len(THUMBNAILS)
    structure_dict = sqlController.get_structures_dict()

    csvfile = os.path.join(CSV_PATH, f'{animal}_annotation.csv')
    hand_annotations = pd.read_csv(csvfile)
    hand_annotations['vertices'] = hand_annotations['vertices'] \
        .apply(lambda x: x.replace(' ', ',')) \
        .apply(lambda x: x.replace('\n', ',')) \
        .apply(lambda x: x.replace(',]', ']')) \
        .apply(lambda x: x.replace(',,', ',')) \
        .apply(lambda x: x.replace(',,', ',')) \
        .apply(lambda x: x.replace(',,', ',')) \
        .apply(lambda x: x.replace(',,', ','))
    hand_annotations['vertices'] = hand_annotations['vertices'].apply(lambda x: ast.literal_eval(x))

    structures = list(hand_annotations['name'].unique())
    section_structure_vertices = defaultdict(dict)
    for structure in tqdm(structures):
        contour_annotations, first_sec, last_sec = get_contours_from_annotations(animal, structure, hand_annotations, densify=4)
        for section in contour_annotations:
            section_structure_vertices[section][structure] = contour_annotations[section][structure][1]

    ##### Reproduce create_clean transform
    section_offset = {}
    for file_name in tqdm(THUMBNAILS):
        filepath = os.path.join(THUMBNAIL_PATH, file_name)
        img = io.imread(filepath)
        section = int(file_name.split('.')[0])
        section_offset[section] = (aligned_shape - img.shape[:2][::-1]) // 2

    ##### Reproduce create_alignment transform
    image_name_list = sorted(os.listdir(CLEANED))
    anchor_idx = len(image_name_list) // 2
    transformation_to_previous_sec = {}
    for i in range(1, len(image_name_list)):
        fixed_fn = os.path.splitext(image_name_list[i - 1])[0]
        moving_fn = os.path.splitext(image_name_list[i])[0]
        transformation_to_previous_sec[i] = load_consecutive_section_transform(animal, moving_fn, fixed_fn)

    transformation_to_anchor_sec = {}
    # Converts every transformation
    for moving_idx in range(len(image_name_list)):
        if moving_idx == anchor_idx:
            transformation_to_anchor_sec[image_name_list[moving_idx]] = np.eye(3)
        elif moving_idx < anchor_idx:
            T_composed = np.eye(3)
            for i in range(anchor_idx, moving_idx, -1):
                T_composed = np.dot(np.linalg.inv(transformation_to_previous_sec[i]), T_composed)
            transformation_to_anchor_sec[image_name_list[moving_idx]] = T_composed
        else:
            T_composed = np.eye(3)
            for i in range(anchor_idx + 1, moving_idx + 1):
                T_composed = np.dot(transformation_to_previous_sec[i], T_composed)
            transformation_to_anchor_sec[image_name_list[moving_idx]] = T_composed

    warp_transforms = create_warp_transforms(animal, transformation_to_anchor_sec, 'thumbnail', 'thumbnail')
    ordered_transforms = sorted(warp_transforms.items())
    section_transform = {}
    for section, transform in ordered_transforms:
        section_num = int(section.split('.')[0])
        transform = np.linalg.inv(transform)
        section_transform[section_num] = transform

    ##### Alignment of annotation coordinates
    keys = [k for k in structure_dict.keys()]
    # This missing_sections will need to be manually built up from Beth's spreadsheet
    missing_sections = {k: [117] for k in keys}
    fill_sections = defaultdict(dict)
    pr5_sections = []
    other_structures = set()
    volume = np.zeros((aligned_shape[1], aligned_shape[0], num_section), dtype=np.uint8)
    for section in section_structure_vertices:
        template = np.zeros((aligned_shape[1], aligned_shape[0]), dtype=np.uint8)
        for structure in section_structure_vertices[section]:
            points = np.array(section_structure_vertices[section][structure])
            points = points // 32
            points = points + section_offset[section]  # create_clean offset
            points = transform_create_alignment(points, section_transform[section])  # create_alignment transform
            points = points.astype(np.int32)

            try:
                missing_list = missing_sections[structure]
            except:
                missing_list = []

            if section in missing_list:
                fill_sections[structure][section] = points

            if 'pr5' in structure.lower():
                pr5_sections.append(section)

            try:
                # color = colors[structure.upper()]
                color = structure_dict[structure][1]  # structure dict returns a list of [description, color] for each key
            except:
                color = 255
                other_structures.add(structure)

            cv2.polylines(template, [points], True, color, 2, lineType=cv2.LINE_AA)
        volume[:, :, section - 1] = template

    # fill up missing sections
    template = np.zeros((aligned_shape[1], aligned_shape[0]), dtype=np.uint8)
    for structure, v in fill_sections.items():
        color = structure_dict[structure][1]
        for section, points in v.items():
            cv2.polylines(template, [points], True, color, 2, lineType=cv2.LINE_AA)
            volume[:, :, section] = template

    volume_filepath = os.path.join(CSV_PATH, f'{animal}_annotations.npy')
    volume = np.swapaxes(volume, 0, 1)
    print('Saving:', volume_filepath, 'with shape', volume.shape)
    with open(volume_filepath, 'wb') as file:
        np.save(file, volume)

    # now use 9-1 notebook to convert to a precomputed.
    # Voxel resolution in nanometer (how much nanometer each element in numpy array represent)
    resol = (14464, 14464, 20000)
    # Voxel offset
    offset = (0, 0, 0)
    # Layer type
    layer_type = 'segmentation'
    # number of channels
    num_channels = 1
    # segmentation properties in the format of [(number1, label1), (number2, label2) ...]
    # where number is an integer that is in the volume and label is a string that describes that segmentation
    segmentation_properties = [(number, f'{structure}: {label}')
                               for structure, (label, number) in structure_dict.items()]
    extra_structures = ['Pr5', 'VTg', 'DRD', 'IF', 'MPB', 'Op', 'RPC', 'LSO', 'MVe', 'CnF',
                        'pc', 'DTgC', 'LPB', 'Pr5DM', 'DTgP', 'RMC', 'VTA', 'IPC', 'DRI', 'LDTg',
                        'IPA', 'PTg', 'DTg', 'IPL', 'SuVe', 'Sol', 'IPR', '8n', 'Dk', 'IO',
                        'Cb', 'Pr5VL', 'APT', 'Gr', 'RR', 'InC', 'X', 'EW']
    segmentation_properties += [(len(structure_dict) + index + 1, structure)
                                for index, structure in enumerate(extra_structures)]

    cloudpath = f'file://{PRECOMPUTE_PATH}'
    info = CloudVolume.create_new_info(
        num_channels = num_channels,
        layer_type = layer_type,
        data_type = str(volume.dtype),  # Channel images might be 'uint8'
        encoding = 'raw',  # raw, jpeg, compressed_segmentation, fpzip, kempressed
        resolution = resol,  # Voxel scaling, units are in nanometers
        voxel_offset = offset,  # x,y,z offset in voxels from the origin
        chunk_size = [64, 64, 64],  # units are voxels
        volume_size = volume.shape,  # e.g. a cubic millimeter dataset
    )
    vol = CloudVolume(cloudpath, mip=0, info=info, compress=False)
    vol.commit_info()
    vol[:, :, :] = volume[:, :, :]

    vol.info['segment_properties'] = 'names'
    vol.commit_info()

    segment_properties_path = os.path.join(PRECOMPUTE_PATH, 'names')
    os.makedirs(segment_properties_path, exist_ok=True)

    info = {
        "@type": "neuroglancer_segment_properties",
        "inline": {
            "ids": [str(number) for number, label in segmentation_properties],
            "properties": [{
                "id": "label",
                "description": "Name of structures",
                "type": "label",
                "values": [str(label) for number, label in segmentation_properties]
            }]
        }
    }
    print('Creating names in', segment_properties_path)
    with open(os.path.join(segment_properties_path, 'info'), 'w') as file:
        json.dump(info, file, indent=2)

    # Setting parallel to a number > 1 hangs the script. It still runs fast with parallel=1
    tq = LocalTaskQueue(parallel=1)
    tasks = tc.create_downsampling_tasks(cloudpath, compress=False)  # Downsample the volumes
    tq.insert(tasks)
    tq.execute()
    print('Finished')