def expand_arr_1d(arr: da.Array, required_shape: Tuple[int]) -> da.Array:
    missing = (required_shape[0] - arr.shape[0], )
    values = da.block([arr, da.zeros(missing, dtype=arr.dtype)])
    mask = da.block(
        [da.zeros(arr.shape, dtype=bool), da.ones(missing, dtype=bool)])
    return da.ma.masked_array(values, mask=mask)
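
# A minimal usage sketch (not part of the original source) for expand_arr_1d:
# pad a 1-D dask array to a required length and mark the appended tail as masked.
import numpy as np
import dask.array as da

arr = da.from_array(np.array([1.0, 2.0, 3.0]), chunks=2)
padded = expand_arr_1d(arr, (5,))
assert padded.shape == (5,)
# only the two appended entries are masked
print(da.ma.getmaskarray(padded).compute())  # [False False False  True  True]
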
def da_diagsvd(s, M, N):
    """
    Construct the sigma matrix in SVD from singular values and size M, N.

    Parameters
    ----------
    s : (M,) or (N,) array_like
        Singular values
    M : int
        Size of the matrix whose singular values are `s`.
    N : int
        Size of the matrix whose singular values are `s`.

    Returns
    -------
    S : (M, N) ndarray
        The S-matrix in the singular value decomposition
    """
    part = da.diag(s)
    MorN = len(s)
    if MorN == M:
        return da.block([part, da.zeros((M, N - M), dtype=s.dtype)])
    elif MorN == N:
        return da.block([[part], [da.zeros((M - N, N), dtype=s.dtype)]])
    else:
        raise ValueError("Length of s must be M or N.")
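
# A usage sketch (not part of the original source) for da_diagsvd, checked
# against scipy.linalg.diagsvd, which this helper mirrors lazily with dask.
import numpy as np
import dask.array as da
from scipy.linalg import diagsvd

s_np = np.array([3.0, 2.0, 1.0])
s = da.from_array(s_np, chunks=3)
S = da_diagsvd(s, 5, 3)  # M=5 rows, N=3 columns
assert S.shape == (5, 3)
assert np.allclose(S.compute(), diagsvd(s_np, 5, 3))
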
def _solve(self, HH, HY):
    """Compute output weights from HH and HY using Dask functionality."""
    # make HH/HY divisible by chunk size
    n_features, _ = HH.shape
    padding = 0
    if n_features > self.bsize_ and n_features % self.bsize_ > 0:
        print("Adjusting batch size {} to n_features {}".format(
            self.bsize_, n_features))
        padding = self.bsize_ - (n_features % self.bsize_)
        P01 = da.zeros((n_features, padding))
        P10 = da.zeros((padding, n_features))
        P11 = da.zeros((padding, padding))
        HH = da.block([[HH, P01], [P10, P11]])

        P1 = da.zeros((padding, HY.shape[1]))
        HY = da.block([[HY], [P1]])

    # rechunk, add bias, and solve
    HH = HH.rechunk(
        self.bsize_) + self.alpha * da.eye(HH.shape[1], chunks=self.bsize_)
    HY = HY.rechunk(self.bsize_)

    B = da.linalg.solve(HH, HY, sym_pos=True)
    if padding > 0:
        B = B[:n_features]
    return B
def stitch_fields(fields, blocksize):
    """
    """

    # weight block edges
    weighted_fields = da.map_blocks(
        weight_block, fields, blocksize=blocksize, dtype=np.float32,
    )

    # remove block index dimensions
    sh = fields.shape[:3]
    list_of_blocks = [[[[weighted_fields[i, j, k]] for k in range(sh[2])]
                       for j in range(sh[1])] for i in range(sh[0])]
    aug_fields = da.block(list_of_blocks)

    # merge overlap regions
    overlaps = tuple([int(round(x / 8)) for x in blocksize] + [0, ])
    return da.map_overlap(
        merge_overlaps, aug_fields,
        blocksize=blocksize,
        depth=overlaps,
        boundary=0.,
        trim=False,
        dtype=np.float32,
        chunks=blocksize + [3, ],
    )
def naive_merge(work_dir="/run/media/user/HDD 1TB/", prefix="split_part_", ask=False, rechunk=False):
    """ Write multiple files into a big array file.
    """

    def get_tuple_id(file_name, prefix):
        """ this function returns the position of the block file in the total array
        """
        strings = file_name.replace(prefix, "").split('_')
        integers = map(lambda s: int(s), strings)
        return tuple(integers)

    def get_max_dim(keys, dim):
        """ key = (x, y, z) position of the block file in the total array
        this function returns the number of blocks in a given dimension
        """
        return max([key[dim] for key in keys])

    total_time = time.time()

    file_names = {get_tuple_id(f.split('.')[0], prefix): f
                  for f in os.listdir(work_dir)
                  if os.path.isfile(os.path.join(work_dir, f)) and prefix in f}
    keys = file_names.keys()
    i_max, j_max, k_max = (get_max_dim(keys, 0), get_max_dim(keys, 1), get_max_dim(keys, 2))

    data = list()
    for i in range(i_max + 1):
        stack_i = list()
        for j in range(j_max + 1):
            stack_j = list()
            for k in range(k_max + 1):
                file_name = file_names[(i, j, k)]
                arr_k = get_dask_array_from_hdf5(file_path=os.path.join(work_dir, file_name),
                                                 cast=True, key='/data')
                if rechunk:
                    arr_k = arr_k.rechunk((arr_k.shape[0], arr_k.shape[1], "auto"))
                stack_j.append(arr_k)
            stack_i.append(stack_j)
        data.append(stack_i)
    arr = da.block(data)
    print("Output shape: " + str(arr.shape))

    if ask:
        while True:
            try:
                save = input("Do you want to proceed to the saving ? (y/n)")
                if save in ["y", "n"]:
                    break
            except ValueError:
                print("Invalid answer.")
                continue
    else:
        save = "y"

    if save == "y":
        print("start saving...")
        IO_time = time.time()
        save_arr(arr, "hdf5", work_dir + "merged.hdf5", key='/data', chunks_shape=None)
        IO_time = time.time() - IO_time

    total_time = time.time() - total_time
    return total_time, IO_time
def block_regex_tif(tif_filepath: str, lazy_arrays: list) -> dask.array:
    """Sort .tif files in order. Map key regex components to set chunking for
    .tif array. Block these chunks together and return as dask.array"""
    # THIS IS FOR PARSING BY SCAN_ITER AND CHANNELS
    # e.g. Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0016966725msecAbs_000x_000y_000z_0000t.tif
    tif_files = [fn.split('\\')[-1] for fn in tif_filepath]
    fn_comp_sets = dict()
    for fn in tif_files:
        for i, comp in enumerate(os.path.splitext(fn)[0].split("_")):
            fn_comp_sets.setdefault(i, set())
            fn_comp_sets[i].add(comp)
    fn_comp_sets = list(map(sorted, fn_comp_sets.values()))

    remap_comps = [
        dict(map(reversed, enumerate(fn_comp_sets[2]))),  # MUST be the index for scan_iter, e.g. '0003'
        dict(map(reversed, enumerate(fn_comp_sets[4])))   # MUST be the index for channel, e.g. 'ch0'
    ]

    # Create an empty object array to organize each chunk that loads a TIFF
    b = np.empty(tuple(map(len, remap_comps)) + (1, 1, 1), dtype=object)
    for fn, x in zip(tif_files, lazy_arrays):
        scan_iter = int(fn[fn.index("Scan_Iter_") + 10:fn.index("_Cam")].split("_")[0])
        channel = int(fn[fn.index("_ch") + 3:].split("_")[0])
        b[scan_iter, channel, 0, 0, 0] = x

    # YOU MUST HAVE SIMILAR CHANNEL PATTERNS TO SCAN_ITER PATTERNS OR ELSE THE PROCESS WILL FAIL
    # e.g. every Scan_Iter_ must have 8x ch0 and 4x ch1. Deviating from this pattern will result in an exception!

    # Stitch together the many blocks into a single array
    b = da.block(b.tolist())
    return b
def _graph_standard_degrid(vis_dataset, grid, briggs_factors, cgk_1D, grid_parms):
    import dask
    import dask.array as da
    import xarray as xr
    import time
    import itertools

    # Getting data for gridding
    chan_chunk_size = vis_dataset[grid_parms["imaging_weight_name"]].chunks[2][0]
    freq_chan = da.from_array(vis_dataset.coords['chan'].values, chunks=(chan_chunk_size))

    n_chunks_in_each_dim = vis_dataset[grid_parms["imaging_weight_name"]].data.numblocks
    chunk_indx = []

    iter_chunks_indx = itertools.product(np.arange(n_chunks_in_each_dim[0]),
                                         np.arange(n_chunks_in_each_dim[1]),
                                         np.arange(n_chunks_in_each_dim[2]),
                                         np.arange(n_chunks_in_each_dim[3]))
    #n_delayed = np.prod(n_chunks_in_each_dim)
    chunk_sizes = vis_dataset[grid_parms["imaging_weight_name"]].chunks
    n_chan_chunks_img = n_chunks_in_each_dim[2]

    list_of_degrids = []
    list_of_sum_weights = []
    list_of_degrids = ndim_list(n_chunks_in_each_dim)

    # Build graph
    for c_time, c_baseline, c_chan, c_pol in iter_chunks_indx:
        if grid_parms['chan_mode'] == 'cube':
            a_c_chan = c_chan
        else:
            a_c_chan = 0

        if grid_parms['do_imaging_weight']:
            sub_degrid = dask.delayed(_standard_imaging_weight_degrid_numpy_wrap)(
                grid.partitions[0, 0, a_c_chan, c_pol],
                vis_dataset[grid_parms["uvw_name"]].data.partitions[c_time, c_baseline, 0],
                vis_dataset[grid_parms["imaging_weight_name"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
                briggs_factors.partitions[:, a_c_chan, c_pol],
                freq_chan.partitions[c_chan],
                dask.delayed(grid_parms))

            single_chunk_size = (chunk_sizes[0][c_time], chunk_sizes[1][c_baseline],
                                 chunk_sizes[2][c_chan], chunk_sizes[3][c_pol])
            list_of_degrids[c_time][c_baseline][c_chan][c_pol] = da.from_delayed(sub_degrid, single_chunk_size, dtype=np.double)
        else:
            print('Degridding of visibilities and psf still needs to be implemented')
            #sub_grid_and_sum_weights = dask.delayed(_standard_grid_numpy_wrap)(
            #vis_dataset[vis_dataset[grid_parms["data"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
            #vis_dataset[grid_parms["uvw"]].data.partitions[c_time, c_baseline, 0],
            #vis_dataset[grid_parms["imaging_weight"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
            #freq_chan.partitions[c_chan],
            #dask.delayed(cgk_1D), dask.delayed(grid_parms))

    degrid = da.block(list_of_degrids)
    return degrid
def run(ds, size_limit=4096, mip=False):
    # estimate resize ratio, no larger than 4k
    tile_shape, (im_shape, im_dtype) = ds.tile_shape, ds._load_array_info()
    shape = tuple(t * i for t, i in zip(tile_shape, im_shape))
    logger.debug(f"original preview {shape}, {im_dtype}")

    ratio, layer_shape = 1, shape[1:] if len(shape) == 3 else shape
    while True:
        if all((s // ratio) > size_limit for s in layer_shape):
            logger.debug(f"ratio={ratio}, exceeds size limit ({size_limit})")
            ratio *= 2
        else:
            break
    logger.info(f"target downsampling {ratio}x")

    # retrieve tiles
    def retrieve(tile):
        data = ds[tile]

        sampler = (slice(None, None, ratio), ) * 2
        if data.ndim == 3:
            if mip:
                # flatten the entire tile
                data = data.max(axis=0)
            else:
                # normally, we don't sub-sample z
                sampler = (slice(None, None, None), ) + sampler
        data = data[sampler]

        return data

    def groupby_tiles(inventory, index: List[str]):
        """
        Aggregation function that generates the proper internal list layout for all
        the tiles in their natural N-D layout.

        Args:
            inventory (pd.DataFrame): the listing inventory
            index (list of str): the column header
        """
        tiles = []
        for _, tile in inventory.groupby(index[0]):
            if len(index) > 1:
                # we are not at the fastest dimension yet, decrease 1 level
                tiles.append(groupby_tiles(tile, index[1:]))
            else:
                # fastest dimension, call retrieval function
                tiles.append(retrieve(tile))
        return tiles

    index = ["tile_y", "tile_x"]
    if "tile_z" in ds.index.names:
        index = ["tile_z"] + index
    logger.info(f"a {len(index)}-D tiled dataset")

    # pack as a huge array
    preview = da.block(groupby_tiles(ds, index))

    return preview
def merge_hdf5_multiple(input_dirpath, out_filepath, out_file, dataset_key, store):
    """ Merge separated hdf5 files into one hdf5 output file.

    Arguments:
    ----------
        input_dirpath: path to input files
        out_filepath: path to output file
        out_file: empty pointer. will contain file object to be freed after computations by Merge object.
        dataset_key: dataset key of the block stored into each input file
    """

    def print_blocks(l, depth):
        tab = depth * ['\t']
        if not isinstance(l, list):
            logger.info(''.join(tab) + '%s', l)
        else:
            logger.info(''.join(tab) + '[')
            for e in l:
                print_blocks(e, depth + 1)
            logger.info(''.join(tab) + ']')

    # get array parts from input files
    workdir = os.getcwd()
    os.chdir(input_dirpath)
    data = dict()
    for infilepath in glob.glob("[0-9]*_[0-9]*_[0-9]*.hdf5"):
        pos = infilepath.split('_')
        pos[-1] = pos[-1].split('.')[0]
        pos = tuple(list(map(lambda s: int(s), pos)))
        arr = get_dask_array_from_hdf5(infilepath, dataset_key, logic_cs="dataset_shape")
        data[pos] = arr
    os.chdir(workdir)

    if len(data.keys()) == 0:
        msg = 'Could not find input file matching regex'
        logger.error(msg)
        raise ValueError(msg)

    for pos in data.keys():
        logger.debug('%s', pos)

    # create reconstructed_array
    blocks = to_list(data)
    print_blocks(blocks, 0)
    reconstructed_array = da.block(blocks)

    if not store:
        return reconstructed_array

    # store new array in output file
    out_file = h5py.File(out_filepath, 'w')
    dset = out_file.create_dataset('/data', shape=reconstructed_array.shape)
    return da.store(reconstructed_array, dset, compute=False)
def as_stitched_array(self):

    def read_tile(channel_index, t_index, pos_index, z_index):
        if not np.isnan(pos_index) and channel_index in self.c_z_t_p_tree and \
                z_index in self.c_z_t_p_tree[channel_index] and \
                t_index in self.c_z_t_p_tree[channel_index][z_index] and \
                pos_index in self.c_z_t_p_tree[channel_index][z_index][t_index]:
            img = self.read_image(channel_index=channel_index, z_index=z_index,
                                  t_index=t_index, pos_index=pos_index, memmapped=True)
        else:
            img = self._empty_tile
        # crop to center of tile for stitching
        return img[self.half_overlap:-self.half_overlap, self.half_overlap:-self.half_overlap]

    def z_stack(c_index, t_index, p_index):
        if np.isnan(p_index):
            return da.stack(self.z_indices.size *
                            [self._empty_tile[self.half_overlap:-self.half_overlap,
                                              self.half_overlap:-self.half_overlap]])
        else:
            z_list = []
            for z_index in self.z_indices:
                z_list.append(read_tile(c_index, t_index, p_index, z_index))
            return da.stack(z_list)

    self.half_overlap = self.overlap[0] // 2

    # get spatial layout of position indices
    zero_min_row_col = (self.row_col_array - np.nanmin(self.row_col_array, axis=0))
    row_col_mat = np.nan * np.ones([int(np.nanmax(zero_min_row_col[:, 0])) + 1,
                                    int(np.nanmax(zero_min_row_col[:, 1])) + 1])
    rows = zero_min_row_col[self.position_indices][:, 0]
    cols = zero_min_row_col[self.position_indices][:, 1]
    # mask in case some positions were corrupted
    mask = np.logical_not(np.isnan(rows))
    # cast with the builtin int (np.int is deprecated)
    row_col_mat[rows[mask].astype(int), cols[mask].astype(int)] = self.position_indices[mask]

    total = self.time_indices.size * self.channel_indices.size * row_col_mat.shape[0] * row_col_mat.shape[1]
    count = 1
    stacks = []
    for t_index in self.time_indices:
        stacks.append([])
        for c_index in self.channel_indices:
            blocks = []
            for row in row_col_mat:
                blocks.append([])
                for p_index in row:
                    print('\rAdding data chunk {} of {}'.format(count, total), end='')
                    count += 1
                    blocks[-1].append(z_stack(c_index, t_index, p_index))
            stacks[-1].append(da.block(blocks))
    print('\rDask array opened')
    return da.stack(stacks)
def _read_delayed(self) -> da.core.Array:
    # Load Tiff
    with TiffFile(self._file) as tiff:
        # Check each scene has the same shape
        # If scene shape checking fails, use the specified scene and update
        # operating shape
        scenes = tiff.series
        operating_shape = scenes[0].shape
        if not self._scene_shape_is_consistent(tiff, S=self.specific_s_index):
            operating_shape = scenes[self.specific_s_index].shape
            scenes = [scenes[self.specific_s_index]]

        # Get sample yx plane
        sample = scenes[0].pages[0].asarray()

        # Combine length of scenes and operating shape
        # Replace YX dims with empty dimensions
        operating_shape = (len(scenes), *operating_shape)
        if scenes[0].keyframe.samplesperpixel != 1:
            # if it's a multichannel (RGB) we need to pull in the channels as well
            operating_shape = operating_shape[:-3] + (1, 1, 1)
        else:
            # the data is a 2D (Y, X) so read 2D planes
            operating_shape = operating_shape[:-2] + (1, 1)

        # Make ndarray for lazy arrays to fill
        lazy_arrays = np.ndarray(operating_shape, dtype=object)
        for all_page_index, (np_index, _) in enumerate(np.ndenumerate(lazy_arrays)):
            # Scene index is the first index in np_index
            scene_index = np_index[0]

            # This page index is current enumeration divided by scene index + 1
            # For example if the image has 10 Z slices and 5 scenes, there
            # would be 50 total pages
            this_page_index = all_page_index // (scene_index + 1)

            # Fill the numpy array with the delayed arrays
            lazy_arrays[np_index] = da.from_delayed(
                delayed(TiffReader._imread)(self._file, scene_index, this_page_index),
                shape=sample.shape,
                dtype=sample.dtype,
            )

        # Convert the numpy array of lazy readers into a dask array
        data = da.block(lazy_arrays.tolist())

        # Only return the scene dimension if multiple scenes are present
        if len(scenes) == 1:
            data = data[0, :]

    return data
def recurse_axes(loop_axes, point_axes):
    if len(loop_axes.values()) == 0:
        print('\rAdding data chunk {} of {}'.format(self._count, total), end='')
        self._count += 1
        if None not in point_axes.values() and self.has_image(**point_axes):
            return self.read_image(**point_axes, memmapped=True)
        else:
            # return np.zeros((self.image_height, self.image_width), self.dtype)
            return self._empty_tile
    else:
        # do position first because it makes stitching faster
        axis = 'position' if 'position' in loop_axes.keys() and stitched else list(loop_axes.keys())[0]
        remaining_axes = loop_axes.copy()
        del remaining_axes[axis]
        if axis == 'position' and stitched:
            # Stitch tiles acquired in a grid
            self.half_overlap = self.overlap[0] // 2

            # get spatial layout of position indices
            zero_min_row_col = (self.row_col_array - np.nanmin(self.row_col_array, axis=0))
            row_col_mat = np.nan * np.ones(
                [int(np.nanmax(zero_min_row_col[:, 0])) + 1,
                 int(np.nanmax(zero_min_row_col[:, 1])) + 1])
            positions_indices = np.array(list(loop_axes['position']))
            rows = zero_min_row_col[positions_indices][:, 0]
            cols = zero_min_row_col[positions_indices][:, 1]
            # mask in case some positions were corrupted
            mask = np.logical_not(np.isnan(rows))
            # cast with the builtin int (np.int is deprecated)
            row_col_mat[rows[mask].astype(int), cols[mask].astype(int)] = positions_indices[mask]

            blocks = []
            for row in row_col_mat:
                blocks.append([])
                for p_index in row:
                    print('\rAdding data chunk {} of {}'.format(self._count, total), end='')
                    valed_axes = point_axes.copy()
                    valed_axes[axis] = int(p_index) if not np.isnan(p_index) else None
                    blocks[-1].append(da.stack(recurse_axes(remaining_axes, valed_axes)))

            if self.rgb:
                stitched_array = np.concatenate(
                    [np.concatenate(row, axis=len(blocks[0][0].shape) - 2) for row in blocks],
                    axis=len(blocks[0][0].shape) - 3)
            else:
                stitched_array = da.block(blocks)
            return stitched_array
        else:
            blocks = []
            for val in loop_axes[axis]:
                valed_axes = point_axes.copy()
                valed_axes[axis] = val
                blocks.append(recurse_axes(remaining_axes, valed_axes))
            return blocks
def create_array(self, name, shape, chunksize, dtype, timedim):
    chunks_in_each_dim = [shape[i] // chunksize[i] for i in range(len(shape))]
    l = list(itertools.product(*[range(i) for i in chunks_in_each_dim]))
    items = []
    for m in l:
        f = Future(key=("deisa-" + name, m), inform=True, deisa=True)
        d = da.from_delayed(dask.delayed(f), shape=chunksize, dtype=dtype)
        items.append([list(m), d])
    ll = self.array_sort(items)
    arrays = da.block(ll)
    return arrays
def mosaic(ctx, path, screen_size):
    """
    Generate mosaic for each layer.

    \f
    Args:
        path (str): path to the dataset
        screen_size (str, optional): screen size to fit the result in
    """
    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(path, show_trace=show_trace)

    _, dy, dx = ds.voxel_size

    iz = 0
    for tz, ds_xy in TiledDatasetIterator(ds, axes="z", return_key=True):
        if tz:
            logger.info(f"iterate over z tile, {tz}")

        # populating layers
        layer = []
        for ds_x in TiledDatasetIterator(ds_xy, axes="y", return_key=False):
            row = []
            for uuid in TiledDatasetIterator(ds_x, axes="x", return_key=False):
                row.append(ds[uuid])
            layer.append(row)
        layer = da.block(layer)

        sampler = None
        for mosaic in layer:
            if sampler is None:
                ratio = _estaimte_resize_ratio(mosaic, resolution=screen_size)
                sampler = (slice(None, None, ratio), ) * 2
            mosaic = mosaic[sampler]

            print(iz)
            tifffile.imwrite(
                f"mosaic_z{iz:05}.tif",
                mosaic,
                imagej=True,
                resolution=(dx, dy),
                metadata={"unit": "um"},
            )
            iz += 1
def open_RoughScan(self):
    # Open RoughScan tiffs
    filenames = self.filenames

    comp_sets = dict()
    for fn in filenames:
        # Break up filename into components
        comp_ = path.basename(fn)[:-5].split("_")
        for i, comp in enumerate(comp_):
            comp_sets.setdefault(i, set())
            comp_sets[i].add(comp)

    shape = imageio.imread(filenames[0]).shape
    lazy_arrays = [dask.delayed(imageio.imread)(fn) for fn in filenames]
    lazy_arrays = [da.from_delayed(x, shape=shape, dtype='int16') for x in lazy_arrays]
    #images = [imageio.imread(fn) for fn in filenames]

    # Organize images
    # 0 channel, 1 RoughScan, 2 x_step, 3 obj_step
    fn_comp_sets = list(comp_sets.values())
    for i in [0, 2]:
        fn_comp_sets[i] = [int(x[1:]) for x in fn_comp_sets[i]]
    fn_comp_sets = list(map(sorted, fn_comp_sets))
    remap_comps = [fn_comp_sets[0], [1], fn_comp_sets[2]]
    a = np.empty(tuple(map(len, remap_comps)), dtype=object)
    for fn, x in zip(filenames, lazy_arrays):
        comp_ = path.basename(fn)[:-5].split("_")
        channel = fn_comp_sets[0].index(int(comp_[0][1:]))
        x_step = fn_comp_sets[2].index(int(comp_[2][1:]))
        a[channel, 0, x_step] = x

    # Label array
    dim_names = ['channel', 'row', 'col']
    channels = [int(ch) for ch in fn_comp_sets[0]]
    coord_values = {'channel': channels}
    im = xr.DataArray(da.block(a.tolist()),
                      dims=dim_names,
                      coords=coord_values,
                      name='RoughScan')

    im = im.assign_attrs(first_group=0, machine='', scale=1, overlap=0, fixed_bg=0)
    self.im = im.sel(row=slice(64, None))

    return len(fn_comp_sets[2])
def __init__(self, ld_dir, legend):
    # read legend
    self.legend = pd.read_table(legend, header=None)
    self.legend.columns = ['CHR', 'SNP', 'CM', 'BP', 'A1', 'A2']

    # read ld
    self.ld_list = []
    for chr_i in range(1, 23):
        chr_ld_dir = join(ld_dir, str(chr_i))
        part_info = pd.read_table(join(chr_ld_dir, 'part.info'),
                                  header=None, sep='\t', names=['row', 'col'])

        # get last_index to determine shape
        last_ld = np.load(join(chr_ld_dir, 'part_{}.npy'.format(len(part_info))))
        info_end = int(part_info['row'][len(part_info) - 1].split('-')[1])
        index_end = int(part_info['row'][len(part_info) - 1].split('-')[0]) + last_ld.shape[0]

        ld_len = int(np.sqrt(len(part_info)))
        ld = np.zeros([ld_len, ld_len]).tolist()

        for part_i, part in part_info.iterrows():
            row_start, row_end = [int(i) for i in part_info['row'][part_i].split('-')]
            col_start, col_end = [int(i) for i in part_info['col'][part_i].split('-')]
            if row_end == info_end:
                row_end = index_end
            if col_end == info_end:
                col_end = index_end
            local_ld = dask.delayed(np.load)(join(chr_ld_dir, 'part_{}.npy'.format(part_i + 1)))
            local_ld = da.from_delayed(local_ld,
                                       shape=(row_end - row_start, col_end - col_start),
                                       dtype=np.float64)
            ld[int(part_i / ld_len)][part_i % ld_len] = local_ld

        ld = da.block(ld)
        self.ld_list.append(ld)
def _mk_dask_from_delayed(shape, chunking, dtype='float32', filename=None, value=None):
    """
    Create a dask array by combining individually created blocks

    If filename is not None will load from file using np.memmap
    otherwise will generate numbered partitions using np.ones * chunk_idx
    or partitions of uniform value if value is not None
    """
    if filename is not None:
        create = dask.delayed(_mmap_load_chunk, name='create_chunk', pure=True, traverse=False)
        filename = pathlib.Path(filename)
    else:
        create = dask.delayed(_create_chunk, name='create_chunk', pure=True, traverse=False)
    slices_per_dim = _get_block_slices(chunking, shape)
    blocks = []
    # rightmost advances fastest with itertools.product
    for chunk_idx, chunk_slices in enumerate(itertools.product(*slices_per_dim)):
        chunk_value = chunk_idx if value is None else value
        chunk_shape = _slices_to_chunk_shape(chunk_slices, shape)
        chunk = dask.array.from_delayed(create(dataset_shape=shape,
                                               chunk_shape=chunk_shape,
                                               dtype=dtype,
                                               value=chunk_value,
                                               filename=filename,
                                               sl=chunk_slices),
                                        shape=chunk_shape,
                                        dtype=dtype)
        blocks.append(chunk)
    nblocks_per_dim = tuple(len(ss) for ss in slices_per_dim)
    blocks = _reshape_list(blocks, nblocks_per_dim)
    return da.block(blocks)
def dask_data(self) -> da.core.Array:
    # Construct delayed many image reads
    if self._dask_data is None:
        try:
            with imageio.get_reader(self._file) as reader:
                # Store length as it is used a bunch
                image_length = reader.get_length()

                # Handle single image formats like png, jpeg, etc
                if image_length == 1:
                    self._dask_data = da.from_array(self._get_data(self._file, 0))

                # Handle many image formats like gif, mp4, etc
                elif image_length > 1:
                    # Get a sample image
                    sample = self._get_data(self._file, 0)

                    # Create operating shape for the final dask array by prepending
                    # image length to a tuple of ones that is the same length as the
                    # sample shape
                    operating_shape = (image_length, ) + ((1, ) * len(sample.shape))

                    # Create numpy array of empty arrays for delayed get data functions
                    lazy_arrays = np.ndarray(operating_shape, dtype=object)
                    for indicies, _ in np.ndenumerate(lazy_arrays):
                        lazy_arrays[indicies] = da.from_delayed(
                            delayed(self._get_data)(self._file, indicies[0]),
                            shape=sample.shape,
                            dtype=sample.dtype)

                    # Block them into a single dask array
                    self._dask_data = da.block(lazy_arrays.tolist())

                # Catch all other image types as unsupported
                # https://imageio.readthedocs.io/en/stable/userapi.html#imageio.core.format.Reader.get_length
                else:
                    raise exceptions.UnsupportedFileFormatError(self._file)

        # Reraise unsupported file format
        except exceptions.UnsupportedFileFormatError:
            raise exceptions.UnsupportedFileFormatError(self._file)

    return self._dask_data
def load_task_daskarray(self, task, index=None, chunks=None):
    # Load proc datasets
    proc_dsets = np.empty(len(self.proc_files), dtype=object)
    for i, proc_file in enumerate(self.proc_files):
        # Load dataset
        dset = proc_file['tasks'][task]
        # Cast to dask array
        if chunks is None:
            chunks = dset.chunks
        dset = da.from_array(dset, chunks=chunks)
        if index is not None:
            dset = dset[index]
        proc_dsets[i] = dset

    # Shape into nested list
    proc_dsets = proc_dsets.reshape(self.procs.shape)
    proc_dsets = proc_dsets.tolist()

    # Build using dask blocking
    dset = da.block(proc_dsets)
    return dset
def _chunk_numpy_array(data, chunk_size):
    """
    Convert a numpy array into Dask array with chunks of given size. The function
    splits the array into chunks along axes 0 and 1. If the array has more than 2
    dimensions, then the remaining dimensions are not chunked. Note, that
    `dask_array = da.array(data, chunks=...)` will set the chunk size, but not split
    the data into chunks, therefore the array can not be loaded block by block by
    workers controlled by a distributed scheduler.

    Parameters
    ----------
    data: ndarray(float), 2 or more dimensions
        XRF map of the shape `(ny, nx, ne)`, where `ny` and `nx` represent the image
        size and `ne` is the number of points in spectra
    chunk_size: tuple(int, int) or list(int, int)
        Chunk size for axis 0 and 1: `(chunk_y, chunk_x)`. The function will accept
        chunk size values that are larger than the respective `data` array dimensions.

    Returns
    -------
    data_dask: dask.array
        Dask array with the given chunk size
    """

    chunk_y, chunk_x = chunk_size
    ny, nx = data.shape[0:2]
    chunk_y, chunk_x = min(chunk_y, ny), min(chunk_x, nx)

    def _get_slice(n1, n2):
        data_slice = data[slice(n1 * chunk_y, min(n1 * chunk_y + chunk_y, ny)),
                          slice(n2 * chunk_x, min(n2 * chunk_x + chunk_x, nx)), ]
        # Wrap the slice into a list with appropriate dimensions
        for _ in range(2, data.ndim):
            data_slice = [data_slice]
        return data_slice

    # Chunk the numpy array and assemble it as a dask array
    data_dask = da.block(
        [[_get_slice(_1, _2) for _2 in range(int(math.ceil(nx / chunk_x)))]
         for _1 in range(int(math.ceil(ny / chunk_y)))])

    return data_dask
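
# A usage sketch (not part of the original source) for _chunk_numpy_array:
# a (7, 5, 4) map chunked as (3, 3) along the first two axes; the spectral axis
# is left unchunked, and the assembled dask array matches the input exactly.
import numpy as np

data = np.arange(7 * 5 * 4, dtype=float).reshape(7, 5, 4)
data_dask = _chunk_numpy_array(data, (3, 3))
assert data_dask.shape == data.shape
assert data_dask.chunks[0] == (3, 3, 1) and data_dask.chunks[1] == (3, 2)
assert np.allclose(data_dask.compute(), data)
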
def parse_regex_tiff(glob_filenames, lazy_arrays):
    # Get various dimensions
    # THIS IS FOR PARSING BY SCAN_ITER AND CHANNELS
    # e.g. Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0016966725msecAbs_000x_000y_000z_0000t.tif
    glob_filenames_terminal = [file.split('\\')[-1] for file in glob_filenames]
    fn_comp_sets = dict()
    for fn in glob_filenames_terminal:
        for i, comp in enumerate(os.path.splitext(fn)[0].split("_")):
            fn_comp_sets.setdefault(i, set())
            fn_comp_sets[i].add(comp)
    fn_comp_sets = list(map(sorted, fn_comp_sets.values()))

    remap_comps = [
        dict(map(reversed, enumerate(fn_comp_sets[2]))),  # MUST be the index for scan_iter, e.g. '0003'
        dict(map(reversed, enumerate(fn_comp_sets[4])))   # MUST be the index for channel, e.g. 'ch0'
    ]

    # Create an empty object array to organize each chunk that loads a TIFF
    b = np.empty(tuple(map(len, remap_comps)) + (1, 1, 1), dtype=object)

    for fn, x in zip(glob_filenames_terminal, lazy_arrays):
        scan_iter = int(fn[fn.index("Scan_Iter_") + 10:fn.index("_Cam")].split("_")[0])
        channel = int(fn[fn.index("_ch") + 3:].split("_")[0])
        print(scan_iter, channel)
        b[scan_iter, channel, 0, 0, 0] = x

    # YOU MUST HAVE SIMILAR CHANNEL TO SCAN_ITER PATTERNS OR ELSE THE PROCESS WILL FAIL
    # e.g. every Scan_Iter_ must have 8x ch0 and 4x ch1. Deviating from this pattern will result in an exception!

    # Stitch together the many blocks into a single array
    b = da.block(b.tolist())
    return b
def load_leica_frames(df, idx_mapper, coords=None, chunkby_dims='CZ'):
    """
    Lazily load single image leica tiffs into an xarray.DataArray.

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame containing data file names in a column called "filename".
    idx_mapper : callable or pandas.DataFrame
        Means to map data files to the correct dimension index. If callable will
        be used by df.apply. If dataframe, will be joined to df directly.
    coords : dict or None, default None
        Coordinates for the dataarray.
    chunkby_dims : str, default "CZ"
        Dimensions along which to chunk the dask array. XY will automatically be
        chunked together.

    Returns
    -------
    x_data : xarray.DataArray
        Dask backed data array containing leica images. Will have STCZYX dims.
    """
    if callable(idx_mapper):
        df = df.join(df.apply(idx_mapper, axis=1, result_type='expand'))
    elif isinstance(idx_mapper, pd.DataFrame):
        df = df.join(idx_mapper)
    else:
        raise TypeError(
            "Must provide a callable to map names to indices or a pandas dataframe containing the indices"
        )

    # ordered_cols = [df.columns[0]]+list('STCZ')
    # df = df[ordered_cols]
    group_dims = [x for x in df.columns[1:] if x not in chunkby_dims]

    # if you end early there might not be the same number of frames in each pos
    # cutoff at the worst case scenario so things can be rectangular
    cutoffs = df.groupby('S').nunique().min().drop('filename')
    df = df.loc[(df.loc[:, ~df.columns.isin(['S', 'filename'])] < cutoffs).all('columns')]
    chunks = np.zeros(df[group_dims].nunique().values, dtype='object')

    for idx, val in df.groupby(group_dims):
        darr = da.from_zarr(tiff.imread(val.filename.tolist(), aszarr=True)).rechunk(-1)
        # shape = tuple(cutoffs[x] for x in chunkby_dims) + darr.shape[-2:]
        shape = tuple(x for i, x in cutoffs.items() if i in chunkby_dims) + darr.shape[-2:]
        # print(idx, shape)
        darr = darr.reshape(shape)
        chunks[idx] = darr

    chunks = np.expand_dims(chunks, tuple(range(-1, -len(chunkby_dims) - 3, -1)))

    d_data = da.block(chunks.tolist())
    x_data = xr.DataArray(
        d_data,
        dims=group_dims + [x for x in df.columns if x in chunkby_dims] + ['Y', 'X'],
    )
    if coords is not None:
        x_data = x_data.assign_coords(coords)
    x_data = x_data.transpose('S', 'T', 'C', ..., 'Z', 'Y', 'X')
    return x_data
def dask_data(self) -> da.core.Array:
    """
    Read a TIFF image file as a delayed dask array where each chunk of the
    constructed array is a delayed YX plane.

    Returns
    -------
    img: dask.array.core.Array
        The constructed delayed YX plane dask array.
    """
    if self._dask_data is None:
        # Load Tiff
        with TiffFile(self._file) as tiff:
            # Check each scene has the same shape
            # If scene shape checking fails, use the specified scene and update operating shape
            scenes = tiff.series
            operating_shape = scenes[0].shape
            for scene in scenes:
                if scene.shape != operating_shape:
                    operating_shape = scenes[self.specific_s_index].shape
                    scenes = [scenes[self.specific_s_index]]
                    log.info(
                        f"File contains variable dimensions per scene, "
                        f"selected scene: {self.specific_s_index} for data retrieval."
                    )
                    break

            # Get sample yx plane
            sample = scenes[0].pages[0].asarray()

            # Combine length of scenes and operating shape
            # Replace YX dims with empty dimensions
            operating_shape = (len(scenes), *operating_shape)
            operating_shape = operating_shape[:-2] + (1, 1)

            # Make ndarray for lazy arrays to fill
            lazy_arrays = np.ndarray(operating_shape, dtype=object)
            for all_page_index, (np_index, _) in enumerate(np.ndenumerate(lazy_arrays)):
                # Scene index is the first index in np_index
                scene_index = np_index[0]

                # This page index is current enumeration divided by scene index + 1
                # For example if the image has 10 Z slices and 5 scenes, there would be 50 total pages
                this_page_index = all_page_index // (scene_index + 1)

                # Fill the numpy array with the delayed arrays
                lazy_arrays[np_index] = da.from_delayed(
                    delayed(TiffReader._imread)(self._file, scene_index, this_page_index),
                    shape=sample.shape,
                    dtype=sample.dtype)

            # Convert the numpy array of lazy readers into a dask array
            data = da.block(lazy_arrays.tolist())

            # Only return the scene dimension if multiple scenes are present
            if len(scenes) == 1:
                data = data[0, :]

            # Set _dask_data
            self._dask_data = data

    return self._dask_data
def create_cf_map(mxds, gcf_dataset, beam_map, cf_beam_pair_id, pa, cf_pa_centers, chan_map,
                  cf_pb_freq, cf_w, cf_pointing, pointing_ra_dec, sel_parms):
    import itertools
    from ._imaging_utils._general import _ndim_list
    from ._imaging_utils._dask_utils import _tree_combine_list, _find_unique_subset

    vis_dataset = mxds.attrs[sel_parms['xds']]
    n_chunks_in_each_dim = vis_dataset[sel_parms["data"]].data.numblocks
    chunk_sizes = vis_dataset[sel_parms["data"]].chunks

    w = vis_dataset.UVW[:, :, 2]
    iter_chunks_indx = itertools.product(np.arange(n_chunks_in_each_dim[0]),
                                         np.arange(n_chunks_in_each_dim[1]),
                                         np.arange(n_chunks_in_each_dim[2]))

    ant_1 = vis_dataset.ANTENNA1
    ant_2 = vis_dataset.ANTENNA2
    ant_ids = mxds.ANTENNA.antenna_id.data
    beam_ids = mxds.beam_ids.data
    freq_chan = vis_dataset.chan.data

    n_chunks = np.prod(n_chunks_in_each_dim[:3])
    cf_map_list = _ndim_list((n_chunks_in_each_dim[0], n_chunks_in_each_dim[1], n_chunks_in_each_dim[2]))
    cf_parms_indx_list = _ndim_list((n_chunks,))
    a_parms_indx_list = _ndim_list((n_chunks,))
    w_parms_indx_list = _ndim_list((n_chunks,))

    # pg does not need chan dim, there will be redundant calculations. Maybe split later
    pg_map_list = _ndim_list((n_chunks_in_each_dim[0], n_chunks_in_each_dim[1]))
    pg_parms_indx_list = _ndim_list((n_chunks,))

    i_chunk = 0
    for c_time, c_baseline, c_chan in iter_chunks_indx:
        #print('c_time,c_baseline,c_chan',c_time,c_baseline,c_chan)
        chunk_cf_and_pg = dask.delayed(_cf_map_jit)(
            beam_map.data.partitions[c_baseline],
            beam_ids,
            cf_beam_pair_id.data,
            pa.data.partitions[c_time, 0],
            cf_pa_centers.data,
            ant_1.data.partitions[c_time, c_baseline],
            ant_2.data.partitions[c_time, c_baseline],
            ant_ids,
            chan_map.data.partitions[c_chan],
            freq_chan,
            cf_pb_freq.data,
            w.data.partitions[c_time, c_baseline],
            cf_w.data,
            pointing_ra_dec.data.partitions[c_time, 0],
            cf_pointing.data)

        #w_indx_arr, a_indx_arr, cf_indx_arr, cf_map, pg_indx_arr, pg_map
        w_parms_indx_list[i_chunk] = chunk_cf_and_pg[0]   # can't do from_delayed since number of elements is unknown
        a_parms_indx_list[i_chunk] = chunk_cf_and_pg[1]   # can't do from_delayed since number of elements is unknown
        cf_parms_indx_list[i_chunk] = chunk_cf_and_pg[2]  # can't do from_delayed since number of elements is unknown
        cf_map_list[c_time][c_baseline][c_chan] = da.from_delayed(
            chunk_cf_and_pg[3],
            (chunk_sizes[0][c_time], chunk_sizes[1][c_baseline], chunk_sizes[2][c_chan]),
            dtype=int)

        pg_parms_indx_list[i_chunk] = chunk_cf_and_pg[4]  # can't do from_delayed since number of elements is unknown
        pg_map_list[c_time][c_baseline] = da.from_delayed(
            chunk_cf_and_pg[5],
            (chunk_sizes[0][c_time], chunk_sizes[1][c_baseline]),
            dtype=int)

        i_chunk = i_chunk + 1

    cf_map = da.block(cf_map_list)  # Awesome function
    pg_map = da.block(pg_map_list)

    w_parms_indx = _tree_combine_list(w_parms_indx_list, _find_unique_subset)
    a_parms_indx = _tree_combine_list(a_parms_indx_list, _find_unique_subset)
    cf_parms_indx = _tree_combine_list(cf_parms_indx_list, _find_unique_subset)
    pg_parms_indx = _tree_combine_list(pg_parms_indx_list, _find_unique_subset)

    #list_of_dask_delayed = [cf_map,pg_map,cf_parms_indx,pg_parms_indx,w_parms_indx,a_parms_indx]
    list_of_arrs = dask.compute([cf_map, pg_map, cf_parms_indx, pg_parms_indx, w_parms_indx, a_parms_indx])
    cf_map, pg_map, cf_parms_indx, pg_parms_indx, w_parms_indx, a_parms_indx = list_of_arrs[0]

    time_chunksize = vis_dataset[sel_parms['data']].chunks[0][0]
    baseline_chunksize = vis_dataset[sel_parms['data']].chunks[1][0]
    chan_chunksize = vis_dataset[sel_parms['data']].chunks[2][0]

    cf_map = da.from_array(cf_map, chunks=(time_chunksize, baseline_chunksize, chan_chunksize))
    w_parms_indx = da.from_array(w_parms_indx, chunks=(1, 1))
    a_parms_indx = da.from_array(a_parms_indx, chunks=(1, 6))
    cf_parms_indx = da.from_array(cf_parms_indx, chunks=(1, 3))
    pg_parms_indx = da.from_array(pg_parms_indx, chunks=(1, 3))
    pg_map = da.from_array(pg_map, chunks=(time_chunksize, baseline_chunksize))

    gcf_dataset = xr.Dataset()
    coords = {'gcf_indx': ['a', 'w', 'gcf_flat'], 'pg_indx': ['p1', 'p2', 'pg_flat'],
              'a_indx': ['pa1', 'b1', 'pa2', 'b2', 'c', 'a_flat'], 'w_indx': ['w']}
    gcf_dataset = gcf_dataset.assign_coords(coords)

    gcf_dataset['GCF_MAP'] = xr.DataArray(cf_map, dims=('time', 'baseline', 'chan'))
    gcf_dataset['GCF_PARMS_INDX'] = xr.DataArray(cf_parms_indx, dims=('gcf', 'gcf_indx'))
    gcf_dataset['W_PARMS_INDX'] = xr.DataArray(w_parms_indx, dims=('w', 'w_indx'))
    gcf_dataset['A_PARMS_INDX'] = xr.DataArray(a_parms_indx, dims=('a', 'a_indx'))
    gcf_dataset['GCF_A_PA'] = cf_pa_centers
    gcf_dataset['GCF_A_FREQ'] = cf_pb_freq
    gcf_dataset['GCF_A_BEAM_ID'] = cf_beam_pair_id
    gcf_dataset['GCF_W'] = cf_w
    gcf_dataset['PG_MAP'] = xr.DataArray(pg_map, dims=('time', 'baseline'))
    gcf_dataset['PG_PARMS_INDX'] = xr.DataArray(pg_parms_indx, dims=('pg', 'pg_indx'))
    gcf_dataset['PG_POINTING'] = cf_pointing

    '''
    cf_map = da.block(cf_map_list) #Awesome function
    pg_map = da.block(pg_map_list)
    w_parms_indx = da.from_delayed(_tree_combine_list(w_parms_indx_list,_find_unique_subset),shape=(np.nan,1),dtype=int) #(nan,1) first dim length is unkown
    a_parms_indx = da.from_delayed(_tree_combine_list(a_parms_indx_list,_find_unique_subset),shape=(np.nan,6),dtype=int) #(nan,6) first dim length is unkown
    cf_parms_indx = da.from_delayed(_tree_combine_list(cf_parms_indx_list,_find_unique_subset),shape=(np.nan,3),dtype=int) #(nan,3) first dim length is unkown
    pg_parms_indx = da.from_delayed(_tree_combine_list(pg_parms_indx_list,_find_unique_subset),shape=(np.nan,3),dtype=int) #(nan,3) first dim length is unkown

    #w_parms_indx = da.from_delayed(_tree_combine_list(w_parms_indx_list,_find_unique_subset),shape=(np.nan,1),dtype=int) #(nan,1) first dim length is unkown
    #a_parms_indx = da.from_delayed(_tree_combine_list(a_parms_indx_list,_find_unique_subset),shape=(np.nan,6),dtype=int) #(nan,6) first dim length is unkown
    #cf_parms_indx = da.from_delayed(_tree_combine_list(cf_parms_indx_list,_find_unique_subset),shape=(280,7),dtype=int) #(nan,3) first dim length is unkown
    #pg_parms_indx = da.from_delayed(_tree_combine_list(pg_parms_indx_list,_find_unique_subset),shape=(23,3),dtype=int) #(nan,3) first dim length is unkown

    gcf_dataset = xr.Dataset()
    coords = {'gcf_indx':['a','w','gcf_flat'],'pg_indx':['p1','p2','pg_flat'],'a_indx':['pa1','b1','pa2','b2','c','a_flat'],'w_indx':['w']}
    gcf_dataset = gcf_dataset.assign_coords(coords)

    gcf_dataset['GCF_MAP'] = xr.DataArray(cf_map, dims=('time','baseline','chan'))
    gcf_dataset['GCF_PARMS_INDX'] = xr.DataArray(cf_parms_indx, dims=('gcf','gcf_indx'))
    gcf_dataset['W_PARMS_INDX'] = xr.DataArray(w_parms_indx, dims=('w','w_indx'))
    gcf_dataset['A_PARMS_INDX'] = xr.DataArray(a_parms_indx, dims=('a','a_indx'))
    gcf_dataset['GCF_A_PA'] = cf_pa_centers
    gcf_dataset['GCF_A_FREQ'] = cf_pb_freq
    gcf_dataset['GCF_A_BEAM_ID'] = cf_beam_pair_id
    gcf_dataset['GCF_W'] = cf_w
    gcf_dataset['PG_MAP'] = xr.DataArray(pg_map, dims=('time','baseline'))
    gcf_dataset['PG_PARMS_INDX'] = xr.DataArray(pg_parms_indx, dims=('pg','pg_indx'))
    gcf_dataset['PG_POINTING'] = cf_pointing
    '''

    #dask.visualize(gcf_dataset,'make_gcf_coords')
    return gcf_dataset
sample = klb.readfull(fnames[0])  # Sample image

# Generate lazy arrays
lazy_arrays = [dask.delayed(klb.readfull)(fn) for fn in fnames]
lazy_arrays = [
    da.from_delayed(x, shape=sample.shape, dtype=sample.dtype) for x in lazy_arrays
]

# Generate empty object array to organize each chunk that loads the 3D volume
a = np.empty((2, 2701, 1, 1, 1), dtype=object)  # Dimension of (view, timepoint, Z, Y, X)
#a = np.empty((2,10,1,1,1), dtype=object) #Dimension of (view,timepoint,Z,Y,X)

for fn, x in zip(fnames, lazy_arrays):
    view = int(fn[fn.index("_CM") + 3:].split("_")[0])
    timepoint = int(fn[fn.index("_TM") + 3:].split("_")[0])
    a[view, timepoint, 0, 0, 0] = x
    print('CM', view, 'TM', timepoint)

# Stitch together all these blocks into a single N-dimensional array
a = da.block(a.tolist())
a = a.rechunk((1, 1, 75, 128, 308))
print(type(a), a.shape, a.dtype, a.chunksize, 'Size', round(a.size / (1024**3), 2), 'GB')

if ch == 0:
    a.to_zarr(join(outPath, 'membrane-v2.zarr'), compressor=BZ2(level=9))
elif ch == 1:
    a.to_zarr(join(outPath, 'nuclei-v2.zarr'), compressor=BZ2(level=9))

print('Took', round(time.time() - t0, 2), 'sec')
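
# A minimal, self-contained sketch (not part of the original source) of the
# object-array -> da.block pattern used in the snippet above and in several of
# the other readers, with a synthetic fake_loader standing in for klb.readfull
# or imageio.imread. All names here are illustrative only.
import numpy as np
import dask
import dask.array as da

def fake_loader(view, timepoint):
    # stand-in for an on-disk volume reader
    return np.full((4, 5, 6), view * 100 + timepoint, dtype=np.uint16)

n_views, n_timepoints = 2, 3
sample = fake_loader(0, 0)

# object array with one slot per (view, timepoint, Z, Y, X) block
blocks = np.empty((n_views, n_timepoints, 1, 1, 1), dtype=object)
for v in range(n_views):
    for t in range(n_timepoints):
        blocks[v, t, 0, 0, 0] = da.from_delayed(
            dask.delayed(fake_loader)(v, t), shape=sample.shape, dtype=sample.dtype)

# trailing size-1 dims are filled by the 3-D chunks during da.block
stack = da.block(blocks.tolist())
assert stack.shape == (n_views, n_timepoints, 4, 5, 6)
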
def _daread(
    img: Path,
    offsets: List[np.ndarray],
    read_lengths: np.ndarray,
    chunk_by_dims: List[str] = [
        Dimensions.SpatialZ,
        Dimensions.SpatialY,
        Dimensions.SpatialX,
    ],
    S: int = 0,
) -> Tuple[da.core.Array, str]:
    """
    Read a LIF image file as a delayed dask array where certain dimensions act as
    the chunk size.

    Parameters
    ----------
    img: Path
        The filepath to read.
    offsets: List[numpy.ndarray]
        A List of numpy ndarrays offsets, see _compute_offsets for more details.
    read_lengths: numpy.ndarray
        A 1D numpy array of read lengths, the index is the scene index
    chunk_by_dims: List[str]
        The dimensions to use for mapping the chunks / blocks.
        Default: [Dimensions.SpatialZ, Dimensions.SpatialY, Dimensions.SpatialX]
        Note: SpatialY and SpatialX will always be added to the list if not present.
    S: int
        If the image has different dimensions on any scene from another, the dask
        array construction will fail. In that case, use this parameter to specify a
        specific scene to construct a dask array for.
        Default: 0 (select the first scene)

    Returns
    -------
    img: dask.array.core.Array
        The constructed dask array where certain dimensions are chunked.
    dims: str
        The dimension order as a string.
    """
    # Get image dims indicies
    lif = LifFile(filename=img)
    image_dim_indices = LifReader._dims_shape(lif=lif)

    # Catch inconsistent scene dimension sizes
    if len(image_dim_indices) > 1:
        # Choose the provided scene
        try:
            image_dim_indices = image_dim_indices[S]
            log.info(
                f"File contains variable dimensions per scene, "
                f"selected scene: {S} for data retrieval."
            )
        except IndexError:
            raise exceptions.InconsistentShapeError(
                f"The LIF image provided has variable dimensions per scene. "
                f"Please provide a valid index to the 'S' parameter to create a "
                f"dask array for the index provided. "
                f"Provided scene index: {S}. Scene index range: "
                f"0-{len(image_dim_indices)}."
            )
    else:
        # If the list is length one that means that all the scenes in the image
        # have the same dimensions
        # Just select the first dictionary in the list
        image_dim_indices = image_dim_indices[0]

    # Uppercase dimensions provided to chunk by dims
    chunk_by_dims = [d.upper() for d in chunk_by_dims]

    # Always add Y and X dims to chunk by dims because that is how LIF files work
    if Dimensions.SpatialY not in chunk_by_dims:
        log.info(
            "Adding the Spatial Y dimension to chunk by dimensions as it was not "
            "found."
        )
        chunk_by_dims.append(Dimensions.SpatialY)
    if Dimensions.SpatialX not in chunk_by_dims:
        log.info(
            "Adding the Spatial X dimension to chunk by dimensions as it was not "
            "found."
        )
        chunk_by_dims.append(Dimensions.SpatialX)

    # Setup read dimensions for an example chunk
    first_chunk_read_dims = {}
    for dim, (dim_begin_index, dim_end_index) in image_dim_indices.items():
        # Only add the dimension if the dimension isn't a part of the chunk
        if dim not in chunk_by_dims:
            # Add to read dims
            first_chunk_read_dims[dim] = dim_begin_index

    # Read first chunk for information used by dask.array.from_delayed
    sample, sample_dims = LifReader._get_array_from_offset(
        im_path=img,
        offsets=offsets,
        read_lengths=read_lengths,
        meta=lif.xml_root,
        read_dims=first_chunk_read_dims,
    )

    # Get the shape for the chunk and operating shape for the dask array
    # We also collect the chunk and non chunk dimension ordering so that we can
    # swap the dimensions after we block the dask array together.
    sample_chunk_shape = []
    operating_shape = []
    non_chunk_dimension_ordering = []
    chunk_dimension_ordering = []
    for i, dim_info in enumerate(sample_dims):
        # Unpack dim info
        dim, size = dim_info

        # If the dim is part of the specified chunk dims then append it to the
        # sample, and, append the dimension to the chunk dimension ordering
        if dim in chunk_by_dims:
            sample_chunk_shape.append(size)
            chunk_dimension_ordering.append(dim)

        # Otherwise, append the dimension to the non chunk dimension ordering, and,
        # append the true size of the image at that dimension
        else:
            non_chunk_dimension_ordering.append(dim)
            operating_shape.append(
                image_dim_indices[dim][1] - image_dim_indices[dim][0]
            )

    # Convert shapes to tuples and combine the non and chunked dimension orders as
    # that is the order the data will actually come out of the read data as
    sample_chunk_shape = tuple(sample_chunk_shape)
    blocked_dimension_order = (
        non_chunk_dimension_ordering + chunk_dimension_ordering
    )

    # Fill out the rest of the operating shape with dimension sizes of 1 to match
    # the length of the sample chunk. When dask.block happens it fills the
    # dimensions from inner-most to outer-most with the chunks as long as the
    # dimension is size 1. Basically, we are adding empty dimensions to the
    # operating shape that will be filled by the chunks from dask
    operating_shape = tuple(operating_shape) + (1,) * len(sample_chunk_shape)

    # Create empty numpy array with the operating shape so that we can iter through
    # and use the multi_index to create the readers.
    lazy_arrays = np.ndarray(operating_shape, dtype=object)

    # We can enumerate over the multi-indexed array and construct read_dims
    # dictionaries by simply zipping together the ordered dims list and the current
    # multi-index plus the begin index for that plane. We then set the value of the
    # array at the same multi-index to the delayed reader using the constructed
    # read_dims dictionary.
    dims = [d for d in Dimensions.DefaultOrder]
    begin_indicies = tuple(image_dim_indices[d][0] for d in dims)
    for i, _ in np.ndenumerate(lazy_arrays):
        # Add the czi file begin index for each dimension to the array dimension
        # index
        this_chunk_read_indicies = (
            current_dim_begin_index + curr_dim_index
            for current_dim_begin_index, curr_dim_index in zip(begin_indicies, i)
        )

        # Zip the dims with the read indices
        this_chunk_read_dims = dict(
            zip(blocked_dimension_order, this_chunk_read_indicies)
        )

        # Remove the dimensions that we want to chunk by from the read dims
        for d in chunk_by_dims:
            if d in this_chunk_read_dims:
                this_chunk_read_dims.pop(d)

        # Add delayed array to lazy arrays at index
        lazy_arrays[i] = da.from_delayed(
            delayed(LifReader._imread)(
                img, offsets, read_lengths, lif.xml_root, this_chunk_read_dims
            ),
            shape=sample_chunk_shape,
            dtype=sample.dtype,
        )

    # Convert the numpy array of lazy readers into a dask array and fill the inner
    # most empty dimensions with chunks
    merged = da.block(lazy_arrays.tolist())

    # Because we have set certain dimensions to be chunked and others not
    # we will need to transpose back to original dimension ordering
    # Example being, if the original dimension ordering was "SZYX" and we want to
    # chunk by "S", "Y", and "X" we created an array with dimensions ordering "ZSYX"
    transpose_indices = []
    transpose_required = False
    for i, d in enumerate(Dimensions.DefaultOrder):
        new_index = blocked_dimension_order.index(d)
        if new_index != i:
            transpose_required = True
            transpose_indices.append(new_index)
        else:
            transpose_indices.append(i)

    # Only run if the transpose is actually required
    # The default case is "Z", "Y", "X", which _usually_ doesn't need to be
    # transposed because that is _usually_ the normal dimension order of the LIF
    # file anyway
    if transpose_required:
        merged = da.transpose(merged, tuple(transpose_indices))

    # Because dimensions outside of Y and X can be in any order and present or not
    # we also return the dimension order string.
    return merged, "".join(dims)
def mosaic_process_date(
    date,
    date_files,
    temporary_dir,
    output_dir,
    memory,
    multi=False,
    overwrite=False,
    gdal_translate="gdal_translate",
    gdalwarp="gdalwarp",
):
    """Mosaic and regrid MODIS Fpar data from a given date.

    Args:
        date (str): MODIS date string, e.g. '2021034'.
        date_files (iterable of pathlib.Path): Files containing data for `date`.
        temporary_dir (pathlib.Path): Directory for temporary files.
        output_dir (pathlib.Path): Directory for output files.
        memory (int): GDAL memory in MB. Capped at 9999 MB.
        multi (bool): If True, add the '-multi' option to gdalwarp.
        overwrite (bool): If True, overwrite existing files.
        gdal_translate (str): gdal_translate command path.
        gdalwarp (str): gdalwarp command path.

    Returns:
        None or pathlib.Path: None if no processing could be done, or the filename
        of the processed data.
    """
    if date == "2000225":
        if len(date_files) != 131:
            logger.warning(f"Expected 131 files for 2000225. Got {len(date_files)}.")
    elif date == "2002081":
        if len(date_files) != 179:
            logger.warning(f"Expected 179 files for 2002081. Got {len(date_files)}.")
    elif len(date_files) < min_n_tiles:
        logger.warning(f"Found {len(date_files)} files (tiles) for '{date}'. "
                       f"Expected at least {min_n_tiles}.")
        return None

    # Limit to 9999 because otherwise the parameter is interpreted as bytes instead of
    # megabytes.
    memory = min(9999, memory)

    output_base = temporary_dir / f"{fpar_band_name}_{date}"
    mosaic_file = output_base.with_name(output_base.stem + "_mosaic.hdf5")
    mosaic_vrt_file = mosaic_file.with_suffix(".vrt")
    regridded_file = output_base.with_name(output_base.stem + "_0d25_raw.nc")
    output_file = Path(output_dir) / (output_base.stem + "_0d25.nc")

    # Used to convert the bounding coordinates to MODIS (m) coordinates.
    # NOTE: transformer.transform(lat, lon) -> (x, y)
    transformer = Transformer.from_crs("EPSG:4326", modis_proj)

    bounds_coords = defaultdict(list)

    # Collection of 'delayed' objects containing the data, indexed using
    # (horizontal, vertical) MODIS tile numbers.
    tile_data = {}

    for data_file in date_files:
        fpar_dataset_name = (
            f"HDF4_EOS:EOS_GRID:{data_file}:MOD_Grid_MOD15A2H:{fpar_band_name}"
        )
        qc_dataset_name = (
            f"HDF4_EOS:EOS_GRID:{data_file}:MOD_Grid_MOD15A2H:{qc_band_name}")

        with rasterio.open(fpar_dataset_name) as dataset:
            tags = dataset.tags()
            for bound_name, axis in bound_axes.items():
                bound_value = float(tags[f"{bound_name}BOUNDINGCOORDINATE"])
                bounds_coords[axis].append(bound_value)

        tile_data[tuple(
            # Parse the horizontal (h) and vertical (v) tile numbers.
            map(int, re.search(r"h(\d{2})v(\d{2})", str(data_file)).groups())
        )] = da.from_delayed(
            delayed_read_band_data(fpar_dataset_name, qc_dataset_name),
            shape=tile_shape,
            dtype=np.uint8,
        )

    # Get the extreme bounding values in lat lon coordinates.
    extreme_bounds = {
        axis: (min(axis_bounds), max(axis_bounds))
        for axis, axis_bounds in bounds_coords.items()
    }
    logger.debug(f"{date} {extreme_bounds}")

    # Transform the extreme bounding values to MODIS coordinates for reprojection.
    modis_bounds = {}
    for axis, bounds in extreme_bounds.items():
        modis_bounds[axis] = sorted(
            transformer.transform(
                *(0, extreme_coord)[slice(None, None, 1 if axis == "x" else -1)])
            [0 if axis == "x" else 1]
            for extreme_coord in bounds)
    logger.debug(f"{date} {modis_bounds}")

    # Create the mosaic of MODIS tiles.
    # Extract all possible vertical and horizontal tile numbers.
    hs, vs = zip(*tile_data)
    data_blocks = []
    # Iterate over all tiles, using existing data where possible.
    for v_index in range(min(vs), max(vs) + 1):
        data_blocks.append([])
        for h_index in range(min(hs), max(hs) + 1):
            data_blocks[-1].append(
                tile_data.get(
                    (h_index, v_index),
                    # Use full() to pad irrelevant tiles with the invalid data marker.
                    da.full(
                        tile_shape,
                        fill_value=fill_value,
                        dtype=np.uint8,
                        # XXX: Specifying 'chunksize' here causes the following error
                        # when calling 'to_hdf5':
                        # OSError: Can't write data (no appropriate function for conversion path)
                        # chunksize=tile_shape,
                    ),
                ))

    data = da.block(data_blocks)[::-1]

    if mosaic_file.is_file() and overwrite:
        logger.info(f"'{mosaic_file}' exists. Deleting.")
        mosaic_file.unlink()

    recalculate = False
    if not mosaic_file.is_file():
        recalculate = True
        data.to_hdf5(str(mosaic_file), "/fpar")
    else:
        logger.warning(f"'{mosaic_file}' exists. Not deleting.")

    # Attach information about the transform prior to calling 'gdalwarp'.
    y_pixels_max = data.shape[0] - 1
    x_pixels_max = data.shape[1] - 1

    y_min, y_max = modis_bounds["y"]
    x_min, x_max = modis_bounds["x"]

    gcp_opts = []
    for y_pixel, y_loc, x_pixel, x_loc in [
        (0, y_min, 0, x_min),
        (y_pixels_max, y_max, 0, x_min),
        (y_pixels_max, y_max, x_pixels_max, x_max),
        (0, y_min, x_pixels_max, x_max),
    ]:
        # -gcp <pixel> <line> <easting> <northing>
        gcp_opts.append(f"-gcp {x_pixel} {y_pixel} {x_loc} {y_loc}")

    cmd = " ".join((
        f"{gdal_translate} -of VRT -a_srs '{modis_proj}'",
        " ".join(gcp_opts),
        f'HDF5:"{mosaic_file}"://fpar {mosaic_vrt_file}',
    ))
    logger.debug(f"{date} gdal_translate cmd: {cmd}")
    check_output(shlex.split(cmd))

    execute_gdalwarp = True
    if regridded_file.is_file():
        if recalculate or overwrite:
            logger.info(f"'{regridded_file}' exists. Deleting.")
            regridded_file.unlink()
        else:
            logger.warning(
                f"'{regridded_file}' exists and '{mosaic_file}' was not changed. "
                "Not executing gdalwarp.")
            execute_gdalwarp = False

    if execute_gdalwarp:
        cmd = " ".join((
            f"{gdalwarp} -s_srs '{modis_proj}' -t_srs EPSG:4326 -ot Float32",
            "-srcnodata 255 -dstnodata -1",
            "-r average",
            *(("-multi", ) if multi else ()),
            "-te -180 -90 180 90 -ts 1440 720",
            f"-wm {memory}",
            f"-of netCDF {mosaic_vrt_file} {regridded_file}",
        ))
        logger.debug(f"{date} gdalwarp cmd: {cmd}")
        check_output(shlex.split(cmd))

    if output_file.is_file():
        if execute_gdalwarp or overwrite:
            logger.info(f"'{output_file}' exists. Deleting.")
            output_file.unlink()
        else:
            logger.warning(
                f"'{output_file}' exists and '{regridded_file}' was not changed. "
                "Not carrying out final processing.")
            return output_file

    # Read the regridded file, apply scaling factor, change metadata, and write to the
    # output file.
    cube = iris.load_cube(str(regridded_file))
    cube *= 0.01
    cube.var_name = None
    cube.standard_name = None
    cube.long_name = "Fraction of Absorbed Photosynthetically Active Radiation"
    cube.units = "1"
    safe_cube_save(cube, output_file, temporary_dir)
    logger.info(f"Finished writing to '{output_file}'.")
    return output_file
def dask_safeslice(data, indices, chunks=None):
    """
    COPIED FROM https://github.com/dask/dask/issues/5540#issuecomment-601150129
    Added fancy indexing xarray.core.indexing.DaskIndexingAdapter

    Return a subset of a dask array, but with indexing applied independently to
    each slice of the input array, *prior* to their recombination to produce
    the result array.

    Args:

    * data (dask array):
        input data
    * indices (int or slice or tuple(int or slice)):
        required sub-section of the data.

    Kwargs:

    * chunks (list of (int or "auto")):
        chunking argument for 'rechunk' applied to the input.
        If set, forces the input to be rechunked as specified.
        ( This replaces the normal operation, which is to rechunk the input
        making the indexed dimensions undivided ).
        Mainly for testing on small arrays.

    .. note::

        'indices' currently does not support Ellipsis or newaxis.

    """
    from collections.abc import Iterable
    import dask.array as da

    # The idea is to "push down" the indexing operation to "underneath" the
    # result concatenation, so it gets done _before_ that.
    # This 'result concatenation' is actually implicit: the _implied_
    # concatenation of all the result chunks into a single output array.
    # We assume that any *one* chunk *can* be successfully computed.
    # By applying the indexing operation to each chunk, prior to the
    # complete result (re-)construction, we hope to make this work.

    # Normalise input to a list over all data dimensions.
    # NOTE: FOR NOW, this does not support Ellipsis.
    # TODO: that could easily be fixed.
    # Convert the slicing indices to a list of (int or slice).
    # ( NOTE: not supporting Ellipsis. )
    if not isinstance(indices, Iterable):
        # Convert a single key (slice or integer) to a length-1 list.
        indices = [indices]
    else:
        # Convert other iterable types to lists.
        indices = list(indices)

    n_data_dims = data.ndim
    assert len(indices) <= n_data_dims

    # Extend with ":" in all the additional (trailing) dims.
    all_slice = slice(None)
    indices += (n_data_dims - len(indices)) * [all_slice]
    assert len(indices) == n_data_dims

    # Discriminate indexed and non-indexed dims.
    # An "indexed" dim is where input index is *anything* other than a ":".
    dim_is_indexed = [index != all_slice for index in indices]

    # Work out which indices are simple integer values.
    # ( by definition, all of these will be "indexed" dims )
    dim_is_removed = [isinstance(key, int) for key in indices]

    # Replace single-value indices with length-1 indices, so the indexing
    # preserves all dimensions (as this makes reconstruction easier).
    # ( We use the above 'dim_is_removed' to correct this afterwards. )
    indices = [slice(key, key + 1) if isinstance(key, int) else key
               for key in indices]

    # We will now rechunk to get "our chunks" : but these must not be divided
    # in dimensions affected by the requested indexing.
    # So we rechunk, but insist that those dimensions are kept whole.
    # ( Obviously, not always optimal ... )
    # As the indexed dimensions will always be _reduced_ by the indexing, this
    # is obviously over-conservative + may give chunks which are rather too
    # small.  Let's just ignore that problem for now!
    if chunks is not None:
        rechunk_dim_specs = list(chunks)
    else:
        rechunk_dim_specs = ["auto"] * n_data_dims
    for i_dim in range(n_data_dims):
        if dim_is_indexed[i_dim]:
            rechunk_dim_specs[i_dim] = -1
    data = da.rechunk(data, chunks=rechunk_dim_specs)

    # Calculate multidimensional indexings of the original data array which
    # correspond to all these chunks.
    # Note: following the "-1"s in the above rechunking spec, the indexed dims
    # should all have only one chunk in them.
    assert all(
        len(data.chunks[i_dim]) == 1
        for i_dim in range(n_data_dims)
        if dim_is_removed[i_dim]
    )

    # Make an array of multidimensional indexes corresponding to all chunks.
    chunks_shape = [len(chunk_lengths) for chunk_lengths in data.chunks]
    chunks_shape += [n_data_dims]
    chunk_indices = np.zeros(chunks_shape, dtype=object)
    # The chunk_indices array ...
    #  * has dimensions of n-data-dims + 1
    #  * has shape of "chunks-shape" + (n_data_dims,)
    #  * each entry[i0, i1, iN-1] --> n_data_dims * slice-objects.

    # Pre-fill indexes array with [:, :, ...]
    chunk_indices[...] = all_slice

    # Set slice ranges for each dimension at a time.
    for i_dim in range(n_data_dims):
        # Fix all keys for this data dimension : chunk_indices[..., i_dim]
        dim_inds = [all_slice] * n_data_dims + [i_dim]
        if dim_is_indexed[i_dim]:
            # This is a user-indexed dim, so should be un-chunked.
            assert len(data.chunks[i_dim]) == 1
            # Set keys for this dim to the user-requested indexing.
            if EMBED_INDEXES:
                chunk_indices[tuple(dim_inds)] = indices[i_dim]
        else:
            # Replace keys for this dim with the slice range for the
            # relevant chunk, for each chunk in the dim.
            startend_positions = np.cumsum([0] + list(data.chunks[i_dim]))
            starts, ends = startend_positions[:-1], startend_positions[1:]
            for i_key, (i_start, i_end) in enumerate(zip(starts, ends)):
                dim_inds[i_dim] = i_key
                chunk_indices[tuple(dim_inds)] = slice(i_start, i_end)
                # E.G. chunk_indices[:, :, 1, :][2] = slice(3,6)

    # Make actual addressed chunks by indexing the original array, arrange them
    # in the same pattern, and re-combine them all to make a result array.
    # This needs to be a list-of-lists construction, as da.block requires it.
    # ( an array of arrays is presumably too confusing ?!? )
    def get_chunks(multidim_indices):
        if multidim_indices.ndim > 1:
            # Convert the "array of chunks" dims --> lists-of-lists
            result = [
                get_chunks(multidim_indices[i_part])
                for i_part in range(multidim_indices.shape[0])
            ]
        else:
            # Innermost dim contains n-dims * slice-objects
            # Convert these into a slice of the data array.
            result = data.__getitem__(tuple(multidim_indices))

            if not EMBED_INDEXES:
                # Now *also* apply the required indexing to this chunk.
                # It initially seemed *essential* that this be an independent
                # operation, so that the memory associated with the whole chunk
                # can be released.
                # But ACTUALLY this is not so, given the next step (see on).
                try:
                    result = result.__getitem__(tuple(indices))
                except NotImplementedError:
                    result = data
                    for axis, subkey in reversed(list(enumerate(tuple(indices)))):
                        result = result[(slice(None),) * axis + (subkey,)]

            # AND FINALLY : apply a numpy copy to this indexed-chunk.
            # This is essential, to release the source chunks ??
            # see: https://github.com/dask/dask/issues/3595#issuecomment-449546228
            result = result.map_blocks(np.copy)

        return result

    listoflists_of_chunks = get_chunks(chunk_indices)
    result = da.block(listoflists_of_chunks)

    assert result.ndim == n_data_dims  # Unchanged as 'da.block' concatenates.

    # Finally remove the extra dimensions for single-value indices.
    assert all(
        result.shape[i_dim] == 1
        for i_dim in range(n_data_dims)
        if dim_is_removed[i_dim]
    )
    all_dim_indices = [
        0 if dim_is_removed[i_dim] else all_slice
        for i_dim in range(n_data_dims)
    ]
    result = result.__getitem__(tuple(all_dim_indices))

    return result
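
# A small usage sketch (not part of the original source) for dask_safeslice.
# EMBED_INDEXES is a module-level flag in the linked gist; it is assumed to be
# False here, and numpy is assumed to be imported as np where the function lives.
import numpy as np
import dask.array as da

EMBED_INDEXES = False  # assumed default from the linked gist

x = np.arange(24).reshape(4, 6)
d = da.from_array(x, chunks=(2, 3))

# integer index on axis 0, slice on axis 1: applied per-chunk, then recombined
sub = dask_safeslice(d, (1, slice(1, 4)))
assert sub.shape == (3,)
assert np.array_equal(sub.compute(), x[1, 1:4])
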
def phase_rotate_sgraph(vis_dataset, global_dataset, rotation_parms, sel_parms, storage_parms):
    """
    Rotate uvw with faceting-style rephasing for a multifield mosaic.
    The specified phasecenter and field phase centers are assumed to be in the same frame.
    This does not support east-west arrays, ephemeris objects, or objects in the near field (no refocus).

    Parameters
    ----------
    vis_dataset : xarray.core.dataset.Dataset
        input Visibility Dataset
    Returns
    -------
    vis_dataset : xarray.core.dataset.Dataset
    """
    # Based on UVWMachine and FTMachine
    # measures/Measures/UVWMachine.cc

    # Important: flags must not be applied before calling rotate (uvw coordinates
    # are also flagged); doing so would destroy the rotation transform.

    # Performance improvements: apply_rotation_matrix (jit code)

    # print('1. numpy', vis_dataset.DATA[:, 0, 0, 0].values)
    from ngcasa._ngcasa_utils._store import _store
    from scipy.spatial.transform import Rotation as R
    import numpy as np
    import copy
    import dask.array as da
    import xarray as xr
    from ngcasa._ngcasa_utils._check_parms import _check_storage_parms, _check_sel_parms, _check_existence_sel_parms
    from ._imaging_utils._check_imaging_parms import _check_rotation_parms
    import time
    import numba
    from numba import double
    import dask
    import itertools

    _sel_parms = copy.deepcopy(sel_parms)
    _rotation_parms = copy.deepcopy(rotation_parms)
    _storage_parms = copy.deepcopy(storage_parms)

    assert _check_sel_parms(_sel_parms, {'uvw_in': 'UVW', 'uvw_out': 'UVW_ROT', 'data_in': 'DATA', 'data_out': 'DATA_ROT'}), "######### ERROR: sel_parms checking failed"
    assert _check_existence_sel_parms(vis_dataset, {'uvw_in': _sel_parms['uvw_in'], 'data_in': _sel_parms['data_in']}), "######### ERROR: sel_parms checking failed"
    assert _check_rotation_parms(_rotation_parms), "######### ERROR: rotation_parms checking failed"
    assert _check_storage_parms(_storage_parms, 'dataset.vis.zarr', 'phase_rotate'), "######### ERROR: storage_parms checking failed"
    assert _sel_parms['uvw_out'] != _sel_parms['uvw_in'], "######### ERROR: sel_parms checking failed, sel_parms['uvw_out'] can not be the same as sel_parms['uvw_in']."
    assert _sel_parms['data_out'] != _sel_parms['data_in'], "######### ERROR: sel_parms checking failed, sel_parms['data_out'] can not be the same as sel_parms['data_in']."

    # Phase center
    ra_image = _rotation_parms['image_phase_center'][0]
    dec_image = _rotation_parms['image_phase_center'][1]

    rotmat_image_phase_center = R.from_euler('XZ', [[np.pi/2 - dec_image, -ra_image + np.pi/2]]).as_matrix()[0]
    image_phase_center_cosine = _directional_cosine([ra_image, dec_image])

    n_fields = global_dataset.dims['field']
    field_names = global_dataset.field
    uvw_rotmat = np.zeros((n_fields, 3, 3), np.double)
    phase_rotation = np.zeros((n_fields, 3), np.double)

    fields_phase_center = global_dataset.FIELD_PHASE_DIR.values[:, :, vis_dataset.attrs['ddi']]
    # print(fields_phase_center)

    # Create a rotation matrix for each field
    for i_field in range(n_fields):
        # Not sure if the last dimension in FIELD_PHASE_DIR is the ddi number
        field_phase_center = fields_phase_center[i_field, :]
        # Define rotation to a coordinate system with pole towards in-direction
        # and X-axis W; by rotating around z-axis over -(90-long); and around
        # x-axis (lat-90).
        rotmat_field_phase_center = R.from_euler('ZX', [[-np.pi/2 + field_phase_center[0], field_phase_center[1] - np.pi/2]]).as_matrix()[0]
        uvw_rotmat[i_field, :, :] = np.matmul(rotmat_image_phase_center, rotmat_field_phase_center).T

        if _rotation_parms['common_tangent_reprojection'] == True:
            uvw_rotmat[i_field, 2, 0:2] = 0.0  # (Common tangent rotation needed for joint mosaics, see last part of FTMachine::girarUVW in CASA)

        field_phase_center_cosine = _directional_cosine(field_phase_center)
        phase_rotation[i_field, :] = np.matmul(rotmat_image_phase_center, (image_phase_center_cosine - field_phase_center_cosine))

    # Use the checked/defaulted _sel_parms consistently from here on
    # (the original mixed sel_parms and _sel_parms, which fails when the
    # caller relies on the defaults filled in by _check_sel_parms).
    chunk_sizes = vis_dataset[_sel_parms['data_in']].chunks
    freq_chan = da.from_array(vis_dataset.coords['chan'].values, chunks=(chunk_sizes[2][0]))

    n_chunks_in_each_dim = vis_dataset[_sel_parms['data_in']].data.numblocks
    iter_chunks_indx = itertools.product(np.arange(n_chunks_in_each_dim[0]),
                                         np.arange(n_chunks_in_each_dim[1]),
                                         np.arange(n_chunks_in_each_dim[2]),
                                         np.arange(n_chunks_in_each_dim[3]))

    list_of_vis_data = ndim_list(n_chunks_in_each_dim)
    list_of_uvw = ndim_list(n_chunks_in_each_dim[0:2] + (1,))

    for c_time, c_baseline, c_chan, c_pol in iter_chunks_indx:
        vis_data_and_uvw = dask.delayed(apply_phasor)(
            vis_dataset[_sel_parms['data_in']].data.partitions[c_time, c_baseline, c_chan, c_pol],
            vis_dataset[_sel_parms['uvw_in']].data.partitions[c_time, c_baseline, 0],
            vis_dataset.field_id.data.partitions[c_time],
            freq_chan.partitions[c_chan],
            dask.delayed(uvw_rotmat),
            dask.delayed(phase_rotation),
            dask.delayed(_rotation_parms['common_tangent_reprojection']))

        list_of_vis_data[c_time][c_baseline][c_chan][c_pol] = da.from_delayed(
            vis_data_and_uvw[0],
            (chunk_sizes[0][c_time], chunk_sizes[1][c_baseline], chunk_sizes[2][c_chan], chunk_sizes[3][c_pol]),
            dtype=np.complex128)
        list_of_uvw[c_time][c_baseline][0] = da.from_delayed(
            vis_data_and_uvw[1],
            (chunk_sizes[0][c_time], chunk_sizes[1][c_baseline], 3),
            dtype=np.float64)

    vis_dataset[_sel_parms['data_out']] = xr.DataArray(da.block(list_of_vis_data), dims=vis_dataset[_sel_parms['data_in']].dims)
    vis_dataset[_sel_parms['uvw_out']] = xr.DataArray(da.block(list_of_uvw), dims=vis_dataset[_sel_parms['uvw_in']].dims)

    # dask.visualize(vis_dataset[_sel_parms['uvw_out']], filename='uvw_rot_dataset')
    # dask.visualize(vis_dataset[_sel_parms['data_out']], filename='vis_rot_dataset')
    # dask.visualize(vis_dataset, filename='vis_dataset_before_append_custom_graph')

    list_xarray_data_variables = [vis_dataset[_sel_parms['uvw_out']], vis_dataset[_sel_parms['data_out']]]
    return _store(vis_dataset, list_xarray_data_variables, _storage_parms)
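# --- Illustration (not part of the original source) ---
# phase_rotate_sgraph relies on helpers defined elsewhere in the module
# (_directional_cosine, apply_phasor, ndim_list). The sketch below only
# illustrates the da.block pattern used above -- build a nested list sized by
# .numblocks, fill it with da.from_delayed results, then reassemble -- with a
# guessed ndim_list and a stand-in per-chunk function, not the real ngcasa code.

import itertools
import dask
import dask.array as da
import numpy as np

def ndim_list(shape):
    """Nested list of None with the given shape (a guess at the helper used above)."""
    if len(shape) == 1:
        return [None] * shape[0]
    return [ndim_list(shape[1:]) for _ in range(shape[0])]

def per_chunk_op(block):
    # stand-in for apply_phasor: any shape-preserving per-chunk computation
    return block * 2.0

x = da.random.random((8, 6, 4), chunks=(4, 3, 2))
out = ndim_list(x.numblocks)                 # (2, 2, 2) nested list of None

for i, j, k in itertools.product(*map(range, x.numblocks)):
    chunk = x.blocks[i, j, k]                # same role as .partitions above
    delayed_chunk = dask.delayed(per_chunk_op)(chunk)
    out[i][j][k] = da.from_delayed(delayed_chunk, shape=chunk.shape, dtype=x.dtype)

y = da.block(out)                            # same shape and block layout as x
assert y.shape == x.shape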
def recurse_axes(loop_axes, point_axes):
    """
    Used to create a nested list of images, with each nesting level corresponding to a particular axis.
    Each time this function is recursively called, it will descend one level deeper. The recursive calls
    can be thought of as a tree structure, where each depth level of the tree is one axis, and it has a
    branch (i.e. a subsequent call of recurse_axes) corresponding to every value of the next axis.

    :param loop_axes: The remaining axes that need to be looped over (i.e. the innermost ones)
    :param point_axes: The axes that have been assigned values already by a previous call of this function

    :return: Nested list of images
    """
    # NOTE: this is a nested helper (closure): 'self', 'verbose', 'stitched',
    # and 'total' come from the enclosing scope.
    if len(loop_axes.values()) == 0:
        # There are no more axes over which to loop (i.e. we're at the maximum depth), so return
        # the image defined by point_axes, or a blank image if it is undefined (so that the full
        # nested list will have the expected rectangular shape)
        if verbose:
            print("\rAdding data chunk {} of {}".format(self._count, total), end="")
        self._count += 1
        if None not in point_axes.values() and self.has_image(**point_axes):
            recurse_axes.empty = False  # track that actual data was read
            if stitched:
                img = self.read_image(**point_axes, memmapped=True)
                if self.half_overlap[0] != 0:
                    img = img[
                        self.half_overlap[0]:-self.half_overlap[0],
                        self.half_overlap[1]:-self.half_overlap[1],
                    ]
                return img
            else:
                return self.read_image(**point_axes, memmapped=True)
        else:
            # return np.zeros((self.image_height, self.image_width), self.dtype)
            return self._empty_tile
    else:
        # Still have axes over which to loop
        # do row and col first because it makes stitching faster
        if "row" in loop_axes.keys() and stitched:
            axis = "row"
        elif "column" in loop_axes.keys() and stitched:
            axis = "column"
        else:
            # Take the next axis in the list that needs to be looped over
            axis = list(loop_axes.keys())[0]

        # copy so multiple calls don't collide on the same data structure
        remaining_loop_axes = loop_axes.copy()
        if (axis == "row" or axis == "column") and stitched:
            # do these both at once
            del remaining_loop_axes["row"]
            del remaining_loop_axes["column"]
        else:
            # remove because this axis is now being assigned a point value
            del remaining_loop_axes[axis]

        if (axis == "row" or axis == "column") and stitched:
            # Do stitching along an existing axis
            # Stitch tiles acquired in a grid (i.e. data acquired by Micro-Magellan or in multi-res mode)
            self.half_overlap = (self.overlap[0] // 2, self.overlap[1] // 2)

            # get spatial layout of position indices
            row_values = np.array(list(self.axes["row"]))
            column_values = np.array(list(self.axes["column"]))

            # make nested list of rows and columns
            blocks = []
            for row in row_values:
                blocks.append([])
                for column in column_values:
                    valed_axes = point_axes.copy()
                    if verbose:
                        print("\rAdding data chunk {} of {}".format(self._count, total), end="")
                    valed_axes["row"] = row
                    valed_axes["column"] = column
                    blocks[-1].append(da.stack(recurse_axes(remaining_loop_axes, valed_axes)))

            rgb = self.bytes_per_pixel == 3 and self.dtype == np.uint8
            if rgb:
                stitched_array = np.concatenate(
                    [np.concatenate(row, axis=len(blocks[0][0].shape) - 2) for row in blocks],
                    axis=len(blocks[0][0].shape) - 3,
                )
            else:
                stitched_array = da.block(blocks)
            return stitched_array
        else:
            # Do stacking along a new axis (i.e. not stitching along an existing one)
            blocks = []
            # Loop through every value of the next axis (i.e. create new branches of the tree)
            for val in loop_axes[axis]:
                # Copy to avoid unexpected errors caused by multiple calls sharing the same dict
                valed_axes = point_axes.copy()
                # Move this axis from one that needs to be looped over to one that has a discrete value.
                valed_axes[axis] = val
                blocks.append(recurse_axes(remaining_loop_axes, valed_axes))
            return blocks
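# --- Illustration (not part of the original source) ---
# recurse_axes is a closure ('self', 'verbose', 'stitched', 'total' come from
# the enclosing scope), so it cannot run standalone. The toy below, with made-up
# tile data and axis names, shows the same idea -- descend one axis per
# recursion level, then combine the leaves -- using da.stack for ordinary axes
# and da.block for a row/column grid, as the stitched branch above does.

import numpy as np
import dask.array as da

tiles = {(t, z): np.full((4, 4), 10 * t + z, dtype=np.uint16)
         for t in range(3) for z in range(2)}
axes = {"time": range(3), "z": range(2)}     # hypothetical axis names and sizes

def nest(remaining_axes, chosen_values):
    """Recursively stack tiles along each remaining axis (cf. recurse_axes)."""
    if not remaining_axes:
        return da.from_array(tiles[tuple(chosen_values)], chunks=(4, 4))
    axis, values = next(iter(remaining_axes.items()))
    rest = {k: v for k, v in remaining_axes.items() if k != axis}
    return da.stack([nest(rest, chosen_values + [v]) for v in values])

arr = nest(axes, [])          # shape (3, 2, 4, 4): time, z, y, x

# The grid-stitched case instead arranges tiles in a row/column nested list
# (reusing the same toy tiles purely to show the call shape):
grid = [[da.from_array(tiles[(0, z)], chunks=(4, 4)) for z in range(2)]]
mosaic = da.block(grid)       # shape (4, 8): one row of two tiles side by side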