Example #1
def expand_arr_1d(arr: da.Array, required_shape: Tuple[int]) -> da.Array:
    missing = (required_shape[0] - arr.shape[0], )
    values = da.block([arr, da.zeros(missing, dtype=arr.dtype)])
    mask = da.block(
        [da.zeros(arr.shape, dtype=bool),
         da.ones(missing, dtype=bool)])
    return da.ma.masked_array(values, mask=mask)
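A minimal usage sketch for the helper above (the sizes are illustrative and it assumes the usual imports, numpy as np and dask.array as da):

arr = da.from_array(np.arange(5), chunks=2)
padded = expand_arr_1d(arr, (8,))
# padded.compute() is a numpy masked array of length 8; the three padded entries are masked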
Example #2
def da_diagsvd(s, M, N):
    """
    Construct the sigma matrix in SVD from singular values and size M, N.
    Parameters
    ----------
    s : (M,) or (N,) array_like
        Singular values
    M : int
        Number of rows of the matrix whose singular values are `s`.
    N : int
        Number of columns of the matrix whose singular values are `s`.
    Returns
    -------
    S : (M, N) ndarray
        The S-matrix in the singular value decomposition
    """
    part = da.diag(s)

    MorN = len(s)
    if MorN == M:
        return da.block([part, da.zeros((M, N - M), dtype=s.dtype)])
    elif MorN == N:
        return da.block([[part], [da.zeros((M - N, N), dtype=s.dtype)]])
    else:
        raise ValueError("Length of s must be M or N.")
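For reference, a quick call sketch for da_diagsvd (the values are illustrative and assume numpy as np and dask.array as da are imported):

s = da.from_array(np.array([3.0, 2.0]))
S = da_diagsvd(s, 2, 4)
# S has shape (2, 4): diag(s) padded on the right with zeros, mirroring scipy.linalg.diagsvd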
Example #3
    def _solve(self, HH, HY):
        """Compute output weights from HH and HY using Dask functionality.
        """
        # make HH/HY divisible by chunk size
        n_features, _ = HH.shape
        padding = 0
        if n_features > self.bsize_ and n_features % self.bsize_ > 0:
            print("Adjusting batch size {} to n_features {}".format(
                self.bsize_, n_features))
            padding = self.bsize_ - (n_features % self.bsize_)
            P01 = da.zeros((n_features, padding))
            P10 = da.zeros((padding, n_features))
            P11 = da.zeros((padding, padding))
            HH = da.block([[HH, P01], [P10, P11]])

            P1 = da.zeros((padding, HY.shape[1]))
            HY = da.block([[HY], [P1]])

        # rechunk, add bias, and solve
        HH = HH.rechunk(
            self.bsize_) + self.alpha * da.eye(HH.shape[1], chunks=self.bsize_)
        HY = HY.rechunk(self.bsize_)

        B = da.linalg.solve(HH, HY, sym_pos=True)
        if padding > 0:
            B = B[:n_features]

        return B
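The zero-padding pattern used above can be shown in isolation; a short sketch with made-up sizes (assumes dask.array as da):

HH = da.random.random((10, 10))
HY = da.random.random((10, 3))
pad = 2  # grow 10 -> 12 so the matrix divides evenly into 4x4 chunks
HH_p = da.block([[HH, da.zeros((10, pad))],
                 [da.zeros((pad, 10)), da.zeros((pad, pad))]]).rechunk(4)
HY_p = da.block([[HY], [da.zeros((pad, 3))]]).rechunk(4)
# after solving, only the first 10 rows of the result would be kept, as in _solve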
Example #4
def stitch_fields(fields, blocksize):
    """Stitch per-block fields into a single array: weight the block edges,
    assemble the blocks with da.block, and merge the overlap regions.
    """

    # weight block edges
    weighted_fields = da.map_blocks(
        weight_block,
        fields,
        blocksize=blocksize,
        dtype=np.float32,
    )

    # remove block index dimensions
    sh = fields.shape[:3]
    list_of_blocks = [[[[weighted_fields[i, j, k]] for k in range(sh[2])]
                       for j in range(sh[1])] for i in range(sh[0])]
    aug_fields = da.block(list_of_blocks)

    # merge overlap regions
    overlaps = tuple([int(round(x / 8)) for x in blocksize] + [
        0,
    ])
    return da.map_overlap(
        merge_overlaps,
        aug_fields,
        blocksize=blocksize,
        depth=overlaps,
        boundary=0.,
        trim=False,
        dtype=np.float32,
        chunks=blocksize + [
            3,
        ],
    )
Example #5
def naive_merge(work_dir="/run/media/user/HDD 1TB/", prefix="split_part_", ask=False, rechunk=False):
    """ Write multiple files into a big array file.
    """
    def get_tuple_id(file_name, prefix):
        """ this function returns the position of the block file in the total array
        """
        strings = file_name.replace(prefix, "").split('_')
        integers = map(lambda s: int(s), strings)
        return tuple(integers)

    def get_max_dim(keys, dim):
        """ key = (x, y, z) position of the block file in the total array
        this function returns the number of blocks in a given dimension
        """
        return max([key[dim] for key in keys])

    total_time = time.time()
    IO_time = 0  # remains 0 if saving is skipped

    file_names = {get_tuple_id(f.split('.')[0], prefix) : f for f in os.listdir(work_dir) if os.path.isfile(os.path.join(work_dir, f)) and prefix in f}
    keys = file_names.keys()
    i_max, j_max, k_max = (get_max_dim(keys, 0), get_max_dim(keys, 1), get_max_dim(keys, 2))

    data = list()
    for i in range(i_max + 1):
        stack_i = list()
        for j in range(j_max + 1):
            stack_j = list()
            for k in range(k_max + 1):
                file_name = file_names[(i, j, k)]
                arr_k = get_dask_array_from_hdf5(file_path=os.path.join(work_dir, file_name), cast=True, key='/data')
                if rechunk:
                    arr_k = arr_k.rechunk((arr_k.shape[0], arr_k.shape[1], "auto"))
                stack_j.append(arr_k)
            stack_i.append(stack_j)
        data.append(stack_i)

    arr = da.block(data)
    print("Output shape: " + str(arr.shape))

    if ask:
        while True:
            try:
                save = input("Do you want to proceed with saving? (y/n) ")
                if save in ["y", "n"]:
                    break
            except ValueError:
                print("Invalid answer.")
                continue
    else:
        save = "y"

    if save == "y":
        print("start saving...")
        IO_time = time.time()
        save_arr(arr, "hdf5", work_dir + "merged.hdf5",
                 key='/data', chunks_shape=None)
        IO_time = time.time() - IO_time

    total_time = time.time() - total_time
    return total_time, IO_time
Example #6
def block_regex_tif(tif_filepath: list, lazy_arrays: list) -> da.Array:
    """Sort .tif files in order. Map key regex components to set chunking
    for the .tif array. Block these chunks together and return a dask array."""
    # THIS IS FOR PARSING BY SCAN_ITER AND CHANNELS
    # e.g. Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0016966725msecAbs_000x_000y_000z_0000t.tif
    tif_files = [fn.split('\\')[-1] for fn in tif_filepath]
    fn_comp_sets = dict()
    for fn in tif_files:
        for i, comp in enumerate(os.path.splitext(fn)[0].split("_")):
            fn_comp_sets.setdefault(i, set())
            fn_comp_sets[i].add(comp)
    fn_comp_sets = list(map(sorted, fn_comp_sets.values()))

    remap_comps = [
        dict(map(reversed, enumerate(fn_comp_sets[2]))),  # MUST be the index for scan_iter, e.g. '0003'
        dict(map(reversed, enumerate(fn_comp_sets[4])))  # MUST be the index for channel, e.g. 'ch0'
    ]
    # Create an empty object array to organize each chunk that loads a TIFF
    b = np.empty(tuple(map(len, remap_comps)) + (1, 1, 1), dtype=object)
    for fn, x in zip(tif_files, lazy_arrays):
        scan_iter = int(fn[fn.index("Scan_Iter_") + 10:fn.index("_Cam")].split("_")[0])
        channel = int(fn[fn.index("_ch") + 3:].split("_")[0])
        b[scan_iter, channel, 0, 0, 0] = x

    # YOU MUST HAVE SIMILAR CHANNEL PATTERNS TO SCAN_ITER PATTERNS OR ELSE THE PROCESS WILL FAIL
    # e.g. every Scan_Iter_ must have 8x ch0 and 4x ch1. Deviating from this pattern will result in an exception!
    # Stitch together the many blocks into a single array
    b = da.block(b.tolist())
    return b
Example #7
def _graph_standard_degrid(vis_dataset, grid, briggs_factors, cgk_1D, grid_parms):
   import dask
   import dask.array as da
   import xarray as xr
   import time
   import itertools
   
   # Getting data for gridding
   chan_chunk_size = vis_dataset[grid_parms["imaging_weight_name"]].chunks[2][0]

   freq_chan = da.from_array(vis_dataset.coords['chan'].values, chunks=(chan_chunk_size))

   n_chunks_in_each_dim = vis_dataset[grid_parms["imaging_weight_name"]].data.numblocks
   chunk_indx = []

   iter_chunks_indx = itertools.product(np.arange(n_chunks_in_each_dim[0]), np.arange(n_chunks_in_each_dim[1]),
                                        np.arange(n_chunks_in_each_dim[2]), np.arange(n_chunks_in_each_dim[3]))

   #n_delayed = np.prod(n_chunks_in_each_dim)
   chunk_sizes = vis_dataset[grid_parms["imaging_weight_name"]].chunks

   n_chan_chunks_img = n_chunks_in_each_dim[2]
   list_of_degrids = []
   list_of_sum_weights = []
   
   list_of_degrids = ndim_list(n_chunks_in_each_dim)
   
   
   # Build graph
   for c_time, c_baseline, c_chan, c_pol in iter_chunks_indx:
       if grid_parms['chan_mode'] == 'cube':
            a_c_chan = c_chan
       else:
            a_c_chan = 0
       
       if grid_parms['do_imaging_weight']:
           sub_degrid = dask.delayed(_standard_imaging_weight_degrid_numpy_wrap)(
                grid.partitions[0,0,a_c_chan,c_pol],
                vis_dataset[grid_parms["uvw_name"]].data.partitions[c_time, c_baseline, 0],
                vis_dataset[grid_parms["imaging_weight_name"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
                briggs_factors.partitions[:,a_c_chan,c_pol],
                freq_chan.partitions[c_chan],
                dask.delayed(grid_parms))
                
           single_chunk_size = (chunk_sizes[0][c_time], chunk_sizes[1][c_baseline],chunk_sizes[2][c_chan], chunk_sizes[3][c_pol])
           list_of_degrids[c_time][c_baseline][c_chan][c_pol] = da.from_delayed(sub_degrid, single_chunk_size,dtype=np.double)
       else:
           print('Degridding of visibilities and psf still needs to be implemented')
           
           #sub_grid_and_sum_weights = dask.delayed(_standard_grid_numpy_wrap)(
           #vis_dataset[vis_dataset[grid_parms["data"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
           #vis_dataset[grid_parms["uvw"]].data.partitions[c_time, c_baseline, 0],
           #vis_dataset[grid_parms["imaging_weight"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
           #freq_chan.partitions[c_chan],
           #dask.delayed(cgk_1D), dask.delayed(grid_parms))
       
   degrid = da.block(list_of_degrids)
   return degrid
Example #8
def run(ds, size_limit=4096, mip=False):
    # estimate resize ratio, no larger than 4k
    tile_shape, (im_shape, im_dtype) = ds.tile_shape, ds._load_array_info()
    shape = tuple(t * i for t, i in zip(tile_shape, im_shape))
    logger.debug(f"original preview {shape}, {im_dtype}")
    ratio, layer_shape = 1, shape[1:] if len(shape) == 3 else shape
    while True:
        if all((s // ratio) > size_limit for s in layer_shape):
            logger.debug(f"ratio={ratio}, exceeds size limit ({size_limit})")
            ratio *= 2
        else:
            break
    logger.info(f"target downsampling {ratio}x")

    # retrieve tiles
    def retrieve(tile):
        data = ds[tile]

        sampler = (slice(None, None, ratio), ) * 2
        if data.ndim == 3:
            if mip:
                # flatten the entire tile
                data = data.max(axis=0)
            else:
                # normally, we don't sub-sample z
                sampler = (slice(None, None, None), ) + sampler
        data = data[sampler]

        return data

    def groupby_tiles(inventory, index: List[str]):
        """
        Aggregation function that generates the proper internal list layout for all the tiles in their natural N-D layout.

        Args:
            inventory (pd.DataFrame): the listing inventory
            index (list of str): the column header
        """
        tiles = []
        for _, tile in inventory.groupby(index[0]):
            if len(index) > 1:
                # we are not at the fastest dimension yet, decrease 1 level
                tiles.append(groupby_tiles(tile, index[1:]))
            else:
                # fastest dimension, call retrieval function
                tiles.append(retrieve(tile))
        return tiles

    index = ["tile_y", "tile_x"]
    if "tile_z" in ds.index.names:
        index = ["tile_z"] + index
    logger.info(f"a {len(index)}-D tiled dataset")

    # pack as a huge array
    preview = da.block(groupby_tiles(ds, index))

    return preview
Example #9
def merge_hdf5_multiple(input_dirpath, out_filepath, out_file, dataset_key,
                        store):
    """ Merge separated hdf5 files into one hdf5 output file.
    
    Arguments: 
    ----------
        input_dirpath: path to input files
        out_filepath: path to output file
        out_file: empty pointer; will contain the file object to be freed after computation by the Merge object.
        dataset_key: dataset key of the block stored into each input file
    """
    def print_blocks(l, depth):
        tab = depth * ['\t']
        if not isinstance(l, list):
            logger.info(''.join(tab) + '%s', l)
        else:
            logger.info(''.join(tab) + '[')
            for e in l:
                print_blocks(e, depth + 1)
            logger.info(''.join(tab) + ']')

    # get array parts from input files
    workdir = os.getcwd()
    os.chdir(input_dirpath)
    data = dict()
    for infilepath in glob.glob("[0-9]*_[0-9]*_[0-9]*.hdf5"):
        pos = infilepath.split('_')
        pos[-1] = pos[-1].split('.')[0]
        pos = tuple(list(map(lambda s: int(s), pos)))
        arr = get_dask_array_from_hdf5(infilepath,
                                       dataset_key,
                                       logic_cs="dataset_shape")
        data[pos] = arr
    os.chdir(workdir)

    if len(data.keys()) == 0:
        msg = 'Could not find input file matching regex'
        logger.error(msg)
        raise ValueError(msg)

    for pos in data.keys():
        logger.debug('%s', pos)

    # create reconstructed_array
    blocks = to_list(data)
    print_blocks(blocks, 0)
    reconstructed_array = da.block(blocks)

    if not store:
        return reconstructed_array

    # store new array in output file
    out_file = h5py.File(out_filepath, 'w')
    dset = out_file.create_dataset('/data', shape=reconstructed_array.shape)
    return da.store(reconstructed_array, dset, compute=False)
Example #10
    def as_stitched_array(self):

        def read_tile(channel_index, t_index, pos_index, z_index):
            if not np.isnan(pos_index) and channel_index in self.c_z_t_p_tree and \
                    z_index in self.c_z_t_p_tree[channel_index] and \
                    t_index in self.c_z_t_p_tree[channel_index][z_index] and \
                    pos_index in self.c_z_t_p_tree[channel_index][z_index][t_index]:
                img = self.read_image(channel_index=channel_index, z_index=z_index, t_index=t_index,
                                      pos_index=pos_index, memmapped=True)
            else:
                img = self._empty_tile
            # crop to center of tile for stitching
            return img[self.half_overlap:-self.half_overlap, self.half_overlap:-self.half_overlap]

        def z_stack(c_index, t_index, p_index):
            if np.isnan(p_index):
                return da.stack(self.z_indices.size * [self._empty_tile[self.half_overlap:-self.half_overlap,
                                  self.half_overlap:-self.half_overlap]])
            else:
                z_list = []
                for z_index in self.z_indices:
                    z_list.append(read_tile(c_index, t_index, p_index, z_index))
                return da.stack(z_list)

        self.half_overlap = self.overlap[0] // 2

        #get spatial layout of position indices
        zero_min_row_col = (self.row_col_array - np.nanmin(self.row_col_array, axis=0))
        row_col_mat = np.nan * np.ones([int(np.nanmax(zero_min_row_col[:, 0])) + 1, int(np.nanmax(zero_min_row_col[:, 1])) + 1])
        rows = zero_min_row_col[self.position_indices][:, 0]
        cols = zero_min_row_col[self.position_indices][:, 1]
        #mask in case some positions were corrupted
        mask = np.logical_not(np.isnan(rows))
        row_col_mat[rows[mask].astype(int), cols[mask].astype(int)] = self.position_indices[mask]

        total = self.time_indices.size * self.channel_indices.size * row_col_mat.shape[0] * row_col_mat.shape[1]
        count = 1
        stacks = []
        for t_index in self.time_indices:
            stacks.append([])
            for c_index in self.channel_indices:
                blocks = []
                for row in row_col_mat:
                    blocks.append([])
                    for p_index in row:
                        print('\rAdding data chunk {} of {}'.format(count, total), end='')
                        count += 1
                        blocks[-1].append(z_stack(c_index, t_index, p_index))

                stacks[-1].append(da.block(blocks))

        print('\rDask array opened')
        return da.stack(stacks)
Example #11
    def _read_delayed(self) -> da.core.Array:
        # Load Tiff
        with TiffFile(self._file) as tiff:
            # Check each scene has the same shape
            # If scene shape checking fails, use the specified scene and update
            # operating shape
            scenes = tiff.series
            operating_shape = scenes[0].shape
            if not self._scene_shape_is_consistent(tiff,
                                                   S=self.specific_s_index):
                operating_shape = scenes[self.specific_s_index].shape
                scenes = [scenes[self.specific_s_index]]

            # Get sample yx plane
            sample = scenes[0].pages[0].asarray()

            # Combine length of scenes and operating shape
            # Replace YX dims with empty dimensions
            operating_shape = (len(scenes), *operating_shape)
            if scenes[0].keyframe.samplesperpixel != 1:
                # if it's a multichannel (RGB) we need to pull in the channels as well
                operating_shape = operating_shape[:-3] + (1, 1, 1)
            else:  # the data is a 2D (Y, X) so read 2D planes
                operating_shape = operating_shape[:-2] + (1, 1)

            # Make ndarray for lazy arrays to fill
            lazy_arrays = np.ndarray(operating_shape, dtype=object)
            for all_page_index, (np_index,
                                 _) in enumerate(np.ndenumerate(lazy_arrays)):
                # Scene index is the first index in np_index
                scene_index = np_index[0]

                # This page index is current enumeration divided by scene index + 1
                # For example if the image has 10 Z slices and 5 scenes, there
                # would be 50 total pages
                this_page_index = all_page_index // (scene_index + 1)

                # Fill the numpy array with the delayed arrays
                lazy_arrays[np_index] = da.from_delayed(
                    delayed(TiffReader._imread)(self._file, scene_index,
                                                this_page_index),
                    shape=sample.shape,
                    dtype=sample.dtype,
                )

            # Convert the numpy array of lazy readers into a dask array
            data = da.block(lazy_arrays.tolist())

            # Only return the scene dimension if multiple scenes are present
            if len(scenes) == 1:
                data = data[0, :]

            return data
Example #12
        def recurse_axes(loop_axes, point_axes):
            if len(loop_axes.values()) == 0:
                print('\rAdding data chunk {} of {}'.format(self._count, total), end='')
                self._count += 1
                if None not in point_axes.values() and self.has_image(**point_axes):
                    return self.read_image(**point_axes, memmapped=True)
                else:
                    # return np.zeros((self.image_height, self.image_width), self.dtype)
                    return self._empty_tile
            else:
                #do position first because it makes stitching faster
                axis = 'position' if 'position' in loop_axes.keys() and stitched else list(loop_axes.keys())[0]
                remaining_axes = loop_axes.copy()
                del remaining_axes[axis]
                if axis == 'position' and stitched:
                    #Stitch tiles acquired in a grid
                    self.half_overlap = self.overlap[0] // 2

                    # get spatial layout of position indices
                    zero_min_row_col = (self.row_col_array - np.nanmin(self.row_col_array, axis=0))
                    row_col_mat = np.nan * np.ones(
                        [int(np.nanmax(zero_min_row_col[:, 0])) + 1, int(np.nanmax(zero_min_row_col[:, 1])) + 1])
                    positions_indices = np.array(list(loop_axes['position']))
                    rows = zero_min_row_col[positions_indices][:, 0]
                    cols = zero_min_row_col[positions_indices][:, 1]
                    # mask in case some positions were corrupted
                    mask = np.logical_not(np.isnan(rows))
                    row_col_mat[rows[mask].astype(int), cols[mask].astype(int)] = positions_indices[mask]

                    blocks = []
                    for row in row_col_mat:
                        blocks.append([])
                        for p_index in row:
                            print('\rAdding data chunk {} of {}'.format(self._count, total), end='')
                            valed_axes = point_axes.copy()
                            valed_axes[axis] = int(p_index) if not np.isnan(p_index) else None
                            blocks[-1].append(da.stack(recurse_axes(remaining_axes, valed_axes)))

                    if self.rgb:
                        stitched_array = np.concatenate(
                            [np.concatenate(row, axis=len(blocks[0][0].shape) - 2) for row in blocks],
                            axis=len(blocks[0][0].shape) - 3)
                    else:
                        stitched_array = da.block(blocks)
                    return stitched_array
                else:
                    blocks = []
                    for val in loop_axes[axis]:
                        valed_axes = point_axes.copy()
                        valed_axes[axis] = val
                        blocks.append(recurse_axes(remaining_axes, valed_axes))
                    return blocks
Example #13
    def create_array(self, name, shape, chunksize, dtype, timedim):
        chunks_in_each_dim = [
            shape[i] // chunksize[i] for i in range(len(shape))
        ]
        l = list(itertools.product(*[range(i) for i in chunks_in_each_dim]))
        items = []
        for m in l:
            f = Future(key=("deisa-" + name, m), inform=True, deisa=True)
            d = da.from_delayed(dask.delayed(f), shape=chunksize, dtype=dtype)
            items.append([list(m), d])
        ll = self.array_sort(items)
        arrays = da.block(ll)
        return arrays
Example #14
def mosaic(ctx, path, screen_size):
    """
    Generate mosaic for each layer.
    \f

    Args:
        path (str): path to the dataset    
        screen_size (str, optional): screen size to fit the result in
    """
    show_trace = logger.getEffectiveLevel() <= logging.DEBUG
    ds = open_dataset(path, show_trace=show_trace)

    _, dy, dx = ds.voxel_size

    iz = 0
    for tz, ds_xy in TiledDatasetIterator(ds, axes="z", return_key=True):
        if tz:
            logger.info(f"iterate over z tile, {tz}")

        # populating layers
        layer = []
        for ds_x in TiledDatasetIterator(ds_xy, axes="y", return_key=False):
            row = []
            for uuid in TiledDatasetIterator(ds_x, axes="x", return_key=False):
                row.append(ds[uuid])
            layer.append(row)
        layer = da.block(layer)

        sampler = None
        for mosaic in layer:
            if sampler is None:
                ratio = _estaimte_resize_ratio(mosaic, resolution=screen_size)
                sampler = (slice(None, None, ratio), ) * 2
            mosaic = mosaic[sampler]

            print(iz)

            tifffile.imwrite(
                f"mosaic_z{iz:05}.tif",
                mosaic,
                imagej=True,
                resolution=(dx, dy),
                metadata={"unit": "um"},
            )

            iz += 1
Example #15
    def open_RoughScan(self):
        # Open RoughScan tiffs
        filenames = self.filenames

        comp_sets = dict()
        for fn in filenames:
            # Break up filename into components
            comp_ = path.basename(fn)[:-5].split("_")
            for i, comp in enumerate(comp_):
                comp_sets.setdefault(i,set())
                comp_sets[i].add(comp)

        shape = imageio.imread(filenames[0]).shape
        lazy_arrays = [dask.delayed(imageio.imread)(fn) for fn in filenames]
        lazy_arrays = [da.from_delayed(x, shape=shape, dtype='int16') for x in lazy_arrays]
        #images = [imageio.imread(fn) for fn in filenames]

        # Organize images
        #0 channel, 1 RoughScan, 2 x_step, 3 obj_step
        fn_comp_sets = list(comp_sets.values())
        for i in [0,2]:
            fn_comp_sets[i] = [int(x[1:]) for x in fn_comp_sets[i]]
        fn_comp_sets = list(map(sorted, fn_comp_sets))
        remap_comps = [fn_comp_sets[0], [1], fn_comp_sets[2]]
        a = np.empty(tuple(map(len, remap_comps)), dtype=object)
        for fn, x in zip(filenames, lazy_arrays):
            comp_ = path.basename(fn)[:-5].split("_")
            channel = fn_comp_sets[0].index(int(comp_[0][1:]))
            x_step = fn_comp_sets[2].index(int(comp_[2][1:]))
            a[channel, 0, x_step] = x


        # Label array
        dim_names = ['channel', 'row', 'col']
        channels = [int(ch) for ch in fn_comp_sets[0]]
        coord_values = {'channel':channels}
        im = xr.DataArray(da.block(a.tolist()),
                               dims = dim_names,
                               coords = coord_values,
                               name = 'RoughScan')

        im = im.assign_attrs(first_group = 0, machine = '', scale=1, overlap=0,
                             fixed_bg = 0)
        self.im = im.sel(row=slice(64,None))

        return len(fn_comp_sets[2])
Example #16
    def __init__(self, ld_dir, legend):
        # read legend
        self.legend = pd.read_table(legend, header=None)
        self.legend.columns = ['CHR', 'SNP', 'CM', 'BP', 'A1', 'A2']

        # read ld
        self.ld_list = []
        for chr_i in range(1, 23):
            chr_ld_dir = join(ld_dir, str(chr_i))
            part_info = pd.read_table(join(chr_ld_dir, 'part.info'),
                                      header=None,
                                      sep='\t',
                                      names=['row', 'col'])

            # get last_index to determine shape
            last_ld = np.load(
                join(chr_ld_dir, 'part_{}.npy'.format(len(part_info))))
            info_end = int(part_info['row'][len(part_info) - 1].split('-')[1])
            index_end = int(
                part_info['row'][len(part_info) -
                                 1].split('-')[0]) + last_ld.shape[0]
            ld_len = int(np.sqrt(len(part_info)))
            ld = np.zeros([ld_len, ld_len]).tolist()

            for part_i, part in part_info.iterrows():
                row_start, row_end = [
                    int(i) for i in part_info['row'][part_i].split('-')
                ]
                col_start, col_end = [
                    int(i) for i in part_info['col'][part_i].split('-')
                ]
                if row_end == info_end:
                    row_end = index_end
                if col_end == info_end:
                    col_end = index_end
                local_ld = dask.delayed(np.load)(join(
                    chr_ld_dir, 'part_{}.npy'.format(part_i + 1)))
                local_ld = da.from_delayed(local_ld,
                                           shape=(row_end - row_start,
                                                  col_end - col_start),
                                           dtype=np.float64)
                ld[int(part_i / ld_len)][part_i % ld_len] = local_ld

            ld = da.block(ld)
            self.ld_list.append(ld)
Example #17
def _mk_dask_from_delayed(shape,
                          chunking,
                          dtype='float32',
                          filename=None,
                          value=None):
    """
    Create a dask array by combining individually created blocks

    If filename is not None, blocks are loaded from the file using np.memmap;
    otherwise numbered partitions are generated using np.ones * chunk_idx,
    or partitions of a uniform value if value is not None.
    """
    if filename is not None:
        create = dask.delayed(_mmap_load_chunk,
                              name='create_chunk',
                              pure=True,
                              traverse=False)
        filename = pathlib.Path(filename)
    else:
        create = dask.delayed(_create_chunk,
                              name='create_chunk',
                              pure=True,
                              traverse=False)

    slices_per_dim = _get_block_slices(chunking, shape)
    blocks = []
    # rightmost advances fastest with itertools.product
    for chunk_idx, chunk_slices in enumerate(
            itertools.product(*slices_per_dim)):
        chunk_value = chunk_idx if value is None else value
        chunk_shape = _slices_to_chunk_shape(chunk_slices, shape)
        chunk = dask.array.from_delayed(create(dataset_shape=shape,
                                               chunk_shape=chunk_shape,
                                               dtype=dtype,
                                               value=chunk_value,
                                               filename=filename,
                                               sl=chunk_slices),
                                        shape=chunk_shape,
                                        dtype=dtype)
        blocks.append(chunk)

    nblocks_per_dim = tuple(len(ss) for ss in slices_per_dim)
    blocks = _reshape_list(blocks, nblocks_per_dim)
    return da.block(blocks)
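The same idea — one delayed object per block, wrapped with da.from_delayed and assembled with da.block — in a toy sketch where np.full stands in for the chunk-creation helpers (assumes dask, numpy as np, and dask.array as da):

blocks = [[da.from_delayed(dask.delayed(np.full)((2, 3), 2 * r + c, dtype='float32'),
                           shape=(2, 3), dtype='float32')
           for c in range(2)]
          for r in range(2)]
arr = da.block(blocks)  # shape (4, 6); each delayed call produces one chunk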
Example #18
    def dask_data(self) -> da.core.Array:
        # Construct delayed many image reads
        if self._dask_data is None:
            try:
                with imageio.get_reader(self._file) as reader:
                    # Store length as it is used a bunch
                    image_length = reader.get_length()

                    # Handle single image formats like png, jpeg, etc
                    if image_length == 1:
                        self._dask_data = da.from_array(
                            self._get_data(self._file, 0))

                    # Handle many image formats like gif, mp4, etc
                    elif image_length > 1:
                        # Get a sample image
                        sample = self._get_data(self._file, 0)

                        # Create operating shape for the final dask array by prepending image length to a tuple of
                        # ones that is the same length as the sample shape
                        operating_shape = (image_length, ) + (
                            (1, ) * len(sample.shape))
                        # Create numpy array of empty arrays for delayed get data functions
                        lazy_arrays = np.ndarray(operating_shape, dtype=object)
                        for indicies, _ in np.ndenumerate(lazy_arrays):
                            lazy_arrays[indicies] = da.from_delayed(
                                delayed(self._get_data)(self._file,
                                                        indicies[0]),
                                shape=sample.shape,
                                dtype=sample.dtype)

                        # Block them into a single dask array
                        self._dask_data = da.block(lazy_arrays.tolist())

                    # Catch all other image types as unsupported
                    # https://imageio.readthedocs.io/en/stable/userapi.html#imageio.core.format.Reader.get_length
                    else:
                        raise exceptions.UnsupportedFileFormatError(self._file)

            # Reraise unsupported file format
            except exceptions.UnsupportedFileFormatError:
                raise exceptions.UnsupportedFileFormatError(self._file)

        return self._dask_data
Example #19
    def load_task_daskarray(self, task, index=None, chunks=None):
        # Load proc datasets
        proc_dsets = np.empty(len(self.proc_files), dtype=object)
        for i, proc_file in enumerate(self.proc_files):
            # Load dataset
            dset = proc_file['tasks'][task]
            # Cast to dask array
            if chunks is None:
                chunks = dset.chunks
            dset = da.from_array(dset, chunks=chunks)
            if index is not None:
                dset = dset[index]
            proc_dsets[i] = dset
        # Shape into nested list
        proc_dsets = proc_dsets.reshape(self.procs.shape)
        proc_dsets = proc_dsets.tolist()
        # Build using dask blocking
        dset = da.block(proc_dsets)
        return dset
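The reshape-then-tolist idiom above generalizes to any process grid; an illustrative sketch with a made-up 2x3 grid of 4x5 blocks (assumes numpy as np and dask.array as da):

blocks = np.empty((2, 3), dtype=object)
for i in range(2):
    for j in range(3):
        blocks[i, j] = da.full((4, 5), i * 3 + j, chunks=(4, 5))
full = da.block(blocks.tolist())  # shape (8, 15), one chunk per original block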
Example #20
def _chunk_numpy_array(data, chunk_size):
    """
    Convert a numpy array into Dask array with chunks of given size. The function
    splits the array into chunks along axes 0 and 1. If the array has more than 2 dimensions,
    then the remaining dimensions are not chunked. Note that
    `dask_array = da.array(data, chunks=...)` will set the chunk size, but will not split the
    data into chunks, so the array cannot be loaded block by block by workers
    controlled by a distributed scheduler.

    Parameters
    ----------
    data: ndarray(float), 2 or more dimensions
        XRF map of the shape `(ny, nx, ne)`, where `ny` and `nx` represent the image size
        and `ne` is the number of points in spectra
    chunk_size: tuple(int, int) or list(int, int)
         Chunk size for axis 0 and 1: `(chunk_y, chunk_x)`. The function will accept
         chunk size values that are larger than the respective `data` array dimensions.

    Returns
    -------
    data_dask: dask.array
        Dask array with the given chunk size
    """

    chunk_y, chunk_x = chunk_size
    ny, nx = data.shape[0:2]
    chunk_y, chunk_x = min(chunk_y, ny), min(chunk_x, nx)

    def _get_slice(n1, n2):
        data_slice = data[slice(n1 * chunk_y, min(n1 * chunk_y + chunk_y, ny)),
                          slice(n2 * chunk_x, min(n2 * chunk_x +
                                                  chunk_x, nx)), ]
        # Wrap the slice into a list with appropriate dimensions
        for _ in range(2, data.ndim):
            data_slice = [data_slice]
        return data_slice

    # Chunk the numpy array and assemble it as a dask array
    data_dask = da.block(
        [[_get_slice(_1, _2) for _2 in range(int(math.ceil(nx / chunk_x)))]
         for _1 in range(int(math.ceil(ny / chunk_y)))])

    return data_dask
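A quick call sketch for _chunk_numpy_array (the array shape and chunk size are made up):

data = np.random.random((7, 11, 5))
data_dask = _chunk_numpy_array(data, (3, 4))
# data_dask.chunks == ((3, 3, 1), (4, 4, 3), (5,))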
Example #21
def parse_regex_tiff(glob_filenames, lazy_arrays):
    # Get various dimensions
    # THIS IS FOR PARSING BY SCAN_ITER AND CHANNELS
    # e.g. Scan_Iter_0000_CamA_ch0_CAM1_stack0000_488nm_0000000msec_0016966725msecAbs_000x_000y_000z_0000t.tif

    glob_filenames_terminal = [file.split('\\')[-1] for file in glob_filenames]
    fn_comp_sets = dict()
    for fn in glob_filenames_terminal:
        for i, comp in enumerate(os.path.splitext(fn)[0].split("_")):
            fn_comp_sets.setdefault(i, set())
            fn_comp_sets[i].add(comp)
    fn_comp_sets = list(map(sorted, fn_comp_sets.values()))

    remap_comps = [
        dict(map(reversed, enumerate(
            fn_comp_sets[2]))),  # MUST be the index for scan_iter, e.g. '0003'
        dict(map(reversed, enumerate(
            fn_comp_sets[4])))  # MUST be the index for channel, e.g. 'ch0'
    ]

    # Create an empty object array to organize each chunk that loads a TIFF
    b = np.empty(tuple(map(len, remap_comps)) + (1, 1, 1), dtype=object)

    for fn, x in zip(glob_filenames_terminal, lazy_arrays):
        scan_iter = int(fn[fn.index("Scan_Iter_") +
                           10:fn.index("_Cam")].split("_")[0])
        channel = int(fn[fn.index("_ch") + 3:].split("_")[0])
        print(scan_iter, channel)

        b[scan_iter, channel, 0, 0, 0] = x
    # YOU MUST HAVE SIMILAR CHANNEL PATTERNS TO SCAN_ITER PATTERNS OR ELSE THE PROCESS WILL FAIL
    # e.g. every Scan_Iter_ must have 8x ch0 and 4x ch1. Deviating from this pattern will result in an exception!

    # Stitch together the many blocks into a single array
    b = da.block(b.tolist())
    return b
Example #22
def load_leica_frames(df, idx_mapper, coords=None, chunkby_dims='CZ'):
    """
    Lazily load single image leica tiffs into an xarray.DataArray.

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame containing data file names in a column called "filename".
    idx_mapper : callable or pandas.DataFrame
        Means to map data files to the correct dimension index. If
        callable will be used by df.apply. If dataframe, will be joined
        to df directly.
    coords : dict or None, default None
        Coordinates for the dataarray.
    chunkby_dims : str, default "CZ"
        Dimensions along which to chunk the dask array. XY will automatically
        be chunked together.

    Returns
    -------
    x_data : xarray.DataArray
        Dask backed data array containing leica images. Will have STCZYX dims.
    """
    if callable(idx_mapper):
        df = df.join(df.apply(idx_mapper, axis=1, result_type='expand'))
    elif isinstance(idx_mapper, pd.DataFrame):
        df = df.join(idx_mapper)
    else:
        raise TypeError(
            "Must provide a callable to map names to indices or a pandas dataframe containing the indices"
        )

    #     ordered_cols = [df.columns[0]]+list('STCZ')
    #     df = df[ordered_cols]
    group_dims = [x for x in df.columns[1:] if x not in chunkby_dims]

    # if you end early there might not be the same number of frames in each pos
    # cutoff at the worst case scenario so things can be rectangular
    cutoffs = df.groupby('S').nunique().min().drop('filename')
    df = df.loc[(df.loc[:, ~df.columns.isin(['S', 'filename'])] <
                 cutoffs).all('columns')]
    chunks = np.zeros(df[group_dims].nunique().values, dtype='object')

    for idx, val in df.groupby(group_dims):
        darr = da.from_zarr(tiff.imread(val.filename.tolist(),
                                        aszarr=True)).rechunk(-1)
        # shape = tuple(cutoffs[x] for x in  chunkby_dims) + darr.shape[-2:]
        shape = tuple(x for i, x in cutoffs.items()
                      if i in chunkby_dims) + darr.shape[-2:]
        # print(idx, shape)
        darr = darr.reshape(shape)
        chunks[idx] = darr

    chunks = np.expand_dims(chunks, tuple(range(-1, -len(chunkby_dims) - 3,
                                                -1)))

    d_data = da.block(chunks.tolist())
    x_data = xr.DataArray(
        d_data,
        dims=group_dims + [x for x in df.columns if x in chunkby_dims] +
        ['Y', 'X'],
    )
    if coords is not None:
        x_data = x_data.assign_coords(coords)
    x_data = x_data.transpose('S', 'T', 'C', ..., 'Z', 'Y', 'X')
    return x_data
Example #23
    def dask_data(self) -> da.core.Array:
        """
        Read a TIFF image file as a delayed dask array where each chunk of the constructed array is a delayed YX plane.

        Returns
        -------
        img: dask.array.core.Array
            The constructed delayed YX plane dask array.
        """
        if self._dask_data is None:
            # Load Tiff
            with TiffFile(self._file) as tiff:
                # Check each scene has the same shape
                # If scene shape checking fails, use the specified scene and update operating shape
                scenes = tiff.series
                operating_shape = scenes[0].shape
                for scene in scenes:
                    if scene.shape != operating_shape:
                        operating_shape = scenes[self.specific_s_index].shape
                        scenes = [scenes[self.specific_s_index]]
                        log.info(
                            f"File contains variable dimensions per scene, "
                            f"selected scene: {self.specific_s_index} for data retrieval."
                        )
                        break

                # Get sample yx plane
                sample = scenes[0].pages[0].asarray()

                # Combine length of scenes and operating shape
                # Replace YX dims with empty dimensions
                operating_shape = (len(scenes), *operating_shape)
                operating_shape = operating_shape[:-2] + (1, 1)

                # Make ndarray for lazy arrays to fill
                lazy_arrays = np.ndarray(operating_shape, dtype=object)
                for all_page_index, (np_index, _) in enumerate(
                        np.ndenumerate(lazy_arrays)):
                    # Scene index is the first index in np_index
                    scene_index = np_index[0]

                    # This page index is current enumeration divided by scene index + 1
                    # For example if the image has 10 Z slices and 5 scenes, there would be 50 total pages
                    this_page_index = all_page_index // (scene_index + 1)

                    # Fill the numpy array with the delayed arrays
                    lazy_arrays[np_index] = da.from_delayed(delayed(
                        TiffReader._imread)(self._file, scene_index,
                                            this_page_index),
                                                            shape=sample.shape,
                                                            dtype=sample.dtype)

                # Convert the numpy array of lazy readers into a dask array
                data = da.block(lazy_arrays.tolist())

                # Only return the scene dimension if multiple scenes are present
                if len(scenes) == 1:
                    data = data[0, :]

                # Set _dask_data
                self._dask_data = data

        return self._dask_data
Example #24
def create_cf_map(mxds,gcf_dataset,beam_map,cf_beam_pair_id,pa,cf_pa_centers,chan_map, cf_pb_freq,cf_w,cf_pointing,pointing_ra_dec,sel_parms):
    import itertools
    from ._imaging_utils._general import _ndim_list
    from ._imaging_utils._dask_utils import _tree_combine_list, _find_unique_subset

    vis_dataset = mxds.attrs[sel_parms['xds']]
    n_chunks_in_each_dim = vis_dataset[sel_parms["data"]].data.numblocks
    chunk_sizes = vis_dataset[sel_parms["data"]].chunks
    
    w = vis_dataset.UVW[:,:,2]
    
    iter_chunks_indx = itertools.product(np.arange(n_chunks_in_each_dim[0]), np.arange(n_chunks_in_each_dim[1]),
                                         np.arange(n_chunks_in_each_dim[2]))
                                         
    ant_1 = vis_dataset.ANTENNA1
    ant_2 = vis_dataset.ANTENNA2
    ant_ids = mxds.ANTENNA.antenna_id.data
    beam_ids = mxds.beam_ids.data
    freq_chan = vis_dataset.chan.data
    n_chunks = np.prod(n_chunks_in_each_dim[:3])
    
    cf_map_list = _ndim_list((n_chunks_in_each_dim[0],n_chunks_in_each_dim[1],n_chunks_in_each_dim[2]))
    cf_parms_indx_list = _ndim_list((n_chunks,))
    a_parms_indx_list = _ndim_list((n_chunks,))
    w_parms_indx_list = _ndim_list((n_chunks,))
    
    #pg does not need chan dim, there will be redundant calculations. Maybe split later
    pg_map_list = _ndim_list((n_chunks_in_each_dim[0],n_chunks_in_each_dim[1]))
    pg_parms_indx_list = _ndim_list((n_chunks,))
    
    i_chunk = 0
    for c_time, c_baseline, c_chan in iter_chunks_indx:
        #print('c_time,c_baseline,c_chan',c_time,c_baseline,c_chan)
        chunk_cf_and_pg = dask.delayed(_cf_map_jit)(
            beam_map.data.partitions[c_baseline],
            beam_ids,
            cf_beam_pair_id.data,
            pa.data.partitions[c_time,0],
            cf_pa_centers.data,
            ant_1.data.partitions[c_time,c_baseline],
            ant_2.data.partitions[c_time,c_baseline],
            ant_ids,
            chan_map.data.partitions[c_chan],
            freq_chan,
            cf_pb_freq.data,
            w.data.partitions[c_time,c_baseline],
            cf_w.data,
            pointing_ra_dec.data.partitions[c_time,0],
            cf_pointing.data)
            
        #w_indx_arr, a_indx_arr,   cf_indx_arr, cf_map, pg_indx_arr, pg_map
        w_parms_indx_list[i_chunk] = chunk_cf_and_pg[0] #can't do from_delayed since the number of elements is unknown
        a_parms_indx_list[i_chunk] = chunk_cf_and_pg[1] #can't do from_delayed since the number of elements is unknown

        cf_parms_indx_list[i_chunk] = chunk_cf_and_pg[2] #can't do from_delayed since the number of elements is unknown
        cf_map_list[c_time][c_baseline][c_chan] = da.from_delayed(chunk_cf_and_pg[3], (chunk_sizes[0][c_time],chunk_sizes[1][c_baseline],chunk_sizes[2][c_chan]),dtype=int)

        pg_parms_indx_list[i_chunk] = chunk_cf_and_pg[4] #can't do from_delayed since the number of elements is unknown
        pg_map_list[c_time][c_baseline] = da.from_delayed(chunk_cf_and_pg[5], (chunk_sizes[0][c_time],chunk_sizes[1][c_baseline]),dtype=int)
            
        i_chunk = i_chunk+1
        
    cf_map = da.block(cf_map_list) #Awesome function
    pg_map = da.block(pg_map_list)
    
    w_parms_indx = _tree_combine_list(w_parms_indx_list,_find_unique_subset)
    a_parms_indx = _tree_combine_list(a_parms_indx_list,_find_unique_subset)
    cf_parms_indx = _tree_combine_list(cf_parms_indx_list,_find_unique_subset)
    pg_parms_indx = _tree_combine_list(pg_parms_indx_list,_find_unique_subset)
    
    #list_of_dask_delayed = [cf_map,pg_map,cf_parms_indx,pg_parms_indx,w_parms_indx,a_parms_indx]
    
    list_of_arrs= dask.compute([cf_map,pg_map,cf_parms_indx,pg_parms_indx,w_parms_indx,a_parms_indx])
    cf_map,pg_map,cf_parms_indx,pg_parms_indx,w_parms_indx,a_parms_indx = list_of_arrs[0]
    
    
    time_chunksize = vis_dataset[sel_parms['data']].chunks[0][0]
    baseline_chunksize = vis_dataset[sel_parms['data']].chunks[1][0]
    chan_chunksize = vis_dataset[sel_parms['data']].chunks[2][0]
    
    cf_map = da.from_array(cf_map,chunks=(time_chunksize,baseline_chunksize,chan_chunksize))
    w_parms_indx = da.from_array(w_parms_indx,chunks=(1,1))
    a_parms_indx = da.from_array(a_parms_indx,chunks=(1,6))
    cf_parms_indx = da.from_array(cf_parms_indx,chunks=(1,3))
    
    pg_parms_indx = da.from_array(pg_parms_indx,chunks=(1,3))
    pg_map = da.from_array(pg_map,chunks=(time_chunksize,baseline_chunksize))
    
    gcf_dataset = xr.Dataset()
    coords = {'gcf_indx':['a','w','gcf_flat'],'pg_indx':['p1','p2','pg_flat'],'a_indx':['pa1','b1','pa2','b2','c','a_flat'],'w_indx':['w']}
    gcf_dataset = gcf_dataset.assign_coords(coords)
    
    gcf_dataset['GCF_MAP'] = xr.DataArray(cf_map, dims=('time','baseline','chan'))
    gcf_dataset['GCF_PARMS_INDX'] = xr.DataArray(cf_parms_indx, dims=('gcf','gcf_indx'))
    gcf_dataset['W_PARMS_INDX'] = xr.DataArray(w_parms_indx, dims=('w','w_indx'))
    
    gcf_dataset['A_PARMS_INDX'] = xr.DataArray(a_parms_indx, dims=('a','a_indx'))
    
    gcf_dataset['GCF_A_PA'] = cf_pa_centers
    gcf_dataset['GCF_A_FREQ'] = cf_pb_freq
    gcf_dataset['GCF_A_BEAM_ID'] = cf_beam_pair_id
    gcf_dataset['GCF_W'] = cf_w
    
    gcf_dataset['PG_MAP'] =  xr.DataArray(pg_map, dims=('time','baseline'))
    gcf_dataset['PG_PARMS_INDX'] =  xr.DataArray(pg_parms_indx, dims=('pg','pg_indx'))
    gcf_dataset['PG_POINTING'] = cf_pointing
    
        
    '''
    cf_map = da.block(cf_map_list) #Awesome function
    pg_map = da.block(pg_map_list)
    
    w_parms_indx = da.from_delayed(_tree_combine_list(w_parms_indx_list,_find_unique_subset),shape=(np.nan,1),dtype=int) #(nan,1) first dim length is unkown
    a_parms_indx = da.from_delayed(_tree_combine_list(a_parms_indx_list,_find_unique_subset),shape=(np.nan,6),dtype=int) #(nan,6) first dim length is unkown
    cf_parms_indx = da.from_delayed(_tree_combine_list(cf_parms_indx_list,_find_unique_subset),shape=(np.nan,3),dtype=int) #(nan,3) first dim length is unkown
    pg_parms_indx = da.from_delayed(_tree_combine_list(pg_parms_indx_list,_find_unique_subset),shape=(np.nan,3),dtype=int) #(nan,3) first dim length is unkown
    
    #w_parms_indx = da.from_delayed(_tree_combine_list(w_parms_indx_list,_find_unique_subset),shape=(np.nan,1),dtype=int) #(nan,1) first dim length is unkown
    #a_parms_indx = da.from_delayed(_tree_combine_list(a_parms_indx_list,_find_unique_subset),shape=(np.nan,6),dtype=int) #(nan,6) first dim length is unkown
    #cf_parms_indx = da.from_delayed(_tree_combine_list(cf_parms_indx_list,_find_unique_subset),shape=(280,7),dtype=int) #(nan,3) first dim length is unkown
    #pg_parms_indx = da.from_delayed(_tree_combine_list(pg_parms_indx_list,_find_unique_subset),shape=(23,3),dtype=int) #(nan,3) first dim length is unkown
    
    
    
    gcf_dataset = xr.Dataset()
    coords = {'gcf_indx':['a','w','gcf_flat'],'pg_indx':['p1','p2','pg_flat'],'a_indx':['pa1','b1','pa2','b2','c','a_flat'],'w_indx':['w']}
    gcf_dataset = gcf_dataset.assign_coords(coords)
    
    gcf_dataset['GCF_MAP'] = xr.DataArray(cf_map, dims=('time','baseline','chan'))
    gcf_dataset['GCF_PARMS_INDX'] = xr.DataArray(cf_parms_indx, dims=('gcf','gcf_indx'))
    
    gcf_dataset['W_PARMS_INDX'] = xr.DataArray(w_parms_indx, dims=('w','w_indx'))
    gcf_dataset['A_PARMS_INDX'] = xr.DataArray(a_parms_indx, dims=('a','a_indx'))
    
    gcf_dataset['GCF_A_PA'] = cf_pa_centers
    gcf_dataset['GCF_A_FREQ'] = cf_pb_freq
    gcf_dataset['GCF_A_BEAM_ID'] = cf_beam_pair_id
    gcf_dataset['GCF_W'] = cf_w
    
    gcf_dataset['PG_MAP'] =  xr.DataArray(pg_map, dims=('time','baseline'))
    gcf_dataset['PG_PARMS_INDX'] =  xr.DataArray(pg_parms_indx, dims=('pg','pg_indx'))
    gcf_dataset['PG_POINTING'] = cf_pointing
    '''

    #dask.visualize(gcf_dataset,'make_gcf_coords')
    return gcf_dataset
Example #25
    sample = klb.readfull(fnames[0])  #Sample image

    #Generate lazy arrays
    lazy_arrays = [dask.delayed(klb.readfull)(fn) for fn in fnames]
    lazy_arrays = [
        da.from_delayed(x, shape=sample.shape, dtype=sample.dtype)
        for x in lazy_arrays
    ]

    #Generate empty object array to organize each chunk that loads the 3D volume
    a = np.empty((2, 2701, 1, 1, 1),
                 dtype=object)  #Dimension of (view,timepoint,Z,Y,X)
    #a = np.empty((2,10,1,1,1), dtype=object) #Dimension of (view,timepoint,Z,Y,X)

    for fn, x in zip(fnames, lazy_arrays):
        view = int(fn[fn.index("_CM") + 3:].split("_")[0])
        timepoint = int(fn[fn.index("_TM") + 3:].split("_")[0])
        a[view, timepoint, 0, 0, 0] = x
        print('CM', view, 'TM', timepoint)

    #Stitch together all these blocks into a single N-dimensional array
    a = da.block(a.tolist())
    a = a.rechunk((1, 1, 75, 128, 308))
    print(type(a), a.shape, a.dtype, a.chunksize, 'Size',
          round(a.size / (1024**3), 2), 'GB')

    if ch == 0:
        a.to_zarr(join(outPath, 'membrane-v2.zarr'), compressor=BZ2(level=9))
    elif ch == 1:
        a.to_zarr(join(outPath, 'nuclei-v2.zarr'), compressor=BZ2(level=9))
print('Took', round(time.time() - t0, 2), 'sec')
Example #26
    def _daread(
        img: Path,
        offsets: List[np.ndarray],
        read_lengths: np.ndarray,
        chunk_by_dims: List[str] = [
            Dimensions.SpatialZ,
            Dimensions.SpatialY,
            Dimensions.SpatialX,
        ],
        S: int = 0,
    ) -> Tuple[da.core.Array, str]:
        """
        Read a LIF image file as a delayed dask array where certain dimensions act as
        the chunk size.

        Parameters
        ----------
        img: Path
            The filepath to read.
        offsets: List[numpy.ndarray]
            A List of numpy ndarrays offsets, see _compute_offsets for more details.
        read_lengths: numpy.ndarray
            A 1D numpy array of read lengths, the index is the scene index
        chunk_by_dims: List[str]
            The dimensions to use as the for mapping the chunks / blocks.
            Default: [Dimensions.SpatialZ, Dimensions.SpatialY, Dimensions.SpatialX]
            Note: SpatialY and SpatialX will always be added to the list if not present.
        S: int
            If the image has different dimensions on any scene from another, the dask
            array construction will fail.
            In that case, use this parameter to specify a specific scene to construct a
            dask array for.
            Default: 0 (select the first scene)

        Returns
        -------
        img: dask.array.core.Array
            The constructed dask array where certain dimensions are chunked.
        dims: str
            The dimension order as a string.
        """
        # Get image dims indicies
        lif = LifFile(filename=img)
        image_dim_indices = LifReader._dims_shape(lif=lif)

        # Catch inconsistent scene dimension sizes
        if len(image_dim_indices) > 1:
            # Choose the provided scene
            try:
                image_dim_indices = image_dim_indices[S]
                log.info(
                    f"File contains variable dimensions per scene, "
                    f"selected scene: {S} for data retrieval."
                )
            except IndexError:
                raise exceptions.InconsistentShapeError(
                    f"The LIF image provided has variable dimensions per scene. "
                    f"Please provide a valid index to the 'S' parameter to create a "
                    f"dask array for the index provided. "
                    f"Provided scene index: {S}. Scene index range: "
                    f"0-{len(image_dim_indices)}."
                )
        else:
            # If the list is length one that means that all the scenes in the image
            # have the same dimensions
            # Just select the first dictionary in the list
            image_dim_indices = image_dim_indices[0]

        # Uppercase dimensions provided to chunk by dims
        chunk_by_dims = [d.upper() for d in chunk_by_dims]

        # Always add Y and X dims to chunk by dims because that is how LIF files work
        if Dimensions.SpatialY not in chunk_by_dims:
            log.info(
                "Adding the Spatial Y dimension to chunk by dimensions as it was not "
                "found."
            )
            chunk_by_dims.append(Dimensions.SpatialY)
        if Dimensions.SpatialX not in chunk_by_dims:
            log.info(
                "Adding the Spatial X dimension to chunk by dimensions as it was not "
                "found."
            )
            chunk_by_dims.append(Dimensions.SpatialX)

        # Setup read dimensions for an example chunk
        first_chunk_read_dims = {}
        for dim, (dim_begin_index, dim_end_index) in image_dim_indices.items():
            # Only add the dimension if the dimension isn't a part of the chunk
            if dim not in chunk_by_dims:
                # Add to read dims
                first_chunk_read_dims[dim] = dim_begin_index

        # Read first chunk for information used by dask.array.from_delayed
        sample, sample_dims = LifReader._get_array_from_offset(
            im_path=img,
            offsets=offsets,
            read_lengths=read_lengths,
            meta=lif.xml_root,
            read_dims=first_chunk_read_dims,
        )

        # Get the shape for the chunk and operating shape for the dask array
        # We also collect the chunk and non chunk dimension ordering so that we can
        # swap the dimensions after we block the dask array together.
        sample_chunk_shape = []
        operating_shape = []
        non_chunk_dimension_ordering = []
        chunk_dimension_ordering = []
        for i, dim_info in enumerate(sample_dims):
            # Unpack dim info
            dim, size = dim_info

            # If the dim is part of the specified chunk dims then append it to the
            # sample, and, append the dimension to the chunk dimension ordering
            if dim in chunk_by_dims:
                sample_chunk_shape.append(size)
                chunk_dimension_ordering.append(dim)

            # Otherwise, append the dimension to the non chunk dimension ordering, and,
            # append the true size of the image at that dimension
            else:
                non_chunk_dimension_ordering.append(dim)
                operating_shape.append(
                    image_dim_indices[dim][1] - image_dim_indices[dim][0]
                )

        # Convert shapes to tuples and combine the non and chunked dimension orders as
        # that is the order the data will actually come out of the read data as
        sample_chunk_shape = tuple(sample_chunk_shape)
        blocked_dimension_order = (
            non_chunk_dimension_ordering + chunk_dimension_ordering
        )

        # Fill out the rest of the operating shape with dimension sizes of 1 to match
        # the length of the sample chunk. When dask.block happens it fills the
        # dimensions from inner-most to outer-most with the chunks as long as the
        # dimension is size 1. Basically, we are adding empty dimensions to the
        # operating shape that will be filled by the chunks from dask
        operating_shape = tuple(operating_shape) + (1,) * len(sample_chunk_shape)

        # Create empty numpy array with the operating shape so that we can iter through
        # and use the multi_index to create the readers.
        lazy_arrays = np.ndarray(operating_shape, dtype=object)

        # We can enumerate over the multi-indexed array and construct read_dims
        # dictionaries by simply zipping together the ordered dims list and the current
        # multi-index plus the begin index for that plane. We then set the value of the
        # array at the same multi-index to the delayed reader using the constructed
        # read_dims dictionary.
        dims = [d for d in Dimensions.DefaultOrder]
        begin_indices = tuple(image_dim_indices[d][0] for d in dims)
        for i, _ in np.ndenumerate(lazy_arrays):
            # Add the LIF file begin index for each dimension to the array dimension
            # index
            this_chunk_read_indices = (
                current_dim_begin_index + curr_dim_index
                for current_dim_begin_index, curr_dim_index in zip(begin_indices, i)
            )

            # Zip the dims with the read indices
            this_chunk_read_dims = dict(
                zip(blocked_dimension_order, this_chunk_read_indices)
            )

            # Remove the dimensions that we want to chunk by from the read dims
            for d in chunk_by_dims:
                if d in this_chunk_read_dims:
                    this_chunk_read_dims.pop(d)

            # Add delayed array to lazy arrays at index
            lazy_arrays[i] = da.from_delayed(
                delayed(LifReader._imread)(
                    img, offsets, read_lengths, lif.xml_root, this_chunk_read_dims
                ),
                shape=sample_chunk_shape,
                dtype=sample.dtype,
            )

        # Convert the numpy array of lazy readers into a dask array and fill the inner
        # most empty dimensions with chunks
        merged = da.block(lazy_arrays.tolist())

        # Because we have set certain dimensions to be chunked and others not,
        # we will need to transpose back to the original dimension ordering.
        # For example, if the original dimension ordering was "SZYX" and we chunked by
        # "S", "Y", and "X", we created an array with dimension ordering "ZSYX".
        transpose_indices = []
        transpose_required = False
        for i, d in enumerate(Dimensions.DefaultOrder):
            new_index = blocked_dimension_order.index(d)
            if new_index != i:
                transpose_required = True
                transpose_indices.append(new_index)
            else:
                transpose_indices.append(i)

        # Only run if the transpose is actually required.
        # The default chunk dimensions are "Z", "Y", "X", which usually don't need to
        # be transposed because that is usually the native dimension order of the LIF
        # file anyway.
        if transpose_required:
            merged = da.transpose(merged, tuple(transpose_indices))

        # Because dimensions other than Y and X can be in any order, and may or may not
        # be present, we also return the dimension order string.
        return merged, "".join(dims)
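
The key step in the reader above is how the numpy object array of delayed chunks becomes a single dask array: da.block fills the trailing size-1 dimensions of the operating shape with the chunks, so the blocked dimension order is the non-chunk dimensions followed by the chunk dimensions. The following is a minimal, self-contained sketch of that mechanism; the read_plane reader and its shapes are made up for illustration and merely stand in for LifReader._imread.

import numpy as np
import dask.array as da
from dask import delayed


def read_plane(z):
    # Hypothetical reader returning one YX plane.
    return np.full((64, 64), z, dtype=np.uint16)


operating_shape = (3, 1, 1)     # (Z, 1, 1): Y and X are left for the chunks to fill
sample_chunk_shape = (64, 64)   # shape of a single YX chunk
lazy_arrays = np.ndarray(operating_shape, dtype=object)
for idx, _ in np.ndenumerate(lazy_arrays):
    lazy_arrays[idx] = da.from_delayed(
        delayed(read_plane)(idx[0]), shape=sample_chunk_shape, dtype=np.uint16
    )

# The nesting depth of the list matches the operating shape, so the 2-D chunks fill
# the two trailing size-1 dimensions and the result has shape (Z, Y, X).
merged = da.block(lazy_arrays.tolist())
assert merged.shape == (3, 64, 64)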
Example #27
0
def mosaic_process_date(
    date,
    date_files,
    temporary_dir,
    output_dir,
    memory,
    multi=False,
    overwrite=False,
    gdal_translate="gdal_translate",
    gdalwarp="gdalwarp",
):
    """Mosaic and regrid MODIS Fpar data from a given date.

    Args:
        date (str): MODIS date string, e.g. '2021034'.
        date_files (iterable of pathlib.Path): Files containing data for `date`.
        temporary_dir (pathlib.Path): Directory for temporary files.
        output_dir (pathlib.Path): Directory for output files.
        memory (int): GDAL memory in MB. Capped at 9999 MB.
        multi (bool): If True, add the '-multi' option to gdalwarp.
        overwrite (bool): If True, overwrite existing files.
        gdal_translate (str): gdal_translate command path.
        gdalwarp (str): gdalwarp command path.

    Returns:
        None or pathlib.Path: None if no processing could be done, or the filename of
            the processed data.

    """
    if date == "2000225":
        if len(date_files) != 131:
            logger.warning(
                f"Expected 131 files for 2000225. Got {len(date_files)}.")
    elif date == "2002081":
        if len(date_files) != 179:
            logger.warning(
                f"Expected 179 files for 2002081. Got {len(date_files)}.")
    elif len(date_files) < min_n_tiles:
        logger.warning(f"Found {len(date_files)} files (tiles) for '{date}'. "
                       f"Expected at least {min_n_tiles}.")
        return None

    # Limit to 9999 because otherwise the parameter is interpreted as bytes instead of
    # megabytes.
    memory = min(9999, memory)

    output_base = temporary_dir / f"{fpar_band_name}_{date}"

    mosaic_file = output_base.with_name(output_base.stem + "_mosaic.hdf5")
    mosaic_vrt_file = mosaic_file.with_suffix(".vrt")
    regridded_file = output_base.with_name(output_base.stem + "_0d25_raw.nc")
    output_file = Path(output_dir) / (output_base.stem + "_0d25.nc")

    # Used to convert the bounding coordinates to MODIS (m) coordinates.
    # NOTE: transformer.transform(lat, lon) -> (x, y)
    transformer = Transformer.from_crs("EPSG:4326", modis_proj)

    bounds_coords = defaultdict(list)

    # Collection of 'delayed' objects containing the data, indexed using
    # (horizontal, vertical) MODIS tile numbers.
    tile_data = {}
    for data_file in date_files:
        fpar_dataset_name = (
            f"HDF4_EOS:EOS_GRID:{data_file}:MOD_Grid_MOD15A2H:{fpar_band_name}"
        )
        qc_dataset_name = (
            f"HDF4_EOS:EOS_GRID:{data_file}:MOD_Grid_MOD15A2H:{qc_band_name}")
        with rasterio.open(fpar_dataset_name) as dataset:
            tags = dataset.tags()
            for bound_name, axis in bound_axes.items():
                bound_value = float(tags[f"{bound_name}BOUNDINGCOORDINATE"])
                bounds_coords[axis].append(bound_value)

        # Parse the horizontal (h) and vertical (v) tile numbers.
        h, v = map(int, re.search(r"h(\d{2})v(\d{2})", str(data_file)).groups())
        tile_data[(h, v)] = da.from_delayed(
            delayed_read_band_data(fpar_dataset_name, qc_dataset_name),
            shape=tile_shape,
            dtype=np.uint8,
        )

    # Get the extreme bounding values in lat lon coordinates.
    extreme_bounds = {
        axis: (min(axis_bounds), max(axis_bounds))
        for axis, axis_bounds in bounds_coords.items()
    }
    logger.debug(f"{date} {extreme_bounds}")

    # Transform the extreme bounding values to MODIS coordinates for reprojection.
    # Recall that transformer.transform(lat, lon) -> (x, y), so longitude bounds are
    # passed as the second argument (and the x result kept), while latitude bounds are
    # passed as the first argument (and the y result kept).
    modis_bounds = {}
    for axis, bounds in extreme_bounds.items():
        transformed = []
        for extreme_coord in bounds:
            if axis == "x":
                transformed.append(transformer.transform(0, extreme_coord)[0])
            else:
                transformed.append(transformer.transform(extreme_coord, 0)[1])
        modis_bounds[axis] = sorted(transformed)

    logger.debug(f"{date} {modis_bounds}")

    # Create the mosaic of MODIS tiles.

    # Extract all possible vertical and horizontal tile numbers.
    hs, vs = zip(*tile_data)

    data_blocks = []

    # Iterate over all tiles, using existing data where possible.
    for v_index in range(min(vs), max(vs) + 1):
        data_blocks.append([])
        for h_index in range(min(hs), max(hs) + 1):
            data_blocks[-1].append(
                tile_data.get(
                    (h_index, v_index),
                    # Use full() to pad irrelevant tiles with the invalid data marker.
                    da.full(
                        tile_shape,
                        fill_value=fill_value,
                        dtype=np.uint8,
                        # XXX: Specifying 'chunksize' here causes the following error
                        # when calling 'to_hdf5':
                        # OSError: Can't write data (no appropriate function for conversion path)
                        # chunksize=tile_shape,
                    ),
                ))

    data = da.block(data_blocks)[::-1]
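    # The [::-1] above reverses the row axis: MODIS vertical tile indices increase
    # towards the south, so after the flip pixel row 0 corresponds to the minimum y
    # coordinate used in the GCPs below.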

    if mosaic_file.is_file() and overwrite:
        logger.info(f"'{mosaic_file}' exists. Deleting.")
        mosaic_file.unlink()

    recalculate = False
    if not mosaic_file.is_file():
        recalculate = True
        data.to_hdf5(str(mosaic_file), "/fpar")
    else:
        logger.warning(f"'{mosaic_file}' exists. Not deleting.")

    # Attach information about the transform prior to calling 'gdalwarp'.
    y_pixels_max = data.shape[0] - 1
    x_pixels_max = data.shape[1] - 1

    y_min, y_max = modis_bounds["y"]
    x_min, x_max = modis_bounds["x"]

    gcp_opts = []
    for y_pixel, y_loc, x_pixel, x_loc in [
        (0, y_min, 0, x_min),
        (y_pixels_max, y_max, 0, x_min),
        (y_pixels_max, y_max, x_pixels_max, x_max),
        (0, y_min, x_pixels_max, x_max),
    ]:
        # -gcp <pixel> <line> <easting> <northing>
        gcp_opts.append(f"-gcp {x_pixel} {y_pixel} {x_loc} {y_loc}")

    cmd = " ".join((
        f"{gdal_translate} -of VRT -a_srs '{modis_proj}'",
        " ".join(gcp_opts),
        f'HDF5:"{mosaic_file}"://fpar {mosaic_vrt_file}',
    ))

    logger.debug(f"{date} gdal_translate cmd: {cmd}")

    check_output(shlex.split(cmd))

    execute_gdalwarp = True
    if regridded_file.is_file():
        if recalculate or overwrite:
            logger.info(f"'{regridded_file}' exists. Deleting.")
            regridded_file.unlink()
        else:
            logger.warning(
                f"'{regridded_file}' exists and '{mosaic_file}' was not changed. "
                "Not executing gdalwarp.")
            execute_gdalwarp = False

    if execute_gdalwarp:
        cmd = " ".join((
            f"{gdalwarp} -s_srs '{modis_proj}' -t_srs EPSG:4326 -ot Float32",
            "-srcnodata 255 -dstnodata -1",
            "-r average",
            *(("-multi", ) if multi else ()),
            "-te -180 -90 180 90 -ts 1440 720",
            f"-wm {memory}",
            f"-of netCDF {mosaic_vrt_file} {regridded_file}",
        ))
        logger.debug(f"{date} gdalwarp cmd: {cmd}")
        check_output(shlex.split(cmd))

    if output_file.is_file():
        if execute_gdalwarp or overwrite:
            logger.info(f"'{output_file}' exists. Deleting.")
            output_file.unlink()
        else:
            logger.warning(
                f"'{output_file}' exists and '{regridded_file}' was not changed. "
                "Not carrying out final processing.")
            return output_file

    # Read the regridded file, apply scaling factor, change metadata, and write to the
    # output file.
    cube = iris.load_cube(str(regridded_file))
    cube *= 0.01
    cube.var_name = None
    cube.standard_name = None
    cube.long_name = "Fraction of Absorbed Photosynthetically Active Radiation"
    cube.units = "1"
    safe_cube_save(cube, output_file, temporary_dir)

    logger.info(f"Finished writing to '{output_file}'.")
    return output_file
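
The mosaicking step above follows a simple pattern: tiles that exist are looked up in a dict keyed by their (h, v) tile numbers, missing tiles are padded with da.full using the invalid-data marker so the grid stays rectangular, and da.block assembles the list-of-lists. A minimal sketch of just that pattern follows; the tile positions are invented, and tile_shape and fill_value are assumed stand-ins for the module-level constants used by mosaic_process_date (255 matches the -srcnodata value above).

import numpy as np
import dask.array as da

tile_shape = (2400, 2400)  # assumed tile size
fill_value = 255           # assumed invalid-data marker

# Two tiles present, the rest of the 3 x 2 grid missing.
tile_data = {
    (0, 0): da.zeros(tile_shape, dtype=np.uint8),
    (2, 1): da.ones(tile_shape, dtype=np.uint8),
}
hs, vs = zip(*tile_data)

data_blocks = []
for v_index in range(min(vs), max(vs) + 1):
    data_blocks.append([
        tile_data.get(
            (h_index, v_index),
            da.full(tile_shape, fill_value=fill_value, dtype=np.uint8),
        )
        for h_index in range(min(hs), max(hs) + 1)
    ])

# Reverse the row axis, as in mosaic_process_date above.
data = da.block(data_blocks)[::-1]
assert data.shape == (2 * 2400, 3 * 2400)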
Example #28
0
File: dask.py  Project: dcherian/dcpy
def dask_safeslice(data, indices, chunks=None):
    """
    COPIED FROM https://github.com/dask/dask/issues/5540#issuecomment-601150129
    Added fancy indexing xarray.core.indexing.DaskIndexingAdapter

    Return a subset of a dask array, but with indexing applied independently to
    each slice of the input array, *prior* to their recombination to produce
    the result array.

    Args:

    * data (dask array):
        input data
    * indices (int or slice or tuple(int or slice)):
        required sub-section of the data.

    Kwargs:

    * chunks (list of (int or "auto")):
        chunking argument for 'rechunk' applied to the input.
        If set, forces the input to be rechunked as specified.
        ( This replaces the normal operation, which is to rechunk the input
        making the indexed dimensions undivided ).
        Mainly for testing on small arrays.

    .. note::

        'indices' currently does not support Ellipsis or newaxis.

    """

    from collections.abc import Iterable
    import dask.array as da

    # The idea is to "push down" the indexing operation to "underneath" the
    # result concatenation, so it gets done _before_ that.
    # This 'result concatenation' is actually implicit: the _implied_
    # concatenation of all the result chunks into a single output array.
    # We assume that any *one* chunk *can* be successfully computed.
    # By applying the indexing operation to each chunk, prior to the
    # complete result (re-)construction, we hope to make this work.

    # Normalise input to a list over all data dimensions.

    # NOTE: FOR NOW, this does not support Ellipsis.
    # TODO: that could easily be fixed.

    # Convert the slicing indices to a list of (int or slice).
    # ( NOTE: not supporting Ellipsis. )
    if not isinstance(indices, Iterable):
        # Convert a single key (slice or integer) to a length-1 list.
        indices = [indices]
    else:
        # Convert other iterable types to lists.
        indices = list(indices)

    n_data_dims = data.ndim
    assert len(indices) <= n_data_dims

    # Extend with ":" in all the additional (trailing) dims.
    all_slice = slice(None)
    indices += (n_data_dims - len(indices)) * [all_slice]

    assert len(indices) == n_data_dims

    # Discriminate indexed and non-indexed dims.
    # An "indexed" dim is where input index is *anything* other than a ":".
    dim_is_indexed = [index != all_slice for index in indices]

    # Work out which indices are simple integer values.
    # ( by definition, all of these will be "indexed" dims )
    dim_is_removed = [isinstance(key, int) for key in indices]

    # Replace single-value indices with length-1 indices, so the indexing
    # preserves all dimensions (as this makes reconstruction easier).
    # ( We use the above 'dim_is_removed' to correct this afterwards. )
    indices = [slice(key, key + 1) if isinstance(key, int) else key for key in indices]

    # We will now rechunk to get "our chunks" : but these must not be divided
    # in dimensions affected by the requested indexing.
    # So we rechunk, but insist that those dimensions are kept whole.
    # ( Obviously, not always optimal ... )
    # As the indexed dimensions will always be _reduced_ by the indexing, this
    # is obviously over-conservative + may give chunks which are rather too
    # small.  Let's just ignore that problem for now!
    if chunks is not None:
        rechunk_dim_specs = list(chunks)
    else:
        rechunk_dim_specs = ["auto"] * n_data_dims
    for i_dim in range(n_data_dims):
        if dim_is_indexed[i_dim]:
            rechunk_dim_specs[i_dim] = -1
    data = da.rechunk(data, chunks=rechunk_dim_specs)

    # Calculate multidimensional indexings of the original data array which
    # correspond to all these chunks.
    # Note: following the "-1"s in the above rechunking spec, the indexed dims
    # should all have only one chunk in them.
    assert all(
        len(data.chunks[i_dim]) == 1
        for i_dim in range(n_data_dims)
        if dim_is_removed[i_dim]
    )

    # Make an array of multidimensional indexes corresponding to all chunks.
    chunks_shape = [len(chunk_lengths) for chunk_lengths in data.chunks]
    chunks_shape += [n_data_dims]
    chunk_indices = np.zeros(chunks_shape, dtype=object)
    # The chunk_indices array ...
    #     * has dimensions of n-data-dims + 1
    #     * has shape of "chunks-shape" + (n_data_dims,)
    #     * each entry[i0, i1, iN-1] --> n_data_dims * slice-objects.

    # Pre-fill indexes array with [:, :, ...]
    chunk_indices[...] = all_slice
    # Set slice ranges for each dimension at a time.
    for i_dim in range(n_data_dims):
        # Fix all keys for this data dimension : chunk_indices[..., i_dim]
        dim_inds = [all_slice] * n_data_dims + [i_dim]
        if dim_is_indexed[i_dim]:
            # This is a user-indexed dim, so should be un-chunked.
            assert len(data.chunks[i_dim]) == 1
            # Set keys for this dim to the user-requested indexing.
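            # EMBED_INDEXES is a module-level flag defined in the original snippet
            # (not shown here): when True, the user-requested indices are embedded
            # directly into each chunk's index tuple at this point, instead of being
            # applied as a separate step inside get_chunks() below.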
            if EMBED_INDEXES:
                chunk_indices[tuple(dim_inds)] = indices[i_dim]
        else:
            # Replace keys for this dim with the slice range for the
            # relevant chunk, for each chunk in the dim.
            startend_positions = np.cumsum([0] + list(data.chunks[i_dim]))
            starts, ends = startend_positions[:-1], startend_positions[1:]
            for i_key, (i_start, i_end) in enumerate(zip(starts, ends)):
                dim_inds[i_dim] = i_key
                chunk_indices[tuple(dim_inds)] = slice(i_start, i_end)
                # E.G. chunk_indices[:, :, 1, :][2] = slice(3,6)

    # Make actual addressed chunks by indexing the original array, arrange them
    # in the same pattern, and re-combine them all to make a result array.
    # This needs to be a list-of-lists construction, as da.block requires it.
    # ( an array of arrays is presumably too confusing ?!? )
    def get_chunks(multidim_indices):
        if multidim_indices.ndim > 1:
            # Convert the "array of chunks" dims --> lists-of-lists
            result = [
                get_chunks(multidim_indices[i_part])
                for i_part in range(multidim_indices.shape[0])
            ]
        else:
            # Innermost dim contains n-dims * slice-objects
            # Convert these into a slice of the data array.
            result = data.__getitem__(tuple(multidim_indices))

            if not EMBED_INDEXES:
                # Now *also* apply the required indexing to this chunk.
                # It initially seemed *essential* that this be an independent
                # operation, so that the memory associated with the whole chunk
                # can be released.
                # But ACTUALLY this is not so, given the next step (see on).
                try:
                    result = result.__getitem__(tuple(indices))
                except NotImplementedError:
                    result = data
                    for axis, subkey in reversed(list(enumerate(tuple(indices)))):
                        result = result[(slice(None),) * axis + (subkey,)]

            # AND FINALLY : apply a numpy copy to this indexed-chunk.
            # This is essential, to release the source chunks ??
            # see: https://github.com/dask/dask/issues/3595#issuecomment-449546228
            result = result.map_blocks(np.copy)

        return result

    listoflists_of_chunks = get_chunks(chunk_indices)
    result = da.block(listoflists_of_chunks)

    assert result.ndim == n_data_dims  # Unchanged as 'da.block' concatenates.

    # Finally remove the extra dimensions for single-value indices.
    assert all(
        result.shape[i_dim] == 1
        for i_dim in range(n_data_dims)
        if dim_is_removed[i_dim]
    )
    all_dim_indices = [
        0 if dim_is_removed[i_dim] else all_slice for i_dim in range(n_data_dims)
    ]
    result = result.__getitem__(tuple(all_dim_indices))
    return result
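
A short usage sketch of dask_safeslice, not part of the original snippet: EMBED_INDEXES is assumed to be a module-level flag (set to False here), and the result is checked against plain dask slicing.

import numpy as np
import dask.array as da

EMBED_INDEXES = False  # module-level flag assumed by dask_safeslice above

data = da.from_array(np.arange(4 * 6 * 8).reshape(4, 6, 8), chunks=(2, 3, 8))
result = dask_safeslice(data, (1, slice(2, 5)), chunks=(2, 3, 8))

# The integer index removes a dimension, exactly as with plain indexing.
assert result.shape == (3, 8)
np.testing.assert_array_equal(result.compute(), data[1, 2:5].compute())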
Example #29
0
def phase_rotate_sgraph(vis_dataset, global_dataset, rotation_parms, sel_parms, storage_parms):
    """
    Rotate uvw with faceting style rephasing for multifield mosaic.
    The specified phasecenter and field phase centers are assumed to be in the same frame.
    This does not support east-west arrays, emphemeris objects or objects within the nearfield.
    (no refocus).
    
    Parameters
    ----------
    vis_dataset : xarray.core.dataset.Dataset
        input Visibility Dataset
    Returns
    -------
    psf_dataset : xarray.core.dataset.Dataset
    """
    #based on UVWMachine and FTMachine
    #measures/Measures/UVWMachine.cc
    
    #Important: Can not applyflags before calling rotate (uvw coordinates are also flagged). This will destroy the rotation transform.
    #Performance improvements apply_rotation_matrix (jit code)
    
    #print('1. numpy',vis_dataset.DATA[:,0,0,0].values)
    
    from ngcasa._ngcasa_utils._store import _store
    from scipy.spatial.transform import Rotation as R
    import numpy as np
    import copy
    import dask.array as da
    import xarray as xr
    from ngcasa._ngcasa_utils._check_parms import _check_storage_parms, _check_sel_parms, _check_existence_sel_parms
    from ._imaging_utils._check_imaging_parms import _check_rotation_parms
    import time
    import numba
    from numba import double
    import dask
    import itertools
    
    _sel_parms = copy.deepcopy(sel_parms)
    _rotation_parms = copy.deepcopy(rotation_parms)
    _storage_parms = copy.deepcopy(storage_parms)
    
    assert(_check_sel_parms(_sel_parms,{'uvw_in':'UVW','uvw_out':'UVW_ROT','data_in':'DATA','data_out':'DATA_ROT'})), "######### ERROR: sel_parms checking failed"
    assert(_check_existence_sel_parms(vis_dataset,{'uvw_in':_sel_parms['uvw_in'],'data_in':_sel_parms['data_in']})), "######### ERROR: sel_parms checking failed"
    assert(_check_rotation_parms(_rotation_parms)), "######### ERROR: rotation_parms checking failed"
    assert(_check_storage_parms(_storage_parms,'dataset.vis.zarr','phase_rotate')), "######### ERROR: storage_parms checking failed"
    
    assert(_sel_parms['uvw_out'] != _sel_parms['uvw_in']), "######### ERROR: sel_parms checking failed sel_parms['uvw_out'] can not be the same as sel_parms['uvw_in']."
    assert(_sel_parms['data_out'] != _sel_parms['data_in']), "######### ERROR: sel_parms checking failed sel_parms['data_out'] can not be the same as sel_parms['data_in']."
    
    #Phase center
    ra_image = _rotation_parms['image_phase_center'][0]
    dec_image = _rotation_parms['image_phase_center'][1]
    
    rotmat_image_phase_center = R.from_euler('XZ',[[np.pi/2 - dec_image, - ra_image + np.pi/2]]).as_matrix()[0]
    image_phase_center_cosine = _directional_cosine([ra_image,dec_image])
    
    n_fields = global_dataset.dims['field']
    field_names = global_dataset.field
    uvw_rotmat = np.zeros((n_fields,3,3),np.double)
    phase_rotation = np.zeros((n_fields,3),np.double)
    
    fields_phase_center = global_dataset.FIELD_PHASE_DIR.values[:,:,vis_dataset.attrs['ddi']]
    
    #print(fields_phase_center)
    
    #Create a rotation matrix for each field
    for i_field in range(n_fields):
        #Not sure if the last dimension in FIELD_PHASE_DIR is the ddi number
        field_phase_center = fields_phase_center[i_field,:]
        # Define rotation to a coordinate system with pole towards in-direction
        # and X-axis W; by rotating around z-axis over -(90-long); and around
        # x-axis (lat-90).
        rotmat_field_phase_center = R.from_euler('ZX',[[-np.pi/2 + field_phase_center[0],field_phase_center[1] - np.pi/2]]).as_matrix()[0]
        uvw_rotmat[i_field,:,:] = np.matmul(rotmat_image_phase_center,rotmat_field_phase_center).T
        
        if _rotation_parms['common_tangent_reprojection'] == True:
            uvw_rotmat[i_field,2,0:2] = 0.0 # (Common tangent rotation needed for joint mosaics, see last part of FTMachine::girarUVW in CASA)
        
        field_phase_center_cosine = _directional_cosine(field_phase_center)
        phase_rotation[i_field,:] = np.matmul(rotmat_image_phase_center,(image_phase_center_cosine - field_phase_center_cosine))
    
    chunk_sizes = vis_dataset[sel_parms["data_in"]].chunks
    freq_chan = da.from_array(vis_dataset.coords['chan'].values, chunks=(chunk_sizes[2][0]))
    n_chunks_in_each_dim = vis_dataset[_sel_parms['data_in']].data.numblocks
    iter_chunks_indx = itertools.product(np.arange(n_chunks_in_each_dim[0]), np.arange(n_chunks_in_each_dim[1]),
                                         np.arange(n_chunks_in_each_dim[2]), np.arange(n_chunks_in_each_dim[3]))
                                         
    list_of_vis_data = ndim_list(n_chunks_in_each_dim)
    list_of_uvw = ndim_list(n_chunks_in_each_dim[0:2]+(1,))
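    # ndim_list is a helper (not shown here) that builds an empty nested list with the
    # given number of entries per dimension; the two lists above are filled with
    # per-chunk dask arrays and recombined with da.block below.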
    
    for c_time, c_baseline, c_chan, c_pol in iter_chunks_indx:
        vis_data_and_uvw = dask.delayed(apply_phasor)(
            vis_dataset[sel_parms["data_in"]].data.partitions[c_time, c_baseline, c_chan, c_pol],
            vis_dataset[sel_parms["uvw_in"]].data.partitions[c_time, c_baseline, 0],
            vis_dataset.field_id.data.partitions[c_time],
            freq_chan.partitions[c_chan],
            dask.delayed(uvw_rotmat),
            dask.delayed(phase_rotation),
            dask.delayed(_rotation_parms['common_tangent_reprojection']))

        list_of_vis_data[c_time][c_baseline][c_chan][c_pol] = da.from_delayed(
            vis_data_and_uvw[0],
            (chunk_sizes[0][c_time], chunk_sizes[1][c_baseline], chunk_sizes[2][c_chan], chunk_sizes[3][c_pol]),
            dtype=np.complex128)
        list_of_uvw[c_time][c_baseline][0] = da.from_delayed(
            vis_data_and_uvw[1],
            (chunk_sizes[0][c_time], chunk_sizes[1][c_baseline], 3),
            dtype=np.float64)
    
    vis_dataset[_sel_parms['data_out']] =  xr.DataArray(da.block(list_of_vis_data), dims=vis_dataset[_sel_parms['data_in']].dims)
    vis_dataset[_sel_parms['uvw_out']] =  xr.DataArray(da.block(list_of_uvw), dims=vis_dataset[_sel_parms['uvw_in']].dims)
    
    #dask.visualize(vis_dataset[_sel_parms['uvw_out']],filename='uvw_rot_dataset')
    #dask.visualize(vis_dataset[_sel_parms['data_out']],filename='vis_rot_dataset')
    #dask.visualize(vis_dataset,filename='vis_dataset_before_append_custom_graph')
    
    list_xarray_data_variables = [vis_dataset[_sel_parms['uvw_out']],vis_dataset[_sel_parms['data_out']]]
    return _store(vis_dataset,list_xarray_data_variables,_storage_parms)
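
The chunk-wise graph construction above generalizes well: wrap a per-chunk computation in dask.delayed, place the resulting pieces in a nested list that mirrors the block layout, and let da.block reassemble them with the original chunk sizes. Below is a minimal sketch of that pattern; the scale_chunk function is invented for illustration and plays the role that apply_phasor plays above.

import itertools
import numpy as np
import dask
import dask.array as da


def scale_chunk(block, factor):
    # Hypothetical per-chunk computation.
    return block * factor


data = da.random.random((6, 8), chunks=(3, 4))
chunk_sizes = data.chunks
n_chunks = data.numblocks

list_of_blocks = [[None] * n_chunks[1] for _ in range(n_chunks[0])]
for c_row, c_col in itertools.product(range(n_chunks[0]), range(n_chunks[1])):
    delayed_block = dask.delayed(scale_chunk)(data.partitions[c_row, c_col], 2.0)
    list_of_blocks[c_row][c_col] = da.from_delayed(
        delayed_block,
        shape=(chunk_sizes[0][c_row], chunk_sizes[1][c_col]),
        dtype=data.dtype,
    )

result = da.block(list_of_blocks)
assert result.chunks == data.chunks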
Example #30
0
        def recurse_axes(loop_axes, point_axes):
            """
            Used to create a nested list of images, with each nesting level corresponding to a particular axis.
            Each time this function is recursively called, it descends one level deeper. The recursive calls
            can be thought of as a tree structure, where each depth level of the tree is one axis, and it has a
            branch (i.e. a subsequent call of recurse_axes) corresponding to every value of the next axis.

            :param loop_axes: The remaining axes that need to be looped over (i.e. the innermost ones)
            :param point_axes: The axes that have been assigned values already by a previous call of this function

            :return: Nested list of images
            """
            if len(loop_axes.values()) == 0:
                # There are no more axes over which to loop (i.e. we're at the maximum depth), so return
                # the image defined by point_axes, or a blank image if it is undefined (so that the full
                # nested list will have the expected rectangular shape)
                if verbose:
                    print("\rAdding data chunk {} of {}".format(
                        self._count, total),
                          end="")
                self._count += 1
                if None not in point_axes.values() and self.has_image(
                        **point_axes):
                    recurse_axes.empty = False  # track that actual data was read
                    if stitched:
                        img = self.read_image(**point_axes, memmapped=True)
                        if self.half_overlap[0] != 0:
                            img = img[
                                self.half_overlap[0]:-self.half_overlap[0],
                                self.half_overlap[1]:-self.half_overlap[1], ]
                        return img
                    else:
                        return self.read_image(**point_axes, memmapped=True)
                else:
                    # return np.zeros((self.image_height, self.image_width), self.dtype)
                    return self._empty_tile
            else:
                # Still have axes over which to loop
                # do row and col first because it makes stitching faster
                if "row" in loop_axes.keys() and stitched:
                    axis = "row"
                elif "column" in loop_axes.keys() and stitched:
                    axis = "column"
                else:
                    # Take the next axis in the list that needs to be looped over
                    axis = list(loop_axes.keys())[0]

                # copy so multiple calls don't collide on the same data structure
                remaining_loop_axes = loop_axes.copy()
                if (axis == "row" or axis == "column") and stitched:
                    # do these both at once
                    del remaining_loop_axes["row"]
                    del remaining_loop_axes["column"]
                else:
                    # remove because this axis is now being assigned a point value
                    del remaining_loop_axes[axis]
                if (axis == "row" or axis == "column") and stitched:
                    # Do stitching along existing axis
                    # Stitch tiles acquired in a grid (i.e. data acquired by Micro-Magellan or in multi-res mode)
                    self.half_overlap = (self.overlap[0] // 2,
                                         self.overlap[1] // 2)

                    # get spatial layout of position indices
                    row_values = np.array(list(self.axes["row"]))
                    column_values = np.array(list(self.axes["column"]))

                    # make nested list of rows and columns
                    blocks = []
                    for row in row_values:
                        blocks.append([])
                        for column in column_values:
                            valed_axes = point_axes.copy()
                            if verbose:
                                print(
                                    "\rAdding data chunk {} of {}".format(
                                        self._count, total),
                                    end="",
                                )
                            valed_axes["row"] = row
                            valed_axes["column"] = column

                            blocks[-1].append(
                                da.stack(
                                    recurse_axes(remaining_loop_axes,
                                                 valed_axes)))

                    rgb = self.bytes_per_pixel == 3 and self.dtype == np.uint8
                    if rgb:
                        stitched_array = np.concatenate(
                            [
                                np.concatenate(
                                    row, axis=len(blocks[0][0].shape) - 2)
                                for row in blocks
                            ],
                            axis=len(blocks[0][0].shape) - 3,
                        )
                    else:
                        stitched_array = da.block(blocks)
                    return stitched_array
                else:
                    # Do stacking along a new axis (i.e. not stitching along an existing one)
                    blocks = []
                    # Loop through every value of the next axis (i.e. create new branches of the tree)
                    for val in loop_axes[axis]:
                        # Copy to avoid unexpected errors by multiple calls
                        valed_axes = point_axes.copy()
                        # Move this axis from one that needs to be looped over to one that has a discrete value.
                        valed_axes[axis] = val
                        blocks.append(
                            recurse_axes(remaining_loop_axes, valed_axes))
                    return blocks